| { | |
| "best_metric": 0.8978955572876072, | |
| "best_model_checkpoint": "./results/finetunes/20250205-121158__microsoft_Phi-3.5-mini-instruct__ft/checkpoint-1792", | |
| "epoch": 0.13208520675167687, | |
| "eval_steps": 16, | |
| "global_step": 1792, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0001474165253924965, | |
| "grad_norm": 112.13977813720703, | |
| "learning_rate": 0.00012128399488167067, | |
| "loss": 2.0334, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.000294833050784993, | |
| "grad_norm": 47.18525695800781, | |
| "learning_rate": 0.00012128399457682722, | |
| "loss": 0.4295, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0004422495761774895, | |
| "grad_norm": 46.89369583129883, | |
| "learning_rate": 0.0001212839940687548, | |
| "loss": 1.793, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.000589666101569986, | |
| "grad_norm": 90.68251037597656, | |
| "learning_rate": 0.00012128399335745342, | |
| "loss": 1.582, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0007370826269624825, | |
| "grad_norm": 10.48133373260498, | |
| "learning_rate": 0.00012128399244292309, | |
| "loss": 1.152, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.000884499152354979, | |
| "grad_norm": 57.58028030395508, | |
| "learning_rate": 0.00012128399132516379, | |
| "loss": 0.8417, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0010319156777474755, | |
| "grad_norm": 24.7613468170166, | |
| "learning_rate": 0.00012128399000417552, | |
| "loss": 0.6337, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.001179332203139972, | |
| "grad_norm": 5.995689868927002, | |
| "learning_rate": 0.00012128398847995831, | |
| "loss": 0.29, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.001179332203139972, | |
| "eval_1_ratio_diff": -0.12081060015588468, | |
| "eval_accuracy": 0.6360093530787218, | |
| "eval_f1": 0.5856255545696539, | |
| "eval_loss": 0.7121835350990295, | |
| "eval_precision": 0.6790123456790124, | |
| "eval_recall": 0.514820592823713, | |
| "eval_runtime": 1440.0319, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0013267487285324685, | |
| "grad_norm": 20.756057739257812, | |
| "learning_rate": 0.00012128398675251216, | |
| "loss": 0.4541, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.001474165253924965, | |
| "grad_norm": 49.25767135620117, | |
| "learning_rate": 0.00012128398482183706, | |
| "loss": 1.1751, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0016215817793174615, | |
| "grad_norm": 10.73904037475586, | |
| "learning_rate": 0.00012128398268793303, | |
| "loss": 0.2334, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.001768998304709958, | |
| "grad_norm": 3.5153348445892334, | |
| "learning_rate": 0.00012128398035080009, | |
| "loss": 0.8965, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0019164148301024544, | |
| "grad_norm": 117.84137725830078, | |
| "learning_rate": 0.0001212839778104382, | |
| "loss": 2.9108, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.002063831355494951, | |
| "grad_norm": 108.86376190185547, | |
| "learning_rate": 0.00012128397506684742, | |
| "loss": 2.1317, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.0022112478808874476, | |
| "grad_norm": 19.305322647094727, | |
| "learning_rate": 0.00012128397212002774, | |
| "loss": 0.2653, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.002358664406279944, | |
| "grad_norm": 46.865966796875, | |
| "learning_rate": 0.00012128396896997918, | |
| "loss": 2.2461, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.002358664406279944, | |
| "eval_1_ratio_diff": -0.49961028838659394, | |
| "eval_accuracy": 0.5003897116134061, | |
| "eval_f1": 0.0, | |
| "eval_loss": 1.7971160411834717, | |
| "eval_precision": 0.0, | |
| "eval_recall": 0.0, | |
| "eval_runtime": 1438.1269, | |
| "eval_samples_per_second": 0.892, | |
| "eval_steps_per_second": 0.446, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0025060809316724405, | |
| "grad_norm": 55.90218734741211, | |
| "learning_rate": 0.00012128396561670172, | |
| "loss": 1.0773, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.002653497457064937, | |
| "grad_norm": 8.257821083068848, | |
| "learning_rate": 0.0001212839620601954, | |
| "loss": 0.7481, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0028009139824574335, | |
| "grad_norm": 11.776910781860352, | |
| "learning_rate": 0.00012128395830046022, | |
| "loss": 0.0906, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.00294833050784993, | |
| "grad_norm": 115.57841491699219, | |
| "learning_rate": 0.00012128395433749618, | |
| "loss": 3.0851, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0030957470332424264, | |
| "grad_norm": 5.130585193634033, | |
| "learning_rate": 0.00012128395017130333, | |
| "loss": 0.9399, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.003243163558634923, | |
| "grad_norm": 43.877689361572266, | |
| "learning_rate": 0.00012128394580188166, | |
| "loss": 0.9284, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.0033905800840274194, | |
| "grad_norm": 48.76664733886719, | |
| "learning_rate": 0.00012128394122923118, | |
| "loss": 0.5431, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.003537996609419916, | |
| "grad_norm": 33.9229736328125, | |
| "learning_rate": 0.00012128393645335193, | |
| "loss": 0.6688, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.003537996609419916, | |
| "eval_1_ratio_diff": -0.09353078721745911, | |
| "eval_accuracy": 0.764614185502728, | |
| "eval_f1": 0.7401032702237521, | |
| "eval_loss": 0.49912577867507935, | |
| "eval_precision": 0.8253358925143954, | |
| "eval_recall": 0.6708268330733229, | |
| "eval_runtime": 1439.1521, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0036854131348124123, | |
| "grad_norm": 20.783430099487305, | |
| "learning_rate": 0.00012128393147424389, | |
| "loss": 0.7502, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.003832829660204909, | |
| "grad_norm": 28.81708335876465, | |
| "learning_rate": 0.0001212839262919071, | |
| "loss": 0.8271, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.003980246185597405, | |
| "grad_norm": 58.47079086303711, | |
| "learning_rate": 0.00012128392090634156, | |
| "loss": 1.0213, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.004127662710989902, | |
| "grad_norm": 107.4663314819336, | |
| "learning_rate": 0.00012128391531754733, | |
| "loss": 1.6449, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.004275079236382398, | |
| "grad_norm": 21.926761627197266, | |
| "learning_rate": 0.00012128390952552436, | |
| "loss": 1.5282, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.004422495761774895, | |
| "grad_norm": 108.13206481933594, | |
| "learning_rate": 0.00012128390353027275, | |
| "loss": 1.2688, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.004569912287167391, | |
| "grad_norm": 85.27387237548828, | |
| "learning_rate": 0.00012128389733179246, | |
| "loss": 1.4725, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.004717328812559888, | |
| "grad_norm": 3.8993313312530518, | |
| "learning_rate": 0.00012128389093008353, | |
| "loss": 0.1737, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.004717328812559888, | |
| "eval_1_ratio_diff": 0.05378020265003891, | |
| "eval_accuracy": 0.7809820732657833, | |
| "eval_f1": 0.7920059215396003, | |
| "eval_loss": 0.4972352981567383, | |
| "eval_precision": 0.7535211267605634, | |
| "eval_recall": 0.8346333853354134, | |
| "eval_runtime": 1439.2432, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.004864745337952384, | |
| "grad_norm": 1.8932548761367798, | |
| "learning_rate": 0.00012128388432514599, | |
| "loss": 0.1574, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.005012161863344881, | |
| "grad_norm": 31.92827606201172, | |
| "learning_rate": 0.00012128387751697984, | |
| "loss": 0.2239, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.005159578388737377, | |
| "grad_norm": 57.11052703857422, | |
| "learning_rate": 0.00012128387050558512, | |
| "loss": 1.4278, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.005306994914129874, | |
| "grad_norm": 0.45575767755508423, | |
| "learning_rate": 0.00012128386329096184, | |
| "loss": 2.7855, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.00545441143952237, | |
| "grad_norm": 71.60086059570312, | |
| "learning_rate": 0.00012128385587311005, | |
| "loss": 1.4066, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.005601827964914867, | |
| "grad_norm": 0.1034606546163559, | |
| "learning_rate": 0.00012128384825202977, | |
| "loss": 2.1198, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.005749244490307364, | |
| "grad_norm": 0.3067642152309418, | |
| "learning_rate": 0.00012128384042772098, | |
| "loss": 0.0126, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.00589666101569986, | |
| "grad_norm": 63.32870101928711, | |
| "learning_rate": 0.00012128383240018376, | |
| "loss": 1.4007, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.00589666101569986, | |
| "eval_1_ratio_diff": 0.04130943102104445, | |
| "eval_accuracy": 0.7653936087295401, | |
| "eval_f1": 0.7745318352059926, | |
| "eval_loss": 1.208424687385559, | |
| "eval_precision": 0.7449567723342939, | |
| "eval_recall": 0.8065522620904836, | |
| "eval_runtime": 1438.9869, | |
| "eval_samples_per_second": 0.892, | |
| "eval_steps_per_second": 0.446, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.006044077541092357, | |
| "grad_norm": 0.002626498695462942, | |
| "learning_rate": 0.00012128382416941812, | |
| "loss": 0.003, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.006191494066484853, | |
| "grad_norm": 78.83605194091797, | |
| "learning_rate": 0.00012128381573542408, | |
| "loss": 1.7103, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.00633891059187735, | |
| "grad_norm": 0.04237201437354088, | |
| "learning_rate": 0.00012128380709820168, | |
| "loss": 0.0184, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.006486327117269846, | |
| "grad_norm": 57.11608123779297, | |
| "learning_rate": 0.00012128379825775094, | |
| "loss": 0.3886, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.006633743642662343, | |
| "grad_norm": 71.66314697265625, | |
| "learning_rate": 0.00012128378921407189, | |
| "loss": 1.0122, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.006781160168054839, | |
| "grad_norm": 60.63711166381836, | |
| "learning_rate": 0.00012128377996716456, | |
| "loss": 2.2072, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.006928576693447336, | |
| "grad_norm": 64.88410186767578, | |
| "learning_rate": 0.00012128377051702896, | |
| "loss": 1.7641, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.007075993218839832, | |
| "grad_norm": 15.290694236755371, | |
| "learning_rate": 0.00012128376086366519, | |
| "loss": 0.2084, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.007075993218839832, | |
| "eval_1_ratio_diff": -0.07794232268121593, | |
| "eval_accuracy": 0.764614185502728, | |
| "eval_f1": 0.7445008460236887, | |
| "eval_loss": 0.6278901100158691, | |
| "eval_precision": 0.8133086876155268, | |
| "eval_recall": 0.6864274570982839, | |
| "eval_runtime": 1439.7986, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.007223409744232329, | |
| "grad_norm": 4.278674125671387, | |
| "learning_rate": 0.00012128375100707322, | |
| "loss": 0.0205, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.007370826269624825, | |
| "grad_norm": 12.730552673339844, | |
| "learning_rate": 0.00012128374094725308, | |
| "loss": 0.0596, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.007518242795017322, | |
| "grad_norm": 0.03387758880853653, | |
| "learning_rate": 0.00012128373068420486, | |
| "loss": 1.1734, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.007665659320409818, | |
| "grad_norm": 0.002689527813345194, | |
| "learning_rate": 0.00012128372021792852, | |
| "loss": 0.016, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.007813075845802315, | |
| "grad_norm": 46.29806900024414, | |
| "learning_rate": 0.00012128370954842415, | |
| "loss": 3.8453, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.00796049237119481, | |
| "grad_norm": 65.56766510009766, | |
| "learning_rate": 0.00012128369867569178, | |
| "loss": 3.0592, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.008107908896587307, | |
| "grad_norm": 67.830322265625, | |
| "learning_rate": 0.00012128368759973141, | |
| "loss": 1.5232, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.008255325421979804, | |
| "grad_norm": 1.828292965888977, | |
| "learning_rate": 0.00012128367632054312, | |
| "loss": 0.899, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.008255325421979804, | |
| "eval_1_ratio_diff": -0.24707716289945442, | |
| "eval_accuracy": 0.6952455183164459, | |
| "eval_f1": 0.5948186528497409, | |
| "eval_loss": 1.2687604427337646, | |
| "eval_precision": 0.8858024691358025, | |
| "eval_recall": 0.44773790951638065, | |
| "eval_runtime": 1440.6646, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.0084027419473723, | |
| "grad_norm": 2.445478916168213, | |
| "learning_rate": 0.00012128366483812693, | |
| "loss": 1.3983, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.008550158472764796, | |
| "grad_norm": 0.8839952349662781, | |
| "learning_rate": 0.00012128365315248287, | |
| "loss": 2.515, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.008697574998157294, | |
| "grad_norm": 20.67784881591797, | |
| "learning_rate": 0.000121283641263611, | |
| "loss": 1.5722, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.00884499152354979, | |
| "grad_norm": 1.1078622341156006, | |
| "learning_rate": 0.00012128362917151136, | |
| "loss": 0.0058, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.008992408048942286, | |
| "grad_norm": 52.540367126464844, | |
| "learning_rate": 0.00012128361687618396, | |
| "loss": 2.8601, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.009139824574334782, | |
| "grad_norm": 40.01364517211914, | |
| "learning_rate": 0.00012128360437762885, | |
| "loss": 0.6845, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.00928724109972728, | |
| "grad_norm": 4.011626243591309, | |
| "learning_rate": 0.00012128359167584609, | |
| "loss": 0.6806, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.009434657625119776, | |
| "grad_norm": 12.99624252319336, | |
| "learning_rate": 0.00012128357877083573, | |
| "loss": 0.8965, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.009434657625119776, | |
| "eval_1_ratio_diff": 0.33982852689010135, | |
| "eval_accuracy": 0.6492595479345284, | |
| "eval_f1": 0.7380675203725262, | |
| "eval_loss": 0.9785400629043579, | |
| "eval_precision": 0.5886722376973074, | |
| "eval_recall": 0.9890795631825273, | |
| "eval_runtime": 1440.0679, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.009582074150512272, | |
| "grad_norm": 34.23851013183594, | |
| "learning_rate": 0.00012128356566259777, | |
| "loss": 0.3434, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.009729490675904768, | |
| "grad_norm": 66.7353286743164, | |
| "learning_rate": 0.0001212835523511323, | |
| "loss": 0.475, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.009876907201297266, | |
| "grad_norm": 56.82964324951172, | |
| "learning_rate": 0.00012128353883643935, | |
| "loss": 0.7709, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.010024323726689762, | |
| "grad_norm": 34.38500213623047, | |
| "learning_rate": 0.00012128352511851894, | |
| "loss": 0.7302, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.010171740252082258, | |
| "grad_norm": 106.88589477539062, | |
| "learning_rate": 0.00012128351119737116, | |
| "loss": 1.332, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.010319156777474754, | |
| "grad_norm": 85.7337875366211, | |
| "learning_rate": 0.00012128349707299602, | |
| "loss": 1.6342, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.010466573302867252, | |
| "grad_norm": 4.05411958694458, | |
| "learning_rate": 0.00012128348274539358, | |
| "loss": 0.0673, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.010613989828259748, | |
| "grad_norm": 2.334378719329834, | |
| "learning_rate": 0.0001212834682145639, | |
| "loss": 0.0332, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.010613989828259748, | |
| "eval_1_ratio_diff": -0.2346063912704599, | |
| "eval_accuracy": 0.7014809041309431, | |
| "eval_f1": 0.6095820591233435, | |
| "eval_loss": 1.218570351600647, | |
| "eval_precision": 0.8794117647058823, | |
| "eval_recall": 0.4664586583463339, | |
| "eval_runtime": 1440.6194, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.010761406353652244, | |
| "grad_norm": 1.3649911880493164, | |
| "learning_rate": 0.00012128345348050701, | |
| "loss": 0.985, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.01090882287904474, | |
| "grad_norm": 6.569690227508545, | |
| "learning_rate": 0.00012128343854322297, | |
| "loss": 0.0316, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.011056239404437238, | |
| "grad_norm": 50.96843719482422, | |
| "learning_rate": 0.00012128342340271183, | |
| "loss": 2.5112, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.011203655929829734, | |
| "grad_norm": 46.42570877075195, | |
| "learning_rate": 0.00012128340805897364, | |
| "loss": 2.5907, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.01135107245522223, | |
| "grad_norm": 35.919315338134766, | |
| "learning_rate": 0.00012128339251200845, | |
| "loss": 0.5731, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.011498488980614728, | |
| "grad_norm": 0.33857831358909607, | |
| "learning_rate": 0.0001212833767618163, | |
| "loss": 0.0029, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.011645905506007224, | |
| "grad_norm": 0.6119909286499023, | |
| "learning_rate": 0.00012128336080839724, | |
| "loss": 0.0036, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.01179332203139972, | |
| "grad_norm": 34.078514099121094, | |
| "learning_rate": 0.00012128334465175136, | |
| "loss": 3.0454, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.01179332203139972, | |
| "eval_1_ratio_diff": -0.05222135619641466, | |
| "eval_accuracy": 0.8106001558846454, | |
| "eval_f1": 0.8, | |
| "eval_loss": 0.9759823083877563, | |
| "eval_precision": 0.8466898954703833, | |
| "eval_recall": 0.7581903276131046, | |
| "eval_runtime": 1440.5068, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.011940738556792216, | |
| "grad_norm": 0.10960781574249268, | |
| "learning_rate": 0.0001212833282918787, | |
| "loss": 0.0036, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.012088155082184714, | |
| "grad_norm": 0.12220565974712372, | |
| "learning_rate": 0.0001212833117287793, | |
| "loss": 0.0025, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.01223557160757721, | |
| "grad_norm": 127.77825164794922, | |
| "learning_rate": 0.00012128329496245321, | |
| "loss": 2.7251, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.012382988132969706, | |
| "grad_norm": 65.698486328125, | |
| "learning_rate": 0.0001212832779929005, | |
| "loss": 0.4867, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.012530404658362202, | |
| "grad_norm": 37.85614013671875, | |
| "learning_rate": 0.00012128326082012124, | |
| "loss": 0.2097, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.0126778211837547, | |
| "grad_norm": 12.939319610595703, | |
| "learning_rate": 0.00012128324344411546, | |
| "loss": 1.4561, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.012825237709147196, | |
| "grad_norm": 81.24678039550781, | |
| "learning_rate": 0.00012128322586488326, | |
| "loss": 1.1304, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.012972654234539692, | |
| "grad_norm": 58.61750030517578, | |
| "learning_rate": 0.00012128320808242463, | |
| "loss": 0.9005, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.012972654234539692, | |
| "eval_1_ratio_diff": 0.2533125487139517, | |
| "eval_accuracy": 0.7186282151208107, | |
| "eval_f1": 0.7753578095830741, | |
| "eval_loss": 0.8996144533157349, | |
| "eval_precision": 0.644927536231884, | |
| "eval_recall": 0.9719188767550702, | |
| "eval_runtime": 1439.76, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.013120070759932188, | |
| "grad_norm": 60.69062805175781, | |
| "learning_rate": 0.00012128319009673968, | |
| "loss": 1.4957, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.013267487285324685, | |
| "grad_norm": 6.7324652671813965, | |
| "learning_rate": 0.00012128317190782848, | |
| "loss": 0.2882, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.013414903810717181, | |
| "grad_norm": 0.18422821164131165, | |
| "learning_rate": 0.00012128315351569106, | |
| "loss": 0.5841, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.013562320336109678, | |
| "grad_norm": 106.35135650634766, | |
| "learning_rate": 0.00012128313492032748, | |
| "loss": 1.3522, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.013709736861502174, | |
| "grad_norm": 35.63379669189453, | |
| "learning_rate": 0.00012128311612173782, | |
| "loss": 1.237, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.013857153386894671, | |
| "grad_norm": 83.5736312866211, | |
| "learning_rate": 0.00012128309711992214, | |
| "loss": 1.3351, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.014004569912287167, | |
| "grad_norm": 97.8160400390625, | |
| "learning_rate": 0.0001212830779148805, | |
| "loss": 1.6019, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.014151986437679663, | |
| "grad_norm": 2.5867555141448975, | |
| "learning_rate": 0.00012128305850661298, | |
| "loss": 0.0897, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.014151986437679663, | |
| "eval_1_ratio_diff": 0.24863600935307872, | |
| "eval_accuracy": 0.7295401402961809, | |
| "eval_f1": 0.783260462211118, | |
| "eval_loss": 1.138918161392212, | |
| "eval_precision": 0.653125, | |
| "eval_recall": 0.9781591263650546, | |
| "eval_runtime": 1440.7407, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.014299402963072161, | |
| "grad_norm": 71.02184295654297, | |
| "learning_rate": 0.00012128303889511963, | |
| "loss": 1.3708, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.014446819488464657, | |
| "grad_norm": 0.5830493569374084, | |
| "learning_rate": 0.0001212830190804005, | |
| "loss": 3.0855, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.014594236013857153, | |
| "grad_norm": 63.9030876159668, | |
| "learning_rate": 0.00012128299906245568, | |
| "loss": 1.6675, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.01474165253924965, | |
| "grad_norm": 0.18025726079940796, | |
| "learning_rate": 0.00012128297884128523, | |
| "loss": 0.1379, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.014889069064642147, | |
| "grad_norm": 0.8397954702377319, | |
| "learning_rate": 0.00012128295841688921, | |
| "loss": 1.528, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.015036485590034643, | |
| "grad_norm": 78.28919219970703, | |
| "learning_rate": 0.0001212829377892677, | |
| "loss": 1.2677, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.01518390211542714, | |
| "grad_norm": 5.996486186981201, | |
| "learning_rate": 0.00012128291695842078, | |
| "loss": 1.205, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.015331318640819635, | |
| "grad_norm": 1.2115447521209717, | |
| "learning_rate": 0.0001212828959243485, | |
| "loss": 0.0076, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.015331318640819635, | |
| "eval_1_ratio_diff": 0.03351519875292286, | |
| "eval_accuracy": 0.8402182385035074, | |
| "eval_f1": 0.8452830188679246, | |
| "eval_loss": 0.5696436166763306, | |
| "eval_precision": 0.8187134502923976, | |
| "eval_recall": 0.8736349453978159, | |
| "eval_runtime": 1440.7431, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.015478735166212133, | |
| "grad_norm": 41.47733688354492, | |
| "learning_rate": 0.00012128287468705092, | |
| "loss": 1.0424, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.01562615169160463, | |
| "grad_norm": 13.133481979370117, | |
| "learning_rate": 0.00012128285324652816, | |
| "loss": 0.0602, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.015773568216997127, | |
| "grad_norm": 14.336326599121094, | |
| "learning_rate": 0.00012128283160278022, | |
| "loss": 0.0887, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.01592098474238962, | |
| "grad_norm": 2.6840479373931885, | |
| "learning_rate": 0.00012128280975580723, | |
| "loss": 0.0105, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.01606840126778212, | |
| "grad_norm": 0.026224393397569656, | |
| "learning_rate": 0.00012128278770560924, | |
| "loss": 0.0006, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.016215817793174613, | |
| "grad_norm": 0.0356808602809906, | |
| "learning_rate": 0.00012128276545218633, | |
| "loss": 1.6274, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.01636323431856711, | |
| "grad_norm": 0.03703249245882034, | |
| "learning_rate": 0.00012128274299553858, | |
| "loss": 1.6564, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.01651065084395961, | |
| "grad_norm": 0.23091621696949005, | |
| "learning_rate": 0.00012128272033566606, | |
| "loss": 0.0017, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.01651065084395961, | |
| "eval_1_ratio_diff": 0.10210444271239283, | |
| "eval_accuracy": 0.8106001558846454, | |
| "eval_f1": 0.8280254777070064, | |
| "eval_loss": 1.4256943464279175, | |
| "eval_precision": 0.7577720207253886, | |
| "eval_recall": 0.9126365054602185, | |
| "eval_runtime": 1440.6468, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.016658067369352103, | |
| "grad_norm": 0.2899627983570099, | |
| "learning_rate": 0.00012128269747256883, | |
| "loss": 0.0048, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.0168054838947446, | |
| "grad_norm": 138.98680114746094, | |
| "learning_rate": 0.00012128267440624699, | |
| "loss": 7.0607, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.0169529004201371, | |
| "grad_norm": 64.21833801269531, | |
| "learning_rate": 0.0001212826511367006, | |
| "loss": 2.4323, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.017100316945529593, | |
| "grad_norm": 69.21852111816406, | |
| "learning_rate": 0.00012128262766392974, | |
| "loss": 3.8941, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.01724773347092209, | |
| "grad_norm": 0.6788825988769531, | |
| "learning_rate": 0.00012128260398793452, | |
| "loss": 0.0033, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.01739514999631459, | |
| "grad_norm": 0.5503783822059631, | |
| "learning_rate": 0.000121282580108715, | |
| "loss": 0.0089, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.017542566521707083, | |
| "grad_norm": 1.4736528396606445, | |
| "learning_rate": 0.00012128255602627122, | |
| "loss": 0.6923, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.01768998304709958, | |
| "grad_norm": 0.052145253866910934, | |
| "learning_rate": 0.0001212825317406033, | |
| "loss": 0.003, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.01768998304709958, | |
| "eval_1_ratio_diff": 0.05689789555728764, | |
| "eval_accuracy": 0.8667186282151208, | |
| "eval_f1": 0.8738007380073801, | |
| "eval_loss": 0.5649486184120178, | |
| "eval_precision": 0.8291316526610645, | |
| "eval_recall": 0.9235569422776911, | |
| "eval_runtime": 1440.858, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.446, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.017837399572492075, | |
| "grad_norm": 33.907466888427734, | |
| "learning_rate": 0.00012128250725171133, | |
| "loss": 1.0754, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.017984816097884573, | |
| "grad_norm": 1.5523881912231445, | |
| "learning_rate": 0.00012128248255959539, | |
| "loss": 2.2872, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.01813223262327707, | |
| "grad_norm": 0.45814594626426697, | |
| "learning_rate": 0.00012128245766425553, | |
| "loss": 0.0082, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.018279649148669565, | |
| "grad_norm": 63.94032669067383, | |
| "learning_rate": 0.00012128243256569185, | |
| "loss": 1.7641, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.018427065674062063, | |
| "grad_norm": 0.17571286857128143, | |
| "learning_rate": 0.00012128240726390445, | |
| "loss": 0.0017, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.01857448219945456, | |
| "grad_norm": 0.08677598834037781, | |
| "learning_rate": 0.0001212823817588934, | |
| "loss": 2.0446, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.018721898724847055, | |
| "grad_norm": 0.06298824399709702, | |
| "learning_rate": 0.00012128235605065879, | |
| "loss": 0.0031, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.018869315250239552, | |
| "grad_norm": 0.04490824043750763, | |
| "learning_rate": 0.00012128233013920071, | |
| "loss": 0.0016, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.018869315250239552, | |
| "eval_1_ratio_diff": 0.26032735775526106, | |
| "eval_accuracy": 0.7272018706157444, | |
| "eval_f1": 0.7834158415841584, | |
| "eval_loss": 1.7306467294692993, | |
| "eval_precision": 0.6492307692307693, | |
| "eval_recall": 0.9875195007800313, | |
| "eval_runtime": 1441.1243, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.019016731775632047, | |
| "grad_norm": 64.88382720947266, | |
| "learning_rate": 0.00012128230402451925, | |
| "loss": 1.4818, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.019164148301024544, | |
| "grad_norm": 0.04304850101470947, | |
| "learning_rate": 0.00012128227770661447, | |
| "loss": 0.0006, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.019311564826417042, | |
| "grad_norm": 99.55477142333984, | |
| "learning_rate": 0.00012128225118548648, | |
| "loss": 1.3041, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.019458981351809537, | |
| "grad_norm": 64.24674987792969, | |
| "learning_rate": 0.00012128222446113537, | |
| "loss": 3.4221, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.019606397877202034, | |
| "grad_norm": 1.130561351776123, | |
| "learning_rate": 0.00012128219753356123, | |
| "loss": 0.0047, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.019753814402594532, | |
| "grad_norm": 60.320674896240234, | |
| "learning_rate": 0.00012128217040276413, | |
| "loss": 0.7215, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.019901230927987026, | |
| "grad_norm": 56.348636627197266, | |
| "learning_rate": 0.0001212821430687442, | |
| "loss": 3.0486, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.020048647453379524, | |
| "grad_norm": 4.682687759399414, | |
| "learning_rate": 0.0001212821155315015, | |
| "loss": 0.0195, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.020048647453379524, | |
| "eval_1_ratio_diff": -0.07170693686671864, | |
| "eval_accuracy": 0.8035853468433359, | |
| "eval_f1": 0.788235294117647, | |
| "eval_loss": 0.7957486510276794, | |
| "eval_precision": 0.8542805100182149, | |
| "eval_recall": 0.7316692667706708, | |
| "eval_runtime": 1438.4097, | |
| "eval_samples_per_second": 0.892, | |
| "eval_steps_per_second": 0.446, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.020196063978772022, | |
| "grad_norm": 0.11813419312238693, | |
| "learning_rate": 0.00012128208779103613, | |
| "loss": 0.1104, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.020343480504164516, | |
| "grad_norm": 61.332427978515625, | |
| "learning_rate": 0.0001212820598473482, | |
| "loss": 0.8622, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.020490897029557014, | |
| "grad_norm": 9.628612518310547, | |
| "learning_rate": 0.00012128203170043776, | |
| "loss": 0.0682, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.02063831355494951, | |
| "grad_norm": 59.6220703125, | |
| "learning_rate": 0.00012128200335030495, | |
| "loss": 0.7833, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.020785730080342006, | |
| "grad_norm": 1.084692358970642, | |
| "learning_rate": 0.00012128197479694983, | |
| "loss": 1.5881, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.020933146605734504, | |
| "grad_norm": 0.44916099309921265, | |
| "learning_rate": 0.00012128194604037253, | |
| "loss": 0.0187, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.021080563131126998, | |
| "grad_norm": 0.11146622151136398, | |
| "learning_rate": 0.00012128191708057311, | |
| "loss": 0.0025, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.021227979656519496, | |
| "grad_norm": 0.05726571008563042, | |
| "learning_rate": 0.00012128188791755172, | |
| "loss": 0.0004, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.021227979656519496, | |
| "eval_1_ratio_diff": 0.09119251753702262, | |
| "eval_accuracy": 0.8277474668745128, | |
| "eval_f1": 0.8420300214438885, | |
| "eval_loss": 1.1355745792388916, | |
| "eval_precision": 0.7770448548812665, | |
| "eval_recall": 0.9188767550702028, | |
| "eval_runtime": 1440.4727, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.021375396181911994, | |
| "grad_norm": 63.95652770996094, | |
| "learning_rate": 0.0001212818585513084, | |
| "loss": 2.2186, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.021522812707304488, | |
| "grad_norm": 0.041420936584472656, | |
| "learning_rate": 0.00012128182898184326, | |
| "loss": 2.2755, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.021670229232696986, | |
| "grad_norm": 0.19315005838871002, | |
| "learning_rate": 0.00012128179920915643, | |
| "loss": 1.7156, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.02181764575808948, | |
| "grad_norm": 0.06642986834049225, | |
| "learning_rate": 0.00012128176923324799, | |
| "loss": 0.0021, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.021965062283481978, | |
| "grad_norm": 0.22619064152240753, | |
| "learning_rate": 0.00012128173905411805, | |
| "loss": 1.2636, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.022112478808874476, | |
| "grad_norm": 0.30320611596107483, | |
| "learning_rate": 0.00012128170867176669, | |
| "loss": 0.0031, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.02225989533426697, | |
| "grad_norm": 62.3597412109375, | |
| "learning_rate": 0.00012128167808619403, | |
| "loss": 1.3432, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.022407311859659468, | |
| "grad_norm": 63.980323791503906, | |
| "learning_rate": 0.00012128164729740015, | |
| "loss": 0.8526, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.022407311859659468, | |
| "eval_1_ratio_diff": 0.15354637568199536, | |
| "eval_accuracy": 0.8028059236165238, | |
| "eval_f1": 0.8289384719405003, | |
| "eval_loss": 0.781088650226593, | |
| "eval_precision": 0.7315035799522673, | |
| "eval_recall": 0.9563182527301092, | |
| "eval_runtime": 1439.8087, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.022554728385051966, | |
| "grad_norm": 0.5441477298736572, | |
| "learning_rate": 0.0001212816163053852, | |
| "loss": 0.023, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.02270214491044446, | |
| "grad_norm": 60.2026252746582, | |
| "learning_rate": 0.00012128158511014924, | |
| "loss": 0.4811, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.022849561435836958, | |
| "grad_norm": 3.5183231830596924, | |
| "learning_rate": 0.00012128155371169238, | |
| "loss": 0.0164, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.022996977961229455, | |
| "grad_norm": 49.883365631103516, | |
| "learning_rate": 0.00012128152211001475, | |
| "loss": 2.6559, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.02314439448662195, | |
| "grad_norm": 0.21442897617816925, | |
| "learning_rate": 0.00012128149030511643, | |
| "loss": 1.0737, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.023291811012014448, | |
| "grad_norm": 66.95639038085938, | |
| "learning_rate": 0.00012128145829699753, | |
| "loss": 2.2649, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.023439227537406942, | |
| "grad_norm": 41.275150299072266, | |
| "learning_rate": 0.00012128142608565818, | |
| "loss": 1.4307, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.02358664406279944, | |
| "grad_norm": 60.39665603637695, | |
| "learning_rate": 0.00012128139367109845, | |
| "loss": 0.8912, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.02358664406279944, | |
| "eval_1_ratio_diff": 0.15666406858924398, | |
| "eval_accuracy": 0.7903351519875292, | |
| "eval_f1": 0.8186109238031019, | |
| "eval_loss": 0.6988638043403625, | |
| "eval_precision": 0.7209026128266033, | |
| "eval_recall": 0.9469578783151326, | |
| "eval_runtime": 1440.1147, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.023734060588191937, | |
| "grad_norm": 0.26957735419273376, | |
| "learning_rate": 0.0001212813610533185, | |
| "loss": 0.0109, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.02388147711358443, | |
| "grad_norm": 1.1442532539367676, | |
| "learning_rate": 0.00012128132823231837, | |
| "loss": 0.8164, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.02402889363897693, | |
| "grad_norm": 2.7633121013641357, | |
| "learning_rate": 0.00012128129520809825, | |
| "loss": 0.0146, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.024176310164369427, | |
| "grad_norm": 103.85281372070312, | |
| "learning_rate": 0.00012128126198065819, | |
| "loss": 2.8926, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.02432372668976192, | |
| "grad_norm": 4.870635032653809, | |
| "learning_rate": 0.00012128122854999832, | |
| "loss": 0.0289, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.02447114321515442, | |
| "grad_norm": 0.17178401350975037, | |
| "learning_rate": 0.00012128119491611876, | |
| "loss": 0.7425, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.024618559740546914, | |
| "grad_norm": 37.24171447753906, | |
| "learning_rate": 0.00012128116107901961, | |
| "loss": 3.577, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.02476597626593941, | |
| "grad_norm": 12.520587921142578, | |
| "learning_rate": 0.00012128112703870099, | |
| "loss": 0.0673, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.02476597626593941, | |
| "eval_1_ratio_diff": -0.025720966484801266, | |
| "eval_accuracy": 0.857365549493375, | |
| "eval_f1": 0.8534827862289832, | |
| "eval_loss": 0.4316674470901489, | |
| "eval_precision": 0.8766447368421053, | |
| "eval_recall": 0.8315132605304212, | |
| "eval_runtime": 1440.3285, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.02491339279133191, | |
| "grad_norm": 41.28479766845703, | |
| "learning_rate": 0.00012128109279516303, | |
| "loss": 0.2896, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.025060809316724404, | |
| "grad_norm": 6.806232452392578, | |
| "learning_rate": 0.00012128105834840581, | |
| "loss": 0.0378, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.0252082258421169, | |
| "grad_norm": 2.091874361038208, | |
| "learning_rate": 0.00012128102369842947, | |
| "loss": 0.0118, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.0253556423675094, | |
| "grad_norm": 57.055580139160156, | |
| "learning_rate": 0.00012128098884523412, | |
| "loss": 0.6633, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.025503058892901893, | |
| "grad_norm": 59.19140625, | |
| "learning_rate": 0.00012128095378881987, | |
| "loss": 0.4166, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.02565047541829439, | |
| "grad_norm": 0.08690566569566727, | |
| "learning_rate": 0.00012128091852918686, | |
| "loss": 0.0041, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.02579789194368689, | |
| "grad_norm": 0.4953851103782654, | |
| "learning_rate": 0.00012128088306633519, | |
| "loss": 0.0058, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.025945308469079383, | |
| "grad_norm": 0.8310350179672241, | |
| "learning_rate": 0.00012128084740026497, | |
| "loss": 0.0115, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.025945308469079383, | |
| "eval_1_ratio_diff": -0.05455962587685115, | |
| "eval_accuracy": 0.8659392049883087, | |
| "eval_f1": 0.858085808580858, | |
| "eval_loss": 0.6554389595985413, | |
| "eval_precision": 0.9106830122591943, | |
| "eval_recall": 0.8112324492979719, | |
| "eval_runtime": 1441.1917, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.02609272499447188, | |
| "grad_norm": 75.97391510009766, | |
| "learning_rate": 0.00012128081153097633, | |
| "loss": 1.0946, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.026240141519864375, | |
| "grad_norm": 0.1318621188402176, | |
| "learning_rate": 0.0001212807754584694, | |
| "loss": 0.0013, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.026387558045256873, | |
| "grad_norm": 0.07249584794044495, | |
| "learning_rate": 0.0001212807391827443, | |
| "loss": 0.2854, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.02653497457064937, | |
| "grad_norm": 23.931421279907227, | |
| "learning_rate": 0.00012128070270380113, | |
| "loss": 0.0587, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.026682391096041865, | |
| "grad_norm": 228.77931213378906, | |
| "learning_rate": 0.00012128066602164004, | |
| "loss": 0.6358, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.026829807621434363, | |
| "grad_norm": 0.020578529685735703, | |
| "learning_rate": 0.00012128062913626113, | |
| "loss": 0.0003, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.02697722414682686, | |
| "grad_norm": 0.044141389429569244, | |
| "learning_rate": 0.00012128059204766453, | |
| "loss": 0.0003, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.027124640672219355, | |
| "grad_norm": 35.83491516113281, | |
| "learning_rate": 0.00012128055475585035, | |
| "loss": 2.1523, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.027124640672219355, | |
| "eval_1_ratio_diff": -0.044427123928293066, | |
| "eval_accuracy": 0.8604832424006236, | |
| "eval_f1": 0.8538775510204082, | |
| "eval_loss": 1.1068644523620605, | |
| "eval_precision": 0.8955479452054794, | |
| "eval_recall": 0.8159126365054602, | |
| "eval_runtime": 1440.348, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.027272057197611853, | |
| "grad_norm": 0.004144140053540468, | |
| "learning_rate": 0.00012128051726081876, | |
| "loss": 0.0, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.027419473723004347, | |
| "grad_norm": 0.0015425934689119458, | |
| "learning_rate": 0.00012128047956256984, | |
| "loss": 0.0002, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.027566890248396845, | |
| "grad_norm": 36.42764663696289, | |
| "learning_rate": 0.00012128044166110374, | |
| "loss": 2.8486, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.027714306773789343, | |
| "grad_norm": 0.6206398010253906, | |
| "learning_rate": 0.00012128040355642058, | |
| "loss": 2.924, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.027861723299181837, | |
| "grad_norm": 97.60330963134766, | |
| "learning_rate": 0.00012128036524852049, | |
| "loss": 1.9209, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.028009139824574335, | |
| "grad_norm": 2.1615848541259766, | |
| "learning_rate": 0.0001212803267374036, | |
| "loss": 0.0215, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.028156556349966833, | |
| "grad_norm": 41.35491180419922, | |
| "learning_rate": 0.00012128028802307003, | |
| "loss": 0.8105, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.028303972875359327, | |
| "grad_norm": 39.422916412353516, | |
| "learning_rate": 0.00012128024910551992, | |
| "loss": 1.131, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.028303972875359327, | |
| "eval_1_ratio_diff": -0.3904910366328917, | |
| "eval_accuracy": 0.5876851130163678, | |
| "eval_f1": 0.322663252240717, | |
| "eval_loss": 1.1657379865646362, | |
| "eval_precision": 0.9, | |
| "eval_recall": 0.19656786271450857, | |
| "eval_runtime": 1441.4939, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.028451389400751825, | |
| "grad_norm": 1.7290548086166382, | |
| "learning_rate": 0.0001212802099847534, | |
| "loss": 0.0986, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.028598805926144322, | |
| "grad_norm": 40.167484283447266, | |
| "learning_rate": 0.00012128017066077058, | |
| "loss": 1.1352, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.028746222451536817, | |
| "grad_norm": 36.6862678527832, | |
| "learning_rate": 0.00012128013113357162, | |
| "loss": 2.6405, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.028893638976929314, | |
| "grad_norm": 1.1684958934783936, | |
| "learning_rate": 0.00012128009140315665, | |
| "loss": 1.1565, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.02904105550232181, | |
| "grad_norm": 28.306957244873047, | |
| "learning_rate": 0.00012128005146952578, | |
| "loss": 1.6548, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.029188472027714307, | |
| "grad_norm": 18.64267349243164, | |
| "learning_rate": 0.00012128001133267917, | |
| "loss": 1.1205, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.029335888553106804, | |
| "grad_norm": 7.279528617858887, | |
| "learning_rate": 0.00012127997099261693, | |
| "loss": 0.6742, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.0294833050784993, | |
| "grad_norm": 41.569854736328125, | |
| "learning_rate": 0.00012127993044933921, | |
| "loss": 0.6977, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0294833050784993, | |
| "eval_1_ratio_diff": -0.2704598597038192, | |
| "eval_accuracy": 0.6344505066250974, | |
| "eval_f1": 0.4983957219251337, | |
| "eval_loss": 0.6263108849525452, | |
| "eval_precision": 0.7925170068027211, | |
| "eval_recall": 0.36349453978159124, | |
| "eval_runtime": 1441.3891, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.029630721603891796, | |
| "grad_norm": 14.647398948669434, | |
| "learning_rate": 0.00012127988970284616, | |
| "loss": 0.4508, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.029778138129284294, | |
| "grad_norm": 21.75971221923828, | |
| "learning_rate": 0.00012127984875313788, | |
| "loss": 0.6282, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.02992555465467679, | |
| "grad_norm": 32.292236328125, | |
| "learning_rate": 0.00012127980760021456, | |
| "loss": 1.0279, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.030072971180069286, | |
| "grad_norm": 59.10111999511719, | |
| "learning_rate": 0.00012127976624407626, | |
| "loss": 1.7322, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.03022038770546178, | |
| "grad_norm": 56.45620346069336, | |
| "learning_rate": 0.00012127972468472319, | |
| "loss": 2.3399, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.03036780423085428, | |
| "grad_norm": 33.3152961730957, | |
| "learning_rate": 0.00012127968292215546, | |
| "loss": 1.1374, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.030515220756246776, | |
| "grad_norm": 9.003528594970703, | |
| "learning_rate": 0.00012127964095637322, | |
| "loss": 0.531, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.03066263728163927, | |
| "grad_norm": 11.181624412536621, | |
| "learning_rate": 0.00012127959878737659, | |
| "loss": 0.167, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.03066263728163927, | |
| "eval_1_ratio_diff": -0.49961028838659394, | |
| "eval_accuracy": 0.5003897116134061, | |
| "eval_f1": 0.0, | |
| "eval_loss": 0.9164891839027405, | |
| "eval_precision": 0.0, | |
| "eval_recall": 0.0, | |
| "eval_runtime": 1441.9045, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.030810053807031768, | |
| "grad_norm": 81.1378173828125, | |
| "learning_rate": 0.00012127955641516573, | |
| "loss": 1.5427, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.030957470332424266, | |
| "grad_norm": 40.89067840576172, | |
| "learning_rate": 0.00012127951383974079, | |
| "loss": 0.8105, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.03110488685781676, | |
| "grad_norm": 0.7650836706161499, | |
| "learning_rate": 0.00012127947106110188, | |
| "loss": 0.8716, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.03125230338320926, | |
| "grad_norm": 41.49223709106445, | |
| "learning_rate": 0.00012127942807924917, | |
| "loss": 3.4998, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.031399719908601756, | |
| "grad_norm": 0.12294773012399673, | |
| "learning_rate": 0.00012127938489418281, | |
| "loss": 1.8698, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.031547136433994254, | |
| "grad_norm": 35.12305450439453, | |
| "learning_rate": 0.00012127934150590295, | |
| "loss": 1.6532, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.031694552959386744, | |
| "grad_norm": 27.799177169799805, | |
| "learning_rate": 0.00012127929791440968, | |
| "loss": 0.5514, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.03184196948477924, | |
| "grad_norm": 24.18194580078125, | |
| "learning_rate": 0.00012127925411970319, | |
| "loss": 0.6588, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.03184196948477924, | |
| "eval_1_ratio_diff": 0.0615744349181605, | |
| "eval_accuracy": 0.8074824629773967, | |
| "eval_f1": 0.8185157972079353, | |
| "eval_loss": 0.46238815784454346, | |
| "eval_precision": 0.7736111111111111, | |
| "eval_recall": 0.8689547581903276, | |
| "eval_runtime": 1441.3065, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.03198938601017174, | |
| "grad_norm": 39.476436614990234, | |
| "learning_rate": 0.00012127921012178362, | |
| "loss": 0.5056, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.03213680253556424, | |
| "grad_norm": 17.45188331604004, | |
| "learning_rate": 0.00012127916592065112, | |
| "loss": 1.9197, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.032284219060956736, | |
| "grad_norm": 37.614906311035156, | |
| "learning_rate": 0.00012127912151630586, | |
| "loss": 1.4371, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.032431635586349226, | |
| "grad_norm": 6.937824726104736, | |
| "learning_rate": 0.00012127907690874794, | |
| "loss": 0.1527, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.032579052111741724, | |
| "grad_norm": 1.9573392868041992, | |
| "learning_rate": 0.00012127903209797754, | |
| "loss": 0.0619, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.03272646863713422, | |
| "grad_norm": 5.234042167663574, | |
| "learning_rate": 0.00012127898708399481, | |
| "loss": 0.0308, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.03287388516252672, | |
| "grad_norm": 19.76664161682129, | |
| "learning_rate": 0.00012127894186679988, | |
| "loss": 2.5914, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.03302130168791922, | |
| "grad_norm": 48.643428802490234, | |
| "learning_rate": 0.00012127889644639293, | |
| "loss": 3.5738, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.03302130168791922, | |
| "eval_1_ratio_diff": -0.4964925954793453, | |
| "eval_accuracy": 0.5035074045206547, | |
| "eval_f1": 0.012403100775193798, | |
| "eval_loss": 2.0848419666290283, | |
| "eval_precision": 1.0, | |
| "eval_recall": 0.0062402496099844, | |
| "eval_runtime": 1441.7896, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.033168718213311715, | |
| "grad_norm": 41.91992950439453, | |
| "learning_rate": 0.0001212788508227741, | |
| "loss": 3.656, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.033316134738704206, | |
| "grad_norm": 58.21712112426758, | |
| "learning_rate": 0.00012127880499594355, | |
| "loss": 2.5973, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.033463551264096704, | |
| "grad_norm": 14.196877479553223, | |
| "learning_rate": 0.00012127875896590141, | |
| "loss": 0.9817, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.0336109677894892, | |
| "grad_norm": 21.982349395751953, | |
| "learning_rate": 0.00012127871273264783, | |
| "loss": 0.6516, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.0337583843148817, | |
| "grad_norm": 26.360563278198242, | |
| "learning_rate": 0.00012127866629618302, | |
| "loss": 0.5606, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.0339058008402742, | |
| "grad_norm": 15.224770545959473, | |
| "learning_rate": 0.00012127861965650708, | |
| "loss": 0.4791, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.03405321736566669, | |
| "grad_norm": 40.95515441894531, | |
| "learning_rate": 0.0001212785728136202, | |
| "loss": 0.8481, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.034200633891059186, | |
| "grad_norm": 0.4365566670894623, | |
| "learning_rate": 0.00012127852576752252, | |
| "loss": 0.2475, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.034200633891059186, | |
| "eval_1_ratio_diff": 0.2938425565081839, | |
| "eval_accuracy": 0.6890101325019485, | |
| "eval_f1": 0.759493670886076, | |
| "eval_loss": 0.8622868061065674, | |
| "eval_precision": 0.618860510805501, | |
| "eval_recall": 0.982839313572543, | |
| "eval_runtime": 1441.2401, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.034348050416451684, | |
| "grad_norm": 15.40101432800293, | |
| "learning_rate": 0.0001212784785182142, | |
| "loss": 0.6156, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.03449546694184418, | |
| "grad_norm": 5.0568013191223145, | |
| "learning_rate": 0.00012127843106569541, | |
| "loss": 0.4877, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.03464288346723668, | |
| "grad_norm": 0.277358740568161, | |
| "learning_rate": 0.00012127838340996629, | |
| "loss": 0.6857, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.03479029999262918, | |
| "grad_norm": 0.04443424195051193, | |
| "learning_rate": 0.00012127833555102701, | |
| "loss": 0.0286, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.03493771651802167, | |
| "grad_norm": 35.34669876098633, | |
| "learning_rate": 0.00012127828748887773, | |
| "loss": 1.7842, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.035085133043414166, | |
| "grad_norm": 0.08662135899066925, | |
| "learning_rate": 0.00012127823922351861, | |
| "loss": 0.0011, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.03523254956880666, | |
| "grad_norm": 0.021065138280391693, | |
| "learning_rate": 0.00012127819075494979, | |
| "loss": 0.0013, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.03537996609419916, | |
| "grad_norm": 39.31500244140625, | |
| "learning_rate": 0.00012127814208317148, | |
| "loss": 1.3799, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.03537996609419916, | |
| "eval_1_ratio_diff": 0.03975058456742009, | |
| "eval_accuracy": 0.838659392049883, | |
| "eval_f1": 0.8447111777944486, | |
| "eval_loss": 0.967132568359375, | |
| "eval_precision": 0.8135838150289018, | |
| "eval_recall": 0.8783151326053042, | |
| "eval_runtime": 1441.5685, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.03552738261959166, | |
| "grad_norm": 0.021114541217684746, | |
| "learning_rate": 0.0001212780932081838, | |
| "loss": 0.0022, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.03567479914498415, | |
| "grad_norm": 0.15021076798439026, | |
| "learning_rate": 0.00012127804412998695, | |
| "loss": 0.0023, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.03582221567037665, | |
| "grad_norm": 0.017235957086086273, | |
| "learning_rate": 0.00012127799484858106, | |
| "loss": 0.0157, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.035969632195769145, | |
| "grad_norm": 0.07619292289018631, | |
| "learning_rate": 0.00012127794536396632, | |
| "loss": 0.0006, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.03611704872116164, | |
| "grad_norm": 0.35548681020736694, | |
| "learning_rate": 0.0001212778956761429, | |
| "loss": 0.0025, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.03626446524655414, | |
| "grad_norm": 0.019310960546135902, | |
| "learning_rate": 0.00012127784578511092, | |
| "loss": 0.0006, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.03641188177194664, | |
| "grad_norm": 0.0059149437583982944, | |
| "learning_rate": 0.00012127779569087061, | |
| "loss": 0.0222, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.03655929829733913, | |
| "grad_norm": 0.0023440527729690075, | |
| "learning_rate": 0.00012127774539342209, | |
| "loss": 2.0713, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.03655929829733913, | |
| "eval_1_ratio_diff": 0.0615744349181605, | |
| "eval_accuracy": 0.848012470771629, | |
| "eval_f1": 0.8567229977957385, | |
| "eval_loss": 1.1258606910705566, | |
| "eval_precision": 0.8097222222222222, | |
| "eval_recall": 0.9095163806552262, | |
| "eval_runtime": 1442.1776, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.03670671482273163, | |
| "grad_norm": 0.4357898235321045, | |
| "learning_rate": 0.00012127769489276555, | |
| "loss": 0.0017, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.036854131348124125, | |
| "grad_norm": 0.0051942430436611176, | |
| "learning_rate": 0.00012127764418890117, | |
| "loss": 0.0001, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.03700154787351662, | |
| "grad_norm": 0.048877667635679245, | |
| "learning_rate": 0.0001212775932818291, | |
| "loss": 1.0276, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.03714896439890912, | |
| "grad_norm": 0.030356034636497498, | |
| "learning_rate": 0.00012127754217154949, | |
| "loss": 2.3301, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.03729638092430161, | |
| "grad_norm": 0.06719710677862167, | |
| "learning_rate": 0.00012127749085806257, | |
| "loss": 0.0008, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.03744379744969411, | |
| "grad_norm": 0.8071137070655823, | |
| "learning_rate": 0.00012127743934136846, | |
| "loss": 0.0034, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.03759121397508661, | |
| "grad_norm": 66.58085632324219, | |
| "learning_rate": 0.00012127738762146735, | |
| "loss": 2.0918, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.037738630500479105, | |
| "grad_norm": 0.5617576241493225, | |
| "learning_rate": 0.00012127733569835943, | |
| "loss": 0.004, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.037738630500479105, | |
| "eval_1_ratio_diff": 0.13795791114575218, | |
| "eval_accuracy": 0.8152766952455183, | |
| "eval_f1": 0.8375599725839616, | |
| "eval_loss": 1.003125548362732, | |
| "eval_precision": 0.7469437652811736, | |
| "eval_recall": 0.953198127925117, | |
| "eval_runtime": 1441.8288, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.0378860470258716, | |
| "grad_norm": 0.019583938643336296, | |
| "learning_rate": 0.00012127728357204487, | |
| "loss": 0.0029, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.03803346355126409, | |
| "grad_norm": 66.44640350341797, | |
| "learning_rate": 0.00012127723124252383, | |
| "loss": 1.346, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.03818088007665659, | |
| "grad_norm": 0.05073532462120056, | |
| "learning_rate": 0.00012127717870979647, | |
| "loss": 1.726, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.03832829660204909, | |
| "grad_norm": 0.008476372808218002, | |
| "learning_rate": 0.000121277125973863, | |
| "loss": 0.0002, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.03847571312744159, | |
| "grad_norm": 78.07063293457031, | |
| "learning_rate": 0.00012127707303472356, | |
| "loss": 3.8118, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.038623129652834085, | |
| "grad_norm": 37.921451568603516, | |
| "learning_rate": 0.00012127701989237836, | |
| "loss": 3.374, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.03877054617822658, | |
| "grad_norm": 38.97615432739258, | |
| "learning_rate": 0.0001212769665468276, | |
| "loss": 1.849, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.03891796270361907, | |
| "grad_norm": 1.3990278244018555, | |
| "learning_rate": 0.0001212769129980714, | |
| "loss": 0.2307, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.03891796270361907, | |
| "eval_1_ratio_diff": -0.002338269680436489, | |
| "eval_accuracy": 0.8511301636788776, | |
| "eval_f1": 0.8506645817044566, | |
| "eval_loss": 0.5836467742919922, | |
| "eval_precision": 0.8526645768025078, | |
| "eval_recall": 0.8486739469578783, | |
| "eval_runtime": 1442.6344, | |
| "eval_samples_per_second": 0.889, | |
| "eval_steps_per_second": 0.445, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.03906537922901157, | |
| "grad_norm": 0.5216283798217773, | |
| "learning_rate": 0.00012127685924610997, | |
| "loss": 0.0092, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.03921279575440407, | |
| "grad_norm": 0.716465950012207, | |
| "learning_rate": 0.00012127680529094349, | |
| "loss": 0.0057, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.039360212279796566, | |
| "grad_norm": 0.17090915143489838, | |
| "learning_rate": 0.00012127675113257214, | |
| "loss": 0.0031, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.039507628805189064, | |
| "grad_norm": 62.14753723144531, | |
| "learning_rate": 0.00012127669677099608, | |
| "loss": 1.6501, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.039655045330581555, | |
| "grad_norm": 35.18620681762695, | |
| "learning_rate": 0.00012127664220621553, | |
| "loss": 0.8287, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.03980246185597405, | |
| "grad_norm": 34.50994873046875, | |
| "learning_rate": 0.00012127658743823064, | |
| "loss": 2.5161, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.03994987838136655, | |
| "grad_norm": 0.9479020237922668, | |
| "learning_rate": 0.00012127653246704162, | |
| "loss": 0.0155, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.04009729490675905, | |
| "grad_norm": 0.040624819695949554, | |
| "learning_rate": 0.00012127647729264862, | |
| "loss": 1.536, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.04009729490675905, | |
| "eval_1_ratio_diff": -0.008573655494933774, | |
| "eval_accuracy": 0.8791893998441154, | |
| "eval_f1": 0.8780487804878049, | |
| "eval_loss": 0.49514248967170715, | |
| "eval_precision": 0.8857142857142857, | |
| "eval_recall": 0.8705148205928237, | |
| "eval_runtime": 1441.6693, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.040244711432151546, | |
| "grad_norm": 0.08040345460176468, | |
| "learning_rate": 0.00012127642191505187, | |
| "loss": 0.0205, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.040392127957544044, | |
| "grad_norm": 58.783809661865234, | |
| "learning_rate": 0.00012127636633425152, | |
| "loss": 1.1192, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.040539544482936535, | |
| "grad_norm": 0.25617870688438416, | |
| "learning_rate": 0.00012127631055024779, | |
| "loss": 1.0263, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.04068696100832903, | |
| "grad_norm": 46.056339263916016, | |
| "learning_rate": 0.00012127625456304081, | |
| "loss": 1.1183, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.04083437753372153, | |
| "grad_norm": 0.17480018734931946, | |
| "learning_rate": 0.00012127619837263082, | |
| "loss": 0.0055, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.04098179405911403, | |
| "grad_norm": 0.37528491020202637, | |
| "learning_rate": 0.000121276141979018, | |
| "loss": 0.0032, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.041129210584506526, | |
| "grad_norm": 0.35542991757392883, | |
| "learning_rate": 0.00012127608538220252, | |
| "loss": 0.9512, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.04127662710989902, | |
| "grad_norm": 0.08831676840782166, | |
| "learning_rate": 0.00012127602858218457, | |
| "loss": 0.0184, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.04127662710989902, | |
| "eval_1_ratio_diff": 0.12860483242400622, | |
| "eval_accuracy": 0.8402182385035074, | |
| "eval_f1": 0.8583275742916379, | |
| "eval_loss": 1.0018821954727173, | |
| "eval_precision": 0.7704714640198511, | |
| "eval_recall": 0.968798751950078, | |
| "eval_runtime": 1442.4789, | |
| "eval_samples_per_second": 0.889, | |
| "eval_steps_per_second": 0.445, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.041424043635291515, | |
| "grad_norm": 46.25735092163086, | |
| "learning_rate": 0.00012127597157896437, | |
| "loss": 0.6495, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.04157146016068401, | |
| "grad_norm": 58.521575927734375, | |
| "learning_rate": 0.00012127591437254209, | |
| "loss": 1.4757, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.04171887668607651, | |
| "grad_norm": 0.3296540379524231, | |
| "learning_rate": 0.0001212758569629179, | |
| "loss": 2.2725, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.04186629321146901, | |
| "grad_norm": 0.03395453095436096, | |
| "learning_rate": 0.00012127579935009204, | |
| "loss": 0.0006, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.042013709736861506, | |
| "grad_norm": 0.02328958362340927, | |
| "learning_rate": 0.00012127574153406467, | |
| "loss": 0.0004, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.042161126262253996, | |
| "grad_norm": 58.99131774902344, | |
| "learning_rate": 0.000121275683514836, | |
| "loss": 2.0081, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.042308542787646494, | |
| "grad_norm": 0.9085908532142639, | |
| "learning_rate": 0.0001212756252924062, | |
| "loss": 0.006, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.04245595931303899, | |
| "grad_norm": 0.5718927383422852, | |
| "learning_rate": 0.00012127556686677549, | |
| "loss": 2.0144, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.04245595931303899, | |
| "eval_1_ratio_diff": 0.22291504286827746, | |
| "eval_accuracy": 0.7443491816056118, | |
| "eval_f1": 0.7908163265306123, | |
| "eval_loss": 0.9025093913078308, | |
| "eval_precision": 0.668824163969795, | |
| "eval_recall": 0.9672386895475819, | |
| "eval_runtime": 1442.0314, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.04260337583843149, | |
| "grad_norm": 0.4269089698791504, | |
| "learning_rate": 0.00012127550823794406, | |
| "loss": 1.8595, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.04275079236382399, | |
| "grad_norm": 1.5817714929580688, | |
| "learning_rate": 0.00012127544940591211, | |
| "loss": 0.4153, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.04289820888921648, | |
| "grad_norm": 56.673728942871094, | |
| "learning_rate": 0.00012127539037067981, | |
| "loss": 1.8132, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.043045625414608976, | |
| "grad_norm": 4.291464805603027, | |
| "learning_rate": 0.0001212753311322474, | |
| "loss": 0.3818, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.043193041940001474, | |
| "grad_norm": 18.92963981628418, | |
| "learning_rate": 0.00012127527169061505, | |
| "loss": 0.0941, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.04334045846539397, | |
| "grad_norm": 27.108686447143555, | |
| "learning_rate": 0.00012127521204578297, | |
| "loss": 0.1314, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.04348787499078647, | |
| "grad_norm": 33.73942184448242, | |
| "learning_rate": 0.00012127515219775134, | |
| "loss": 0.1772, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.04363529151617896, | |
| "grad_norm": 52.08650588989258, | |
| "learning_rate": 0.00012127509214652041, | |
| "loss": 0.4505, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.04363529151617896, | |
| "eval_1_ratio_diff": -0.1184723304754482, | |
| "eval_accuracy": 0.798908807482463, | |
| "eval_f1": 0.7716814159292036, | |
| "eval_loss": 0.7536761164665222, | |
| "eval_precision": 0.8916155419222904, | |
| "eval_recall": 0.6801872074882995, | |
| "eval_runtime": 1442.1268, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.04378270804157146, | |
| "grad_norm": 0.05625031143426895, | |
| "learning_rate": 0.00012127503189209032, | |
| "loss": 0.3175, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.043930124566963956, | |
| "grad_norm": 0.10953383892774582, | |
| "learning_rate": 0.0001212749714344613, | |
| "loss": 0.0059, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.044077541092356454, | |
| "grad_norm": 71.34505462646484, | |
| "learning_rate": 0.00012127491077363357, | |
| "loss": 0.5113, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.04422495761774895, | |
| "grad_norm": 0.012292311526834965, | |
| "learning_rate": 0.00012127484990960732, | |
| "loss": 0.0008, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.04437237414314145, | |
| "grad_norm": 0.010139914229512215, | |
| "learning_rate": 0.00012127478884238274, | |
| "loss": 0.0002, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.04451979066853394, | |
| "grad_norm": 58.99741744995117, | |
| "learning_rate": 0.00012127472757196004, | |
| "loss": 3.6273, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.04466720719392644, | |
| "grad_norm": 56.25634765625, | |
| "learning_rate": 0.00012127466609833943, | |
| "loss": 3.663, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.044814623719318936, | |
| "grad_norm": 56.98939895629883, | |
| "learning_rate": 0.00012127460442152114, | |
| "loss": 1.6247, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.044814623719318936, | |
| "eval_1_ratio_diff": -0.03273577552611068, | |
| "eval_accuracy": 0.8456742010911925, | |
| "eval_f1": 0.8403225806451613, | |
| "eval_loss": 0.6838305592536926, | |
| "eval_precision": 0.8697829716193656, | |
| "eval_recall": 0.8127925117004681, | |
| "eval_runtime": 1441.7869, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.04496204024471143, | |
| "grad_norm": 41.00777053833008, | |
| "learning_rate": 0.00012127454254150532, | |
| "loss": 3.2637, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.04510945677010393, | |
| "grad_norm": 15.958291053771973, | |
| "learning_rate": 0.00012127448045829223, | |
| "loss": 0.0749, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.04525687329549642, | |
| "grad_norm": 52.62068176269531, | |
| "learning_rate": 0.00012127441817188204, | |
| "loss": 1.1452, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.04540428982088892, | |
| "grad_norm": 0.8104878067970276, | |
| "learning_rate": 0.00012127435568227499, | |
| "loss": 0.0086, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.04555170634628142, | |
| "grad_norm": 6.7712883949279785, | |
| "learning_rate": 0.00012127429298947129, | |
| "loss": 0.035, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.045699122871673915, | |
| "grad_norm": 1.2900152206420898, | |
| "learning_rate": 0.00012127423009347112, | |
| "loss": 0.0133, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.04584653939706641, | |
| "grad_norm": 0.5468306541442871, | |
| "learning_rate": 0.00012127416699427471, | |
| "loss": 0.0066, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.04599395592245891, | |
| "grad_norm": 0.16869762539863586, | |
| "learning_rate": 0.00012127410369188226, | |
| "loss": 0.0026, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.04599395592245891, | |
| "eval_1_ratio_diff": 0.015588464536243185, | |
| "eval_accuracy": 0.8862042088854248, | |
| "eval_f1": 0.8878648233486943, | |
| "eval_loss": 0.570717990398407, | |
| "eval_precision": 0.8744326777609682, | |
| "eval_recall": 0.9017160686427457, | |
| "eval_runtime": 1441.498, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.0461413724478514, | |
| "grad_norm": 0.20163878798484802, | |
| "learning_rate": 0.00012127404018629401, | |
| "loss": 0.0013, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.0462887889732439, | |
| "grad_norm": 0.1430014669895172, | |
| "learning_rate": 0.00012127397647751014, | |
| "loss": 0.0016, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.0464362054986364, | |
| "grad_norm": 60.50364303588867, | |
| "learning_rate": 0.00012127391256553088, | |
| "loss": 1.6526, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.046583622024028895, | |
| "grad_norm": 0.009336289949715137, | |
| "learning_rate": 0.00012127384845035646, | |
| "loss": 0.0005, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.04673103854942139, | |
| "grad_norm": 0.02924017794430256, | |
| "learning_rate": 0.00012127378413198706, | |
| "loss": 2.0099, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.046878455074813884, | |
| "grad_norm": 0.1369701325893402, | |
| "learning_rate": 0.00012127371961042292, | |
| "loss": 1.9002, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.04702587160020638, | |
| "grad_norm": 77.09698486328125, | |
| "learning_rate": 0.00012127365488566423, | |
| "loss": 1.0021, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.04717328812559888, | |
| "grad_norm": 4.486428260803223, | |
| "learning_rate": 0.00012127358995771124, | |
| "loss": 1.8971, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.04717328812559888, | |
| "eval_1_ratio_diff": -0.17225253312548716, | |
| "eval_accuracy": 0.7669524551831645, | |
| "eval_f1": 0.7181903864278982, | |
| "eval_loss": 1.282883644104004, | |
| "eval_precision": 0.9071428571428571, | |
| "eval_recall": 0.594383775351014, | |
| "eval_runtime": 1441.6631, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.04732070465099138, | |
| "grad_norm": 0.3835877478122711, | |
| "learning_rate": 0.00012127352482656414, | |
| "loss": 1.5125, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.047468121176383875, | |
| "grad_norm": 0.3453172445297241, | |
| "learning_rate": 0.00012127345949222316, | |
| "loss": 1.4256, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.04761553770177637, | |
| "grad_norm": 56.087467193603516, | |
| "learning_rate": 0.00012127339395468855, | |
| "loss": 1.389, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.04776295422716886, | |
| "grad_norm": 39.20930099487305, | |
| "learning_rate": 0.00012127332821396047, | |
| "loss": 2.2849, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.04791037075256136, | |
| "grad_norm": 2.4249165058135986, | |
| "learning_rate": 0.00012127326227003918, | |
| "loss": 0.0286, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.04805778727795386, | |
| "grad_norm": 1.4587557315826416, | |
| "learning_rate": 0.0001212731961229249, | |
| "loss": 0.775, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.04820520380334636, | |
| "grad_norm": 45.33637237548828, | |
| "learning_rate": 0.00012127312977261783, | |
| "loss": 0.2852, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.048352620328738855, | |
| "grad_norm": 0.07065322250127792, | |
| "learning_rate": 0.0001212730632191182, | |
| "loss": 0.7127, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.048352620328738855, | |
| "eval_1_ratio_diff": -0.05455962587685115, | |
| "eval_accuracy": 0.8487918939984411, | |
| "eval_f1": 0.8399339933993399, | |
| "eval_loss": 0.5940015316009521, | |
| "eval_precision": 0.8914185639229422, | |
| "eval_recall": 0.7940717628705148, | |
| "eval_runtime": 1441.517, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.048500036854131345, | |
| "grad_norm": 35.70323181152344, | |
| "learning_rate": 0.00012127299646242624, | |
| "loss": 0.6816, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.04864745337952384, | |
| "grad_norm": 1.4870625734329224, | |
| "learning_rate": 0.00012127292950254218, | |
| "loss": 0.1488, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.04879486990491634, | |
| "grad_norm": 0.6423426866531372, | |
| "learning_rate": 0.00012127286233946625, | |
| "loss": 0.0136, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.04894228643030884, | |
| "grad_norm": 0.3320056200027466, | |
| "learning_rate": 0.00012127279497319864, | |
| "loss": 0.0058, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.049089702955701336, | |
| "grad_norm": 4.33368444442749, | |
| "learning_rate": 0.00012127272740373959, | |
| "loss": 0.5196, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.04923711948109383, | |
| "grad_norm": 71.66387939453125, | |
| "learning_rate": 0.00012127265963108935, | |
| "loss": 2.7961, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.049384536006486325, | |
| "grad_norm": 94.96151733398438, | |
| "learning_rate": 0.00012127259165524814, | |
| "loss": 3.8152, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.04953195253187882, | |
| "grad_norm": 39.40300369262695, | |
| "learning_rate": 0.00012127252347621616, | |
| "loss": 1.1659, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.04953195253187882, | |
| "eval_1_ratio_diff": -0.11301636788776309, | |
| "eval_accuracy": 0.779423226812159, | |
| "eval_f1": 0.751099384344767, | |
| "eval_loss": 1.093988060951233, | |
| "eval_precision": 0.8608870967741935, | |
| "eval_recall": 0.6661466458658346, | |
| "eval_runtime": 1441.6444, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.04967936905727132, | |
| "grad_norm": 142.39564514160156, | |
| "learning_rate": 0.00012127245509399365, | |
| "loss": 1.9772, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.04982678558266382, | |
| "grad_norm": 0.43099793791770935, | |
| "learning_rate": 0.00012127238650858088, | |
| "loss": 0.0056, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.049974202108056316, | |
| "grad_norm": 0.22017613053321838, | |
| "learning_rate": 0.00012127231771997801, | |
| "loss": 0.0026, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.05012161863344881, | |
| "grad_norm": 0.06024312227964401, | |
| "learning_rate": 0.00012127224872818532, | |
| "loss": 1.4556, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.050269035158841305, | |
| "grad_norm": 30.382848739624023, | |
| "learning_rate": 0.00012127217953320302, | |
| "loss": 0.959, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.0504164516842338, | |
| "grad_norm": 0.12178266048431396, | |
| "learning_rate": 0.00012127211013503136, | |
| "loss": 0.0025, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.0505638682096263, | |
| "grad_norm": 0.2670276165008545, | |
| "learning_rate": 0.00012127204053367056, | |
| "loss": 0.0059, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.0507112847350188, | |
| "grad_norm": 0.7420686483383179, | |
| "learning_rate": 0.00012127197072912085, | |
| "loss": 0.0205, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.0507112847350188, | |
| "eval_1_ratio_diff": -0.05300077942322684, | |
| "eval_accuracy": 0.828526890101325, | |
| "eval_f1": 0.8187808896210873, | |
| "eval_loss": 0.5867729783058167, | |
| "eval_precision": 0.8673647469458988, | |
| "eval_recall": 0.7753510140405616, | |
| "eval_runtime": 1441.426, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.05085870126041129, | |
| "grad_norm": 69.81874084472656, | |
| "learning_rate": 0.00012127190072138247, | |
| "loss": 0.853, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.05100611778580379, | |
| "grad_norm": 0.51251220703125, | |
| "learning_rate": 0.00012127183051045567, | |
| "loss": 0.037, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.051153534311196285, | |
| "grad_norm": 32.83553695678711, | |
| "learning_rate": 0.00012127176009634066, | |
| "loss": 1.7711, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.05130095083658878, | |
| "grad_norm": 0.029091738164424896, | |
| "learning_rate": 0.00012127168947903768, | |
| "loss": 0.0006, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.05144836736198128, | |
| "grad_norm": 59.94422912597656, | |
| "learning_rate": 0.00012127161865854698, | |
| "loss": 1.6607, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.05159578388737378, | |
| "grad_norm": 60.350067138671875, | |
| "learning_rate": 0.00012127154763486877, | |
| "loss": 2.055, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.05174320041276627, | |
| "grad_norm": 0.08221148699522018, | |
| "learning_rate": 0.00012127147640800332, | |
| "loss": 1.6475, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.051890616938158766, | |
| "grad_norm": 39.905357360839844, | |
| "learning_rate": 0.00012127140497795086, | |
| "loss": 1.2104, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.051890616938158766, | |
| "eval_1_ratio_diff": -0.0919719407638348, | |
| "eval_accuracy": 0.8316445830085737, | |
| "eval_f1": 0.8144329896907216, | |
| "eval_loss": 0.7349568605422974, | |
| "eval_precision": 0.9063097514340345, | |
| "eval_recall": 0.7394695787831513, | |
| "eval_runtime": 1441.7333, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.052038033463551264, | |
| "grad_norm": 4.226317882537842, | |
| "learning_rate": 0.00012127133334471161, | |
| "loss": 0.4275, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.05218544998894376, | |
| "grad_norm": 0.05035168305039406, | |
| "learning_rate": 0.00012127126150828585, | |
| "loss": 1.3166, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.05233286651433626, | |
| "grad_norm": 0.25760674476623535, | |
| "learning_rate": 0.00012127118946867378, | |
| "loss": 0.0081, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.05248028303972875, | |
| "grad_norm": 36.74332809448242, | |
| "learning_rate": 0.00012127111722587565, | |
| "loss": 1.1506, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.05262769956512125, | |
| "grad_norm": 36.16116714477539, | |
| "learning_rate": 0.00012127104477989172, | |
| "loss": 1.2632, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.052775116090513746, | |
| "grad_norm": 37.083343505859375, | |
| "learning_rate": 0.00012127097213072223, | |
| "loss": 1.8408, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.052922532615906244, | |
| "grad_norm": 3.0497827529907227, | |
| "learning_rate": 0.0001212708992783674, | |
| "loss": 0.0247, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.05306994914129874, | |
| "grad_norm": 4.117802619934082, | |
| "learning_rate": 0.00012127082622282751, | |
| "loss": 0.0342, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.05306994914129874, | |
| "eval_1_ratio_diff": -0.08183943881527672, | |
| "eval_accuracy": 0.8121590023382697, | |
| "eval_f1": 0.7952421410365336, | |
| "eval_loss": 0.5786097645759583, | |
| "eval_precision": 0.8731343283582089, | |
| "eval_recall": 0.7301092043681747, | |
| "eval_runtime": 1442.0276, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.05321736566669124, | |
| "grad_norm": 0.633588969707489, | |
| "learning_rate": 0.00012127075296410277, | |
| "loss": 0.0056, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.05336478219208373, | |
| "grad_norm": 36.505218505859375, | |
| "learning_rate": 0.00012127067950219344, | |
| "loss": 0.7263, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.05351219871747623, | |
| "grad_norm": 0.6716632843017578, | |
| "learning_rate": 0.00012127060583709976, | |
| "loss": 0.0045, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.053659615242868726, | |
| "grad_norm": 36.19940948486328, | |
| "learning_rate": 0.000121270531968822, | |
| "loss": 0.377, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.053807031768261224, | |
| "grad_norm": 81.25736236572266, | |
| "learning_rate": 0.00012127045789736038, | |
| "loss": 0.6006, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.05395444829365372, | |
| "grad_norm": 29.044986724853516, | |
| "learning_rate": 0.00012127038362271517, | |
| "loss": 1.1609, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.05410186481904621, | |
| "grad_norm": 0.1593562811613083, | |
| "learning_rate": 0.0001212703091448866, | |
| "loss": 0.0055, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.05424928134443871, | |
| "grad_norm": 7.988092422485352, | |
| "learning_rate": 0.00012127023446387492, | |
| "loss": 0.0238, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.05424928134443871, | |
| "eval_1_ratio_diff": -0.07092751363990646, | |
| "eval_accuracy": 0.838659392049883, | |
| "eval_f1": 0.8261964735516373, | |
| "eval_loss": 0.6887457370758057, | |
| "eval_precision": 0.8945454545454545, | |
| "eval_recall": 0.7675507020280812, | |
| "eval_runtime": 1441.3319, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.05439669786983121, | |
| "grad_norm": 58.56552505493164, | |
| "learning_rate": 0.00012127015957968041, | |
| "loss": 2.3194, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.054544114395223706, | |
| "grad_norm": 0.37152421474456787, | |
| "learning_rate": 0.00012127008449230329, | |
| "loss": 0.0029, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.0546915309206162, | |
| "grad_norm": 33.52932357788086, | |
| "learning_rate": 0.00012127000920174381, | |
| "loss": 1.1549, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.054838947446008694, | |
| "grad_norm": 0.02616913430392742, | |
| "learning_rate": 0.00012126993370800224, | |
| "loss": 0.0021, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.05498636397140119, | |
| "grad_norm": 36.83317565917969, | |
| "learning_rate": 0.00012126985801107882, | |
| "loss": 1.2016, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.05513378049679369, | |
| "grad_norm": 0.006011671852320433, | |
| "learning_rate": 0.00012126978211097381, | |
| "loss": 2.834, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.05528119702218619, | |
| "grad_norm": 58.966102600097656, | |
| "learning_rate": 0.00012126970600768747, | |
| "loss": 2.0661, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.055428613547578685, | |
| "grad_norm": 57.80133819580078, | |
| "learning_rate": 0.00012126962970122005, | |
| "loss": 1.2417, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.055428613547578685, | |
| "eval_1_ratio_diff": 0.09508963367108336, | |
| "eval_accuracy": 0.8503507404520655, | |
| "eval_f1": 0.8632478632478633, | |
| "eval_loss": 0.7459388375282288, | |
| "eval_precision": 0.7942332896461337, | |
| "eval_recall": 0.9453978159126365, | |
| "eval_runtime": 1440.976, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.446, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.05557603007297118, | |
| "grad_norm": 0.10538947582244873, | |
| "learning_rate": 0.00012126955319157181, | |
| "loss": 1.5568, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.055723446598363674, | |
| "grad_norm": 0.3577294647693634, | |
| "learning_rate": 0.000121269476478743, | |
| "loss": 1.3633, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.05587086312375617, | |
| "grad_norm": 111.04033660888672, | |
| "learning_rate": 0.00012126939956273387, | |
| "loss": 1.5691, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.05601827964914867, | |
| "grad_norm": 8.450987815856934, | |
| "learning_rate": 0.00012126932244354469, | |
| "loss": 0.6036, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.05616569617454117, | |
| "grad_norm": 6.646569728851318, | |
| "learning_rate": 0.00012126924512117572, | |
| "loss": 0.0554, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.056313112699933665, | |
| "grad_norm": 10.05777359008789, | |
| "learning_rate": 0.00012126916759562719, | |
| "loss": 0.0507, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.056460529225326156, | |
| "grad_norm": 1.5429670810699463, | |
| "learning_rate": 0.00012126908986689941, | |
| "loss": 0.5476, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.056607945750718654, | |
| "grad_norm": 0.7471988201141357, | |
| "learning_rate": 0.0001212690119349926, | |
| "loss": 0.0357, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.056607945750718654, | |
| "eval_1_ratio_diff": -0.03273577552611068, | |
| "eval_accuracy": 0.8534684333593141, | |
| "eval_f1": 0.8483870967741935, | |
| "eval_loss": 0.5041674971580505, | |
| "eval_precision": 0.8781302170283807, | |
| "eval_recall": 0.8205928237129485, | |
| "eval_runtime": 1441.5634, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.05675536227611115, | |
| "grad_norm": 25.423622131347656, | |
| "learning_rate": 0.00012126893379990705, | |
| "loss": 0.0991, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.05690277880150365, | |
| "grad_norm": 8.131854057312012, | |
| "learning_rate": 0.00012126885546164299, | |
| "loss": 0.0467, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.05705019532689615, | |
| "grad_norm": 0.7007619738578796, | |
| "learning_rate": 0.00012126877692020069, | |
| "loss": 0.0319, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.057197611852288645, | |
| "grad_norm": 0.0242279302328825, | |
| "learning_rate": 0.00012126869817558045, | |
| "loss": 0.5106, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.057345028377681136, | |
| "grad_norm": 1.126301646232605, | |
| "learning_rate": 0.00012126861922778249, | |
| "loss": 0.0068, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.05749244490307363, | |
| "grad_norm": 2.2255496978759766, | |
| "learning_rate": 0.0001212685400768071, | |
| "loss": 0.0125, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.05763986142846613, | |
| "grad_norm": 53.08203125, | |
| "learning_rate": 0.00012126846072265453, | |
| "loss": 3.4784, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.05778727795385863, | |
| "grad_norm": 53.75185012817383, | |
| "learning_rate": 0.00012126838116532506, | |
| "loss": 5.3382, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.05778727795385863, | |
| "eval_1_ratio_diff": 0.024162120031176904, | |
| "eval_accuracy": 0.8074824629773967, | |
| "eval_f1": 0.8118811881188119, | |
| "eval_loss": 1.1864495277404785, | |
| "eval_precision": 0.7931547619047619, | |
| "eval_recall": 0.8315132605304212, | |
| "eval_runtime": 1441.6478, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.05793469447925113, | |
| "grad_norm": 20.95121955871582, | |
| "learning_rate": 0.00012126830140481893, | |
| "loss": 3.3432, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.05808211100464362, | |
| "grad_norm": 49.42118453979492, | |
| "learning_rate": 0.00012126822144113646, | |
| "loss": 0.762, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.058229527530036115, | |
| "grad_norm": 16.03618812561035, | |
| "learning_rate": 0.00012126814127427784, | |
| "loss": 0.1045, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.05837694405542861, | |
| "grad_norm": 34.1168212890625, | |
| "learning_rate": 0.00012126806090424342, | |
| "loss": 3.1091, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.05852436058082111, | |
| "grad_norm": 34.757083892822266, | |
| "learning_rate": 0.00012126798033103342, | |
| "loss": 2.0632, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.05867177710621361, | |
| "grad_norm": 1.412405014038086, | |
| "learning_rate": 0.00012126789955464813, | |
| "loss": 0.2568, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.058819193631606106, | |
| "grad_norm": 55.76416015625, | |
| "learning_rate": 0.00012126781857508779, | |
| "loss": 0.497, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.0589666101569986, | |
| "grad_norm": 0.3345389664173126, | |
| "learning_rate": 0.00012126773739235272, | |
| "loss": 0.0121, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.0589666101569986, | |
| "eval_1_ratio_diff": 0.06703039750584561, | |
| "eval_accuracy": 0.8363211223694466, | |
| "eval_f1": 0.8464912280701754, | |
| "eval_loss": 0.7451047897338867, | |
| "eval_precision": 0.796423658872077, | |
| "eval_recall": 0.9032761310452418, | |
| "eval_runtime": 1440.5179, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.059114026682391095, | |
| "grad_norm": 0.15297777950763702, | |
| "learning_rate": 0.00012126765600644314, | |
| "loss": 0.0082, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.05926144320778359, | |
| "grad_norm": 0.19080302119255066, | |
| "learning_rate": 0.00012126757441735937, | |
| "loss": 0.0026, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.05940885973317609, | |
| "grad_norm": 0.43317776918411255, | |
| "learning_rate": 0.00012126749262510164, | |
| "loss": 0.0055, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.05955627625856859, | |
| "grad_norm": 6.4003984334704e-06, | |
| "learning_rate": 0.00012126741062967027, | |
| "loss": 0.0031, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.05970369278396108, | |
| "grad_norm": 0.03450751677155495, | |
| "learning_rate": 0.00012126732843106551, | |
| "loss": 0.0052, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.05985110930935358, | |
| "grad_norm": 52.02117156982422, | |
| "learning_rate": 0.00012126724602928764, | |
| "loss": 5.0919, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.059998525834746075, | |
| "grad_norm": 50.249900817871094, | |
| "learning_rate": 0.00012126716342433692, | |
| "loss": 4.0749, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.06014594236013857, | |
| "grad_norm": 0.0067368014715611935, | |
| "learning_rate": 0.00012126708061621366, | |
| "loss": 0.0001, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.06014594236013857, | |
| "eval_1_ratio_diff": 0.06469212782540923, | |
| "eval_accuracy": 0.8651597817614964, | |
| "eval_f1": 0.8732600732600733, | |
| "eval_loss": 0.9449532628059387, | |
| "eval_precision": 0.8232044198895028, | |
| "eval_recall": 0.9297971918876755, | |
| "eval_runtime": 1440.6727, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.06029335888553107, | |
| "grad_norm": 44.037471771240234, | |
| "learning_rate": 0.00012126699760491808, | |
| "loss": 2.1184, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.06044077541092356, | |
| "grad_norm": 31.20966148376465, | |
| "learning_rate": 0.00012126691439045052, | |
| "loss": 2.3532, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.06058819193631606, | |
| "grad_norm": 1.108382225036621, | |
| "learning_rate": 0.00012126683097281125, | |
| "loss": 0.0093, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.06073560846170856, | |
| "grad_norm": 1.2753050327301025, | |
| "learning_rate": 0.0001212667473520005, | |
| "loss": 0.011, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.060883024987101055, | |
| "grad_norm": 4.512105941772461, | |
| "learning_rate": 0.00012126666352801861, | |
| "loss": 0.0212, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.06103044151249355, | |
| "grad_norm": 0.3488874137401581, | |
| "learning_rate": 0.00012126657950086582, | |
| "loss": 1.2435, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.06117785803788605, | |
| "grad_norm": 0.11297665536403656, | |
| "learning_rate": 0.00012126649527054243, | |
| "loss": 0.0027, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.06132527456327854, | |
| "grad_norm": 0.07631942629814148, | |
| "learning_rate": 0.00012126641083704874, | |
| "loss": 0.0032, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.06132527456327854, | |
| "eval_1_ratio_diff": 0.08885424785658613, | |
| "eval_accuracy": 0.8799688230709275, | |
| "eval_f1": 0.8896848137535817, | |
| "eval_loss": 0.7254036068916321, | |
| "eval_precision": 0.8225165562913908, | |
| "eval_recall": 0.968798751950078, | |
| "eval_runtime": 1440.5593, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.06147269108867104, | |
| "grad_norm": 41.37874984741211, | |
| "learning_rate": 0.00012126632620038498, | |
| "loss": 1.3108, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.061620107614063536, | |
| "grad_norm": 55.71513366699219, | |
| "learning_rate": 0.00012126624136055149, | |
| "loss": 1.7068, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.061767524139456034, | |
| "grad_norm": 1.5174663066864014, | |
| "learning_rate": 0.0001212661563175485, | |
| "loss": 0.0173, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.06191494066484853, | |
| "grad_norm": 0.10353035479784012, | |
| "learning_rate": 0.00012126607107137636, | |
| "loss": 1.2081, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.06206235719024102, | |
| "grad_norm": 0.05997217819094658, | |
| "learning_rate": 0.00012126598562203531, | |
| "loss": 1.8296, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.06220977371563352, | |
| "grad_norm": 0.17887941002845764, | |
| "learning_rate": 0.00012126589996952563, | |
| "loss": 0.0016, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.06235719024102602, | |
| "grad_norm": 0.08932141214609146, | |
| "learning_rate": 0.00012126581411384764, | |
| "loss": 1.5849, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.06250460676641852, | |
| "grad_norm": 41.82356643676758, | |
| "learning_rate": 0.0001212657280550016, | |
| "loss": 1.2425, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.06250460676641852, | |
| "eval_1_ratio_diff": 0.09586905689789549, | |
| "eval_accuracy": 0.8542478565861262, | |
| "eval_f1": 0.8669039145907473, | |
| "eval_loss": 0.6706948280334473, | |
| "eval_precision": 0.7971204188481675, | |
| "eval_recall": 0.9500780031201248, | |
| "eval_runtime": 1441.8264, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.06265202329181101, | |
| "grad_norm": 45.45724105834961, | |
| "learning_rate": 0.00012126564179298783, | |
| "loss": 0.7189, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.06279943981720351, | |
| "grad_norm": 1.7170765399932861, | |
| "learning_rate": 0.00012126555532780658, | |
| "loss": 0.014, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.062946856342596, | |
| "grad_norm": 1.8296376466751099, | |
| "learning_rate": 0.00012126546865945818, | |
| "loss": 0.0249, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.06309427286798851, | |
| "grad_norm": 28.392093658447266, | |
| "learning_rate": 0.00012126538178794288, | |
| "loss": 2.1082, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.063241689393381, | |
| "grad_norm": 0.024955546483397484, | |
| "learning_rate": 0.00012126529471326101, | |
| "loss": 0.0026, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.06338910591877349, | |
| "grad_norm": 29.141136169433594, | |
| "learning_rate": 0.00012126520743541283, | |
| "loss": 1.5827, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.063536522444166, | |
| "grad_norm": 0.3031620383262634, | |
| "learning_rate": 0.00012126511995439865, | |
| "loss": 1.4029, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.06368393896955848, | |
| "grad_norm": 0.2821040451526642, | |
| "learning_rate": 0.00012126503227021874, | |
| "loss": 1.3931, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.06368393896955848, | |
| "eval_1_ratio_diff": -0.04053000779423227, | |
| "eval_accuracy": 0.8581449727201871, | |
| "eval_f1": 0.8520325203252033, | |
| "eval_loss": 0.5900216102600098, | |
| "eval_precision": 0.8896434634974533, | |
| "eval_recall": 0.8174726989079563, | |
| "eval_runtime": 1439.3591, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.06383135549495099, | |
| "grad_norm": 4.8987860679626465, | |
| "learning_rate": 0.00012126494438287343, | |
| "loss": 0.0265, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.06397877202034348, | |
| "grad_norm": 0.27837908267974854, | |
| "learning_rate": 0.000121264856292363, | |
| "loss": 0.0093, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.06412618854573597, | |
| "grad_norm": 0.5379538536071777, | |
| "learning_rate": 0.00012126476799868773, | |
| "loss": 0.0095, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.06427360507112848, | |
| "grad_norm": 23.87804412841797, | |
| "learning_rate": 0.00012126467950184793, | |
| "loss": 0.8342, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.06442102159652097, | |
| "grad_norm": 1.2284973859786987, | |
| "learning_rate": 0.0001212645908018439, | |
| "loss": 0.0162, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.06456843812191347, | |
| "grad_norm": 36.555442810058594, | |
| "learning_rate": 0.00012126450189867592, | |
| "loss": 2.2561, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.06471585464730596, | |
| "grad_norm": 24.54311180114746, | |
| "learning_rate": 0.00012126441279234432, | |
| "loss": 3.1743, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.06486327117269845, | |
| "grad_norm": 0.13615825772285461, | |
| "learning_rate": 0.00012126432348284936, | |
| "loss": 0.0021, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.06486327117269845, | |
| "eval_1_ratio_diff": -0.2899454403741232, | |
| "eval_accuracy": 0.6975837879968823, | |
| "eval_f1": 0.5736263736263736, | |
| "eval_loss": 1.3224732875823975, | |
| "eval_precision": 0.9702602230483272, | |
| "eval_recall": 0.40717628705148207, | |
| "eval_runtime": 1440.3311, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.06501068769809096, | |
| "grad_norm": 3.8478543758392334, | |
| "learning_rate": 0.00012126423397019136, | |
| "loss": 0.021, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.06515810422348345, | |
| "grad_norm": 0.08823257684707642, | |
| "learning_rate": 0.00012126414425437062, | |
| "loss": 0.0016, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.06530552074887595, | |
| "grad_norm": 27.02589988708496, | |
| "learning_rate": 0.00012126405433538744, | |
| "loss": 2.9462, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.06545293727426844, | |
| "grad_norm": 24.244503021240234, | |
| "learning_rate": 0.00012126396421324212, | |
| "loss": 0.8423, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.06560035379966095, | |
| "grad_norm": 0.3652421236038208, | |
| "learning_rate": 0.00012126387388793495, | |
| "loss": 0.0081, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.06574777032505344, | |
| "grad_norm": 22.919225692749023, | |
| "learning_rate": 0.00012126378335946625, | |
| "loss": 1.1268, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.06589518685044593, | |
| "grad_norm": 0.18866397440433502, | |
| "learning_rate": 0.00012126369262783633, | |
| "loss": 1.8645, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.06604260337583844, | |
| "grad_norm": 2.1540791988372803, | |
| "learning_rate": 0.00012126360169304547, | |
| "loss": 0.036, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.06604260337583844, | |
| "eval_1_ratio_diff": 0.017147310989867437, | |
| "eval_accuracy": 0.8862042088854248, | |
| "eval_f1": 0.8880368098159509, | |
| "eval_loss": 0.49060943722724915, | |
| "eval_precision": 0.8733031674208145, | |
| "eval_recall": 0.9032761310452418, | |
| "eval_runtime": 1439.5517, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.06619001990123093, | |
| "grad_norm": 0.7136353850364685, | |
| "learning_rate": 0.00012126351055509399, | |
| "loss": 1.4136, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.06633743642662343, | |
| "grad_norm": 1.6063231229782104, | |
| "learning_rate": 0.00012126341921398221, | |
| "loss": 0.0358, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.06648485295201592, | |
| "grad_norm": 4.673253536224365, | |
| "learning_rate": 0.00012126332766971038, | |
| "loss": 0.0494, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.06663226947740841, | |
| "grad_norm": 0.21607956290245056, | |
| "learning_rate": 0.00012126323592227886, | |
| "loss": 0.0053, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.06677968600280092, | |
| "grad_norm": 32.70335006713867, | |
| "learning_rate": 0.00012126314397168796, | |
| "loss": 1.5106, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.06692710252819341, | |
| "grad_norm": 38.56415557861328, | |
| "learning_rate": 0.00012126305181793794, | |
| "loss": 0.8798, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.06707451905358591, | |
| "grad_norm": 0.018692007288336754, | |
| "learning_rate": 0.00012126295946102917, | |
| "loss": 0.0004, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.0672219355789784, | |
| "grad_norm": 22.49344825744629, | |
| "learning_rate": 0.00012126286690096191, | |
| "loss": 0.9364, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.0672219355789784, | |
| "eval_1_ratio_diff": -0.18706157443491817, | |
| "eval_accuracy": 0.7802026500389712, | |
| "eval_f1": 0.7293666026871402, | |
| "eval_loss": 1.4581658840179443, | |
| "eval_precision": 0.9476309226932669, | |
| "eval_recall": 0.592823712948518, | |
| "eval_runtime": 1440.0947, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.0673693521043709, | |
| "grad_norm": 182.47938537597656, | |
| "learning_rate": 0.00012126277413773649, | |
| "loss": 1.0293, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.0675167686297634, | |
| "grad_norm": 4.0591816902160645, | |
| "learning_rate": 0.00012126268117135323, | |
| "loss": 0.0308, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.06766418515515589, | |
| "grad_norm": 6.380730628967285, | |
| "learning_rate": 0.00012126258800181242, | |
| "loss": 1.2327, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.0678116016805484, | |
| "grad_norm": 31.462488174438477, | |
| "learning_rate": 0.00012126249462911438, | |
| "loss": 0.8761, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.06795901820594089, | |
| "grad_norm": 0.0057801539078354836, | |
| "learning_rate": 0.00012126240105325944, | |
| "loss": 0.0077, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.06810643473133338, | |
| "grad_norm": 1.9855010509490967, | |
| "learning_rate": 0.0001212623072742479, | |
| "loss": 0.0239, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.06825385125672588, | |
| "grad_norm": 0.4254453480243683, | |
| "learning_rate": 0.00012126221329208006, | |
| "loss": 0.0032, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.06840126778211837, | |
| "grad_norm": 0.502257227897644, | |
| "learning_rate": 0.00012126211910675626, | |
| "loss": 0.0103, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.06840126778211837, | |
| "eval_1_ratio_diff": 0.026500389711613392, | |
| "eval_accuracy": 0.8768511301636789, | |
| "eval_f1": 0.8799392097264438, | |
| "eval_loss": 0.7347307205200195, | |
| "eval_precision": 0.8577777777777778, | |
| "eval_recall": 0.9032761310452418, | |
| "eval_runtime": 1440.0126, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.06854868430751088, | |
| "grad_norm": 0.6557896733283997, | |
| "learning_rate": 0.00012126202471827679, | |
| "loss": 0.003, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.06869610083290337, | |
| "grad_norm": 0.020085789263248444, | |
| "learning_rate": 0.00012126193012664201, | |
| "loss": 0.0004, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.06884351735829586, | |
| "grad_norm": 0.006013574078679085, | |
| "learning_rate": 0.00012126183533185218, | |
| "loss": 0.0001, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.06899093388368836, | |
| "grad_norm": 252.07472229003906, | |
| "learning_rate": 0.00012126174033390767, | |
| "loss": 0.8075, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.06913835040908085, | |
| "grad_norm": 0.002460025018081069, | |
| "learning_rate": 0.00012126164513280875, | |
| "loss": 0.0011, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.06928576693447336, | |
| "grad_norm": 0.01136123575270176, | |
| "learning_rate": 0.00012126154972855578, | |
| "loss": 0.0003, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.06943318345986585, | |
| "grad_norm": 37.112640380859375, | |
| "learning_rate": 0.00012126145412114907, | |
| "loss": 2.9468, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.06958059998525835, | |
| "grad_norm": 0.006933971308171749, | |
| "learning_rate": 0.00012126135831058891, | |
| "loss": 0.0001, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.06958059998525835, | |
| "eval_1_ratio_diff": 0.2478565861262666, | |
| "eval_accuracy": 0.7443491816056118, | |
| "eval_f1": 0.795, | |
| "eval_loss": 2.4577670097351074, | |
| "eval_precision": 0.6631908237747653, | |
| "eval_recall": 0.9921996879875195, | |
| "eval_runtime": 1439.5508, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.06972801651065084, | |
| "grad_norm": 36.87862777709961, | |
| "learning_rate": 0.00012126126229687566, | |
| "loss": 5.0295, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.06987543303604334, | |
| "grad_norm": 70.17023468017578, | |
| "learning_rate": 0.00012126116608000961, | |
| "loss": 4.0308, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.07002284956143584, | |
| "grad_norm": 37.03538513183594, | |
| "learning_rate": 0.00012126106965999112, | |
| "loss": 1.8733, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.07017026608682833, | |
| "grad_norm": 66.47712707519531, | |
| "learning_rate": 0.00012126097303682048, | |
| "loss": 4.2016, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.07031768261222084, | |
| "grad_norm": 29.390884399414062, | |
| "learning_rate": 0.00012126087621049803, | |
| "loss": 1.9788, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.07046509913761333, | |
| "grad_norm": 10.997523307800293, | |
| "learning_rate": 0.00012126077918102409, | |
| "loss": 0.1381, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.07061251566300582, | |
| "grad_norm": 38.46750259399414, | |
| "learning_rate": 0.00012126068194839898, | |
| "loss": 0.8822, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.07075993218839832, | |
| "grad_norm": 18.62594985961914, | |
| "learning_rate": 0.00012126058451262304, | |
| "loss": 0.3758, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.07075993218839832, | |
| "eval_1_ratio_diff": -0.4505066250974279, | |
| "eval_accuracy": 0.5494933749025721, | |
| "eval_f1": 0.17897727272727273, | |
| "eval_loss": 0.5460181832313538, | |
| "eval_precision": 1.0, | |
| "eval_recall": 0.09828393135725429, | |
| "eval_runtime": 1440.4539, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.07090734871379081, | |
| "grad_norm": 21.68712615966797, | |
| "learning_rate": 0.00012126048687369658, | |
| "loss": 0.3891, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.07105476523918332, | |
| "grad_norm": 7.1598124504089355, | |
| "learning_rate": 0.00012126038903161995, | |
| "loss": 0.3555, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.07120218176457581, | |
| "grad_norm": 28.80471420288086, | |
| "learning_rate": 0.00012126029098639344, | |
| "loss": 0.9078, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.0713495982899683, | |
| "grad_norm": 18.606401443481445, | |
| "learning_rate": 0.00012126019273801743, | |
| "loss": 0.2927, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.0714970148153608, | |
| "grad_norm": 21.51089859008789, | |
| "learning_rate": 0.0001212600942864922, | |
| "loss": 0.6348, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.0716444313407533, | |
| "grad_norm": 4.713807582855225, | |
| "learning_rate": 0.00012125999563181809, | |
| "loss": 0.5351, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.0717918478661458, | |
| "grad_norm": 11.428181648254395, | |
| "learning_rate": 0.00012125989677399546, | |
| "loss": 0.2465, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.07193926439153829, | |
| "grad_norm": 13.697668075561523, | |
| "learning_rate": 0.00012125979771302464, | |
| "loss": 0.1411, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.07193926439153829, | |
| "eval_1_ratio_diff": 0.03897116134060796, | |
| "eval_accuracy": 0.8846453624318005, | |
| "eval_f1": 0.8888888888888888, | |
| "eval_loss": 0.38700371980667114, | |
| "eval_precision": 0.8567293777134588, | |
| "eval_recall": 0.9235569422776911, | |
| "eval_runtime": 1440.5564, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.07208668091693078, | |
| "grad_norm": 3.2123868465423584, | |
| "learning_rate": 0.0001212596984489059, | |
| "loss": 0.0295, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.07223409744232329, | |
| "grad_norm": 0.026355383917689323, | |
| "learning_rate": 0.00012125959898163965, | |
| "loss": 0.0005, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.07238151396771578, | |
| "grad_norm": 0.10228274017572403, | |
| "learning_rate": 0.00012125949931122618, | |
| "loss": 0.002, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.07252893049310828, | |
| "grad_norm": 0.031520161777734756, | |
| "learning_rate": 0.00012125939943766583, | |
| "loss": 0.0008, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.07267634701850077, | |
| "grad_norm": 0.1047026515007019, | |
| "learning_rate": 0.00012125929936095894, | |
| "loss": 0.0009, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.07282376354389328, | |
| "grad_norm": 30.88459587097168, | |
| "learning_rate": 0.00012125919908110585, | |
| "loss": 2.267, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.07297118006928577, | |
| "grad_norm": 0.029362376779317856, | |
| "learning_rate": 0.0001212590985981069, | |
| "loss": 0.0003, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.07311859659467826, | |
| "grad_norm": 0.2791018784046173, | |
| "learning_rate": 0.0001212589979119624, | |
| "loss": 0.0017, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.07311859659467826, | |
| "eval_1_ratio_diff": 0.05378020265003891, | |
| "eval_accuracy": 0.877630553390491, | |
| "eval_f1": 0.8837897853441895, | |
| "eval_loss": 0.7231972813606262, | |
| "eval_precision": 0.8408450704225352, | |
| "eval_recall": 0.9313572542901716, | |
| "eval_runtime": 1440.0578, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.07326601312007076, | |
| "grad_norm": 0.056903205811977386, | |
| "learning_rate": 0.00012125889702267272, | |
| "loss": 0.0007, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.07341342964546325, | |
| "grad_norm": 0.015094200149178505, | |
| "learning_rate": 0.00012125879593023818, | |
| "loss": 0.0002, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.07356084617085576, | |
| "grad_norm": 0.6008047461509705, | |
| "learning_rate": 0.00012125869463465912, | |
| "loss": 0.0045, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.07370826269624825, | |
| "grad_norm": 0.6626961827278137, | |
| "learning_rate": 0.00012125859313593587, | |
| "loss": 0.004, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.07385567922164074, | |
| "grad_norm": 0.009313930757343769, | |
| "learning_rate": 0.0001212584914340688, | |
| "loss": 0.0002, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.07400309574703325, | |
| "grad_norm": 0.01076335646212101, | |
| "learning_rate": 0.00012125838952905822, | |
| "loss": 0.0004, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.07415051227242574, | |
| "grad_norm": 0.008014670573174953, | |
| "learning_rate": 0.00012125828742090447, | |
| "loss": 0.0001, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.07429792879781824, | |
| "grad_norm": 33.344932556152344, | |
| "learning_rate": 0.00012125818510960795, | |
| "loss": 2.0841, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.07429792879781824, | |
| "eval_1_ratio_diff": 0.08261886204208879, | |
| "eval_accuracy": 0.8643803585346843, | |
| "eval_f1": 0.8746397694524496, | |
| "eval_loss": 0.954525887966156, | |
| "eval_precision": 0.8125836680053548, | |
| "eval_recall": 0.9469578783151326, | |
| "eval_runtime": 1439.8312, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.07444534532321073, | |
| "grad_norm": 0.009119726717472076, | |
| "learning_rate": 0.00012125808259516893, | |
| "loss": 2.2253, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.07459276184860322, | |
| "grad_norm": 0.08696369081735611, | |
| "learning_rate": 0.00012125797987758778, | |
| "loss": 0.0009, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.07474017837399573, | |
| "grad_norm": 26.136661529541016, | |
| "learning_rate": 0.00012125787695686484, | |
| "loss": 1.5774, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.07488759489938822, | |
| "grad_norm": 32.24976348876953, | |
| "learning_rate": 0.00012125777383300048, | |
| "loss": 1.1735, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.07503501142478072, | |
| "grad_norm": 0.5457736253738403, | |
| "learning_rate": 0.00012125767050599501, | |
| "loss": 0.0112, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.07518242795017321, | |
| "grad_norm": 0.7166759967803955, | |
| "learning_rate": 0.0001212575669758488, | |
| "loss": 0.2859, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.0753298444755657, | |
| "grad_norm": 0.32718005776405334, | |
| "learning_rate": 0.00012125746324256221, | |
| "loss": 1.5148, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.07547726100095821, | |
| "grad_norm": 88.85284423828125, | |
| "learning_rate": 0.00012125735930613554, | |
| "loss": 2.6444, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.07547726100095821, | |
| "eval_1_ratio_diff": 0.024162120031176904, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8865194211728865, | |
| "eval_loss": 0.48203912377357483, | |
| "eval_precision": 0.8660714285714286, | |
| "eval_recall": 0.9079563182527302, | |
| "eval_runtime": 1439.7938, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.0756246775263507, | |
| "grad_norm": 0.2224024385213852, | |
| "learning_rate": 0.00012125725516656918, | |
| "loss": 0.0068, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.0757720940517432, | |
| "grad_norm": 0.2110309898853302, | |
| "learning_rate": 0.00012125715082386346, | |
| "loss": 0.0036, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.0759195105771357, | |
| "grad_norm": 0.20480689406394958, | |
| "learning_rate": 0.00012125704627801874, | |
| "loss": 0.0101, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.07606692710252819, | |
| "grad_norm": 24.321718215942383, | |
| "learning_rate": 0.00012125694152903538, | |
| "loss": 2.3569, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.07621434362792069, | |
| "grad_norm": 0.3324243426322937, | |
| "learning_rate": 0.00012125683657691368, | |
| "loss": 0.0101, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.07636176015331318, | |
| "grad_norm": 1.0518757104873657, | |
| "learning_rate": 0.00012125673142165406, | |
| "loss": 0.013, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.07650917667870569, | |
| "grad_norm": 25.96786880493164, | |
| "learning_rate": 0.00012125662606325683, | |
| "loss": 1.3031, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.07665659320409818, | |
| "grad_norm": 0.12808893620967865, | |
| "learning_rate": 0.00012125652050172236, | |
| "loss": 0.0051, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.07665659320409818, | |
| "eval_1_ratio_diff": -0.05689789555728764, | |
| "eval_accuracy": 0.8698363211223694, | |
| "eval_f1": 0.8618693134822167, | |
| "eval_loss": 0.5904788970947266, | |
| "eval_precision": 0.9172535211267606, | |
| "eval_recall": 0.8127925117004681, | |
| "eval_runtime": 1440.2519, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.07680400972949068, | |
| "grad_norm": 30.74445343017578, | |
| "learning_rate": 0.00012125641473705098, | |
| "loss": 1.479, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.07695142625488317, | |
| "grad_norm": 0.33330148458480835, | |
| "learning_rate": 0.00012125630876924309, | |
| "loss": 2.7544, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.07709884278027566, | |
| "grad_norm": 0.4779714047908783, | |
| "learning_rate": 0.00012125620259829898, | |
| "loss": 0.0059, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.07724625930566817, | |
| "grad_norm": 0.4376041889190674, | |
| "learning_rate": 0.00012125609622421907, | |
| "loss": 0.0273, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.07739367583106066, | |
| "grad_norm": 0.14147210121154785, | |
| "learning_rate": 0.00012125598964700367, | |
| "loss": 1.3617, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.07754109235645316, | |
| "grad_norm": 20.751298904418945, | |
| "learning_rate": 0.00012125588286665319, | |
| "loss": 2.4864, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.07768850888184566, | |
| "grad_norm": 0.3589191138744354, | |
| "learning_rate": 0.00012125577588316793, | |
| "loss": 0.0102, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.07783592540723815, | |
| "grad_norm": 31.519622802734375, | |
| "learning_rate": 0.00012125566869654828, | |
| "loss": 1.8161, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.07783592540723815, | |
| "eval_1_ratio_diff": -0.05455962587685115, | |
| "eval_accuracy": 0.8472330475448169, | |
| "eval_f1": 0.8382838283828383, | |
| "eval_loss": 0.5423593521118164, | |
| "eval_precision": 0.8896672504378283, | |
| "eval_recall": 0.7925117004680188, | |
| "eval_runtime": 1440.6162, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.07798334193263065, | |
| "grad_norm": 29.618946075439453, | |
| "learning_rate": 0.00012125556130679457, | |
| "loss": 0.9249, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.07813075845802314, | |
| "grad_norm": 24.92931365966797, | |
| "learning_rate": 0.0001212554537139072, | |
| "loss": 1.3237, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.07827817498341565, | |
| "grad_norm": 6.922366142272949, | |
| "learning_rate": 0.00012125534591788653, | |
| "loss": 0.0954, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.07842559150880814, | |
| "grad_norm": 1.4033849239349365, | |
| "learning_rate": 0.00012125523791873287, | |
| "loss": 0.1059, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.07857300803420063, | |
| "grad_norm": 0.5430750846862793, | |
| "learning_rate": 0.00012125512971644664, | |
| "loss": 0.0167, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.07872042455959313, | |
| "grad_norm": 25.31169319152832, | |
| "learning_rate": 0.00012125502131102817, | |
| "loss": 1.4498, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.07886784108498562, | |
| "grad_norm": 24.96006965637207, | |
| "learning_rate": 0.00012125491270247783, | |
| "loss": 1.3258, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.07901525761037813, | |
| "grad_norm": 1.3635300397872925, | |
| "learning_rate": 0.000121254803890796, | |
| "loss": 0.0339, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.07901525761037813, | |
| "eval_1_ratio_diff": -0.029618082618862063, | |
| "eval_accuracy": 0.8581449727201871, | |
| "eval_f1": 0.8536977491961415, | |
| "eval_loss": 0.6108663082122803, | |
| "eval_precision": 0.8805970149253731, | |
| "eval_recall": 0.828393135725429, | |
| "eval_runtime": 1440.3267, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.07916267413577062, | |
| "grad_norm": 41.34056091308594, | |
| "learning_rate": 0.00012125469487598301, | |
| "loss": 1.665, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.07931009066116311, | |
| "grad_norm": 0.8467972278594971, | |
| "learning_rate": 0.00012125458565803925, | |
| "loss": 0.0102, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.07945750718655561, | |
| "grad_norm": 0.09642868489027023, | |
| "learning_rate": 0.00012125447623696508, | |
| "loss": 0.0053, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.0796049237119481, | |
| "grad_norm": 0.06861916184425354, | |
| "learning_rate": 0.00012125436661276089, | |
| "loss": 0.5701, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.07975234023734061, | |
| "grad_norm": 22.139467239379883, | |
| "learning_rate": 0.000121254256785427, | |
| "loss": 1.088, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.0798997567627331, | |
| "grad_norm": 0.037754353135824203, | |
| "learning_rate": 0.00012125414675496381, | |
| "loss": 0.0046, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.08004717328812559, | |
| "grad_norm": 32.785037994384766, | |
| "learning_rate": 0.00012125403652137169, | |
| "loss": 0.956, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.0801945898135181, | |
| "grad_norm": 21.96536636352539, | |
| "learning_rate": 0.000121253926084651, | |
| "loss": 3.1959, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.0801945898135181, | |
| "eval_1_ratio_diff": -0.04364770070148094, | |
| "eval_accuracy": 0.8659392049883087, | |
| "eval_f1": 0.8597063621533442, | |
| "eval_loss": 0.6232408285140991, | |
| "eval_precision": 0.9008547008547009, | |
| "eval_recall": 0.8221528861154446, | |
| "eval_runtime": 1440.7525, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.08034200633891059, | |
| "grad_norm": 0.09767896682024002, | |
| "learning_rate": 0.00012125381544480211, | |
| "loss": 1.9769, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.08048942286430309, | |
| "grad_norm": 4.43467378616333, | |
| "learning_rate": 0.0001212537046018254, | |
| "loss": 1.8357, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.08063683938969558, | |
| "grad_norm": 22.267379760742188, | |
| "learning_rate": 0.00012125359355572121, | |
| "loss": 0.9555, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.08078425591508809, | |
| "grad_norm": 0.7281066179275513, | |
| "learning_rate": 0.00012125348230648997, | |
| "loss": 0.0391, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.08093167244048058, | |
| "grad_norm": 0.4542294442653656, | |
| "learning_rate": 0.000121253370854132, | |
| "loss": 0.9409, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.08107908896587307, | |
| "grad_norm": 22.38312339782715, | |
| "learning_rate": 0.0001212532591986477, | |
| "loss": 1.0271, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.08122650549126557, | |
| "grad_norm": 26.339080810546875, | |
| "learning_rate": 0.00012125314734003743, | |
| "loss": 1.4919, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.08137392201665807, | |
| "grad_norm": 3.4797956943511963, | |
| "learning_rate": 0.00012125303527830157, | |
| "loss": 0.0342, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.08137392201665807, | |
| "eval_1_ratio_diff": -0.15354637568199536, | |
| "eval_accuracy": 0.7887763055339049, | |
| "eval_f1": 0.7502304147465437, | |
| "eval_loss": 0.8048840761184692, | |
| "eval_precision": 0.9166666666666666, | |
| "eval_recall": 0.6349453978159126, | |
| "eval_runtime": 1440.2595, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.08152133854205057, | |
| "grad_norm": 27.387937545776367, | |
| "learning_rate": 0.0001212529230134405, | |
| "loss": 2.2878, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.08166875506744306, | |
| "grad_norm": 0.27098074555397034, | |
| "learning_rate": 0.00012125281054545459, | |
| "loss": 0.0115, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.08181617159283555, | |
| "grad_norm": 0.17622074484825134, | |
| "learning_rate": 0.00012125269787434425, | |
| "loss": 0.9066, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.08196358811822806, | |
| "grad_norm": 13.168516159057617, | |
| "learning_rate": 0.00012125258500010979, | |
| "loss": 1.0468, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.08211100464362055, | |
| "grad_norm": 15.512298583984375, | |
| "learning_rate": 0.00012125247192275165, | |
| "loss": 0.1845, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.08225842116901305, | |
| "grad_norm": 31.888328552246094, | |
| "learning_rate": 0.00012125235864227018, | |
| "loss": 0.3136, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.08240583769440554, | |
| "grad_norm": 9.891843795776367, | |
| "learning_rate": 0.00012125224515866574, | |
| "loss": 0.8436, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.08255325421979803, | |
| "grad_norm": 46.16787338256836, | |
| "learning_rate": 0.00012125213147193877, | |
| "loss": 2.5811, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.08255325421979803, | |
| "eval_1_ratio_diff": 0.07560405300077944, | |
| "eval_accuracy": 0.848012470771629, | |
| "eval_f1": 0.8585931834662799, | |
| "eval_loss": 0.6410078406333923, | |
| "eval_precision": 0.8021680216802168, | |
| "eval_recall": 0.9235569422776911, | |
| "eval_runtime": 1440.0548, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.08270067074519054, | |
| "grad_norm": 7.604285717010498, | |
| "learning_rate": 0.00012125201758208962, | |
| "loss": 1.1177, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.08284808727058303, | |
| "grad_norm": 0.39338427782058716, | |
| "learning_rate": 0.00012125190348911864, | |
| "loss": 1.5911, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.08299550379597553, | |
| "grad_norm": 36.94788360595703, | |
| "learning_rate": 0.00012125178919302626, | |
| "loss": 1.3629, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.08314292032136802, | |
| "grad_norm": 0.8372169137001038, | |
| "learning_rate": 0.00012125167469381283, | |
| "loss": 0.0102, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.08329033684676052, | |
| "grad_norm": 0.12225531786680222, | |
| "learning_rate": 0.00012125155999147876, | |
| "loss": 0.0043, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.08343775337215302, | |
| "grad_norm": 62.011695861816406, | |
| "learning_rate": 0.0001212514450860244, | |
| "loss": 1.6697, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.08358516989754551, | |
| "grad_norm": 0.02834857441484928, | |
| "learning_rate": 0.00012125132997745018, | |
| "loss": 0.0044, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.08373258642293802, | |
| "grad_norm": 0.007508635055273771, | |
| "learning_rate": 0.00012125121466575647, | |
| "loss": 0.0067, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.08373258642293802, | |
| "eval_1_ratio_diff": -0.059236165237724125, | |
| "eval_accuracy": 0.8534684333593141, | |
| "eval_f1": 0.8441127694859039, | |
| "eval_loss": 0.8524520993232727, | |
| "eval_precision": 0.9008849557522124, | |
| "eval_recall": 0.7940717628705148, | |
| "eval_runtime": 1440.1118, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.0838800029483305, | |
| "grad_norm": 70.38623046875, | |
| "learning_rate": 0.00012125109915094362, | |
| "loss": 2.694, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.08402741947372301, | |
| "grad_norm": 1.548732042312622, | |
| "learning_rate": 0.00012125098343301206, | |
| "loss": 0.0213, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.0841748359991155, | |
| "grad_norm": 1.2770323753356934, | |
| "learning_rate": 0.00012125086751196217, | |
| "loss": 0.0099, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.08432225252450799, | |
| "grad_norm": 30.610591888427734, | |
| "learning_rate": 0.00012125075138779432, | |
| "loss": 2.0352, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.0844696690499005, | |
| "grad_norm": 0.8128361701965332, | |
| "learning_rate": 0.0001212506350605089, | |
| "loss": 1.0719, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.08461708557529299, | |
| "grad_norm": 1.6853057146072388, | |
| "learning_rate": 0.00012125051853010634, | |
| "loss": 0.0092, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.0847645021006855, | |
| "grad_norm": 39.670047760009766, | |
| "learning_rate": 0.000121250401796587, | |
| "loss": 1.7653, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.08491191862607798, | |
| "grad_norm": 36.04311752319336, | |
| "learning_rate": 0.00012125028485995127, | |
| "loss": 1.3473, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.08491191862607798, | |
| "eval_1_ratio_diff": 0.03897116134060796, | |
| "eval_accuracy": 0.8737334372564303, | |
| "eval_f1": 0.8783783783783784, | |
| "eval_loss": 0.6749188899993896, | |
| "eval_precision": 0.8465991316931982, | |
| "eval_recall": 0.9126365054602185, | |
| "eval_runtime": 1440.4215, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.08505933515147047, | |
| "grad_norm": 0.17764577269554138, | |
| "learning_rate": 0.00012125016772019952, | |
| "loss": 0.0023, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.08520675167686298, | |
| "grad_norm": 0.3527587652206421, | |
| "learning_rate": 0.0001212500503773322, | |
| "loss": 0.0055, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.08535416820225547, | |
| "grad_norm": 0.1379138082265854, | |
| "learning_rate": 0.00012124993283134963, | |
| "loss": 1.6429, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.08550158472764798, | |
| "grad_norm": 0.14264832437038422, | |
| "learning_rate": 0.0001212498150822523, | |
| "loss": 0.0089, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.08564900125304047, | |
| "grad_norm": 30.086095809936523, | |
| "learning_rate": 0.00012124969713004051, | |
| "loss": 2.4261, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.08579641777843296, | |
| "grad_norm": 0.26527953147888184, | |
| "learning_rate": 0.00012124957897471469, | |
| "loss": 0.6917, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.08594383430382546, | |
| "grad_norm": 8.70952320098877, | |
| "learning_rate": 0.00012124946061627526, | |
| "loss": 0.0826, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.08609125082921795, | |
| "grad_norm": 0.031940966844558716, | |
| "learning_rate": 0.0001212493420547226, | |
| "loss": 0.0008, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.08609125082921795, | |
| "eval_1_ratio_diff": -0.010132501948558081, | |
| "eval_accuracy": 0.8636009353078722, | |
| "eval_f1": 0.8620961386918834, | |
| "eval_loss": 0.5565428137779236, | |
| "eval_precision": 0.8710191082802548, | |
| "eval_recall": 0.8533541341653667, | |
| "eval_runtime": 1440.6772, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.08623866735461046, | |
| "grad_norm": 1.1032943725585938, | |
| "learning_rate": 0.0001212492232900571, | |
| "loss": 0.0135, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.08638608388000295, | |
| "grad_norm": 0.6731190085411072, | |
| "learning_rate": 0.00012124910432227916, | |
| "loss": 0.0145, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.08653350040539544, | |
| "grad_norm": 0.2941815257072449, | |
| "learning_rate": 0.00012124898515138918, | |
| "loss": 0.005, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.08668091693078794, | |
| "grad_norm": 0.060058582574129105, | |
| "learning_rate": 0.00012124886577738757, | |
| "loss": 0.0024, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.08682833345618043, | |
| "grad_norm": 0.029819436371326447, | |
| "learning_rate": 0.0001212487462002747, | |
| "loss": 0.0015, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.08697574998157294, | |
| "grad_norm": 0.1549704670906067, | |
| "learning_rate": 0.000121248626420051, | |
| "loss": 0.0023, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.08712316650696543, | |
| "grad_norm": 1.1005401611328125, | |
| "learning_rate": 0.00012124850643671686, | |
| "loss": 0.0065, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.08727058303235792, | |
| "grad_norm": 200.2630157470703, | |
| "learning_rate": 0.00012124838625027271, | |
| "loss": 0.7416, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.08727058303235792, | |
| "eval_1_ratio_diff": 0.04520654715510519, | |
| "eval_accuracy": 0.8752922837100545, | |
| "eval_f1": 0.8805970149253731, | |
| "eval_loss": 0.8647755980491638, | |
| "eval_precision": 0.844062947067239, | |
| "eval_recall": 0.9204368174726989, | |
| "eval_runtime": 1441.0897, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.08741799955775043, | |
| "grad_norm": 0.012469271197915077, | |
| "learning_rate": 0.0001212482658607189, | |
| "loss": 0.0003, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.08756541608314292, | |
| "grad_norm": 0.017095841467380524, | |
| "learning_rate": 0.00012124814526805586, | |
| "loss": 0.0003, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.08771283260853542, | |
| "grad_norm": 23.186222076416016, | |
| "learning_rate": 0.00012124802447228401, | |
| "loss": 2.0149, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.08786024913392791, | |
| "grad_norm": 0.010486994870007038, | |
| "learning_rate": 0.00012124790347340374, | |
| "loss": 0.0006, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.08800766565932042, | |
| "grad_norm": 40.754051208496094, | |
| "learning_rate": 0.00012124778227141545, | |
| "loss": 2.8077, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.08815508218471291, | |
| "grad_norm": 0.08611409366130829, | |
| "learning_rate": 0.00012124766086631955, | |
| "loss": 0.0013, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.0883024987101054, | |
| "grad_norm": 0.28396108746528625, | |
| "learning_rate": 0.00012124753925811646, | |
| "loss": 2.2785, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.0884499152354979, | |
| "grad_norm": 0.03215723857283592, | |
| "learning_rate": 0.00012124741744680656, | |
| "loss": 0.0026, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.0884499152354979, | |
| "eval_1_ratio_diff": 0.04598597038191732, | |
| "eval_accuracy": 0.877630553390491, | |
| "eval_f1": 0.8829231916480239, | |
| "eval_loss": 0.7880816459655762, | |
| "eval_precision": 0.8457142857142858, | |
| "eval_recall": 0.9235569422776911, | |
| "eval_runtime": 1441.3958, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.0885973317608904, | |
| "grad_norm": 0.03621472418308258, | |
| "learning_rate": 0.00012124729543239029, | |
| "loss": 0.7748, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.0887447482862829, | |
| "grad_norm": 0.09097783267498016, | |
| "learning_rate": 0.00012124717321486803, | |
| "loss": 1.8821, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.08889216481167539, | |
| "grad_norm": 0.3395259976387024, | |
| "learning_rate": 0.00012124705079424022, | |
| "loss": 0.0073, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.08903958133706788, | |
| "grad_norm": 0.04736631363630295, | |
| "learning_rate": 0.00012124692817050723, | |
| "loss": 1.567, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.08918699786246038, | |
| "grad_norm": 0.08807298541069031, | |
| "learning_rate": 0.00012124680534366952, | |
| "loss": 0.0014, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.08933441438785288, | |
| "grad_norm": 0.05549991875886917, | |
| "learning_rate": 0.00012124668231372745, | |
| "loss": 0.0021, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.08948183091324538, | |
| "grad_norm": 0.06815358251333237, | |
| "learning_rate": 0.00012124655908068146, | |
| "loss": 0.9174, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.08962924743863787, | |
| "grad_norm": 0.03639994189143181, | |
| "learning_rate": 0.00012124643564453199, | |
| "loss": 0.0199, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.08962924743863787, | |
| "eval_1_ratio_diff": 0.020265003897116163, | |
| "eval_accuracy": 0.8628215120810601, | |
| "eval_f1": 0.8654434250764526, | |
| "eval_loss": 0.648876965045929, | |
| "eval_precision": 0.848575712143928, | |
| "eval_recall": 0.8829953198127926, | |
| "eval_runtime": 1441.2405, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.08977666396403036, | |
| "grad_norm": 23.13437271118164, | |
| "learning_rate": 0.00012124631200527941, | |
| "loss": 1.6889, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.08992408048942287, | |
| "grad_norm": 0.2734740674495697, | |
| "learning_rate": 0.00012124618816292414, | |
| "loss": 0.0059, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.09007149701481536, | |
| "grad_norm": 12.39369010925293, | |
| "learning_rate": 0.00012124606411746661, | |
| "loss": 0.0533, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.09021891354020786, | |
| "grad_norm": 0.036048658192157745, | |
| "learning_rate": 0.00012124593986890722, | |
| "loss": 0.0011, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.09036633006560035, | |
| "grad_norm": 0.3171124756336212, | |
| "learning_rate": 0.00012124581541724642, | |
| "loss": 1.5207, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.09051374659099284, | |
| "grad_norm": 4.317696571350098, | |
| "learning_rate": 0.00012124569076248459, | |
| "loss": 1.5358, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.09066116311638535, | |
| "grad_norm": 0.12044669687747955, | |
| "learning_rate": 0.00012124556590462215, | |
| "loss": 0.0053, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.09080857964177784, | |
| "grad_norm": 0.21298988163471222, | |
| "learning_rate": 0.00012124544084365953, | |
| "loss": 0.0081, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.09080857964177784, | |
| "eval_1_ratio_diff": 0.009353078721745844, | |
| "eval_accuracy": 0.8222915042868277, | |
| "eval_f1": 0.8238021638330757, | |
| "eval_loss": 0.7862046360969543, | |
| "eval_precision": 0.8162327718223583, | |
| "eval_recall": 0.8315132605304212, | |
| "eval_runtime": 1439.2896, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.09095599616717034, | |
| "grad_norm": 20.541194915771484, | |
| "learning_rate": 0.00012124531557959717, | |
| "loss": 1.194, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.09110341269256284, | |
| "grad_norm": 0.2897285223007202, | |
| "learning_rate": 0.00012124519011243545, | |
| "loss": 0.8952, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.09125082921795533, | |
| "grad_norm": 0.08111666887998581, | |
| "learning_rate": 0.0001212450644421748, | |
| "loss": 0.006, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.09139824574334783, | |
| "grad_norm": 0.4867294430732727, | |
| "learning_rate": 0.00012124493856881568, | |
| "loss": 1.7795, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.09154566226874032, | |
| "grad_norm": 0.9198406934738159, | |
| "learning_rate": 0.00012124481249235846, | |
| "loss": 0.0259, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.09169307879413283, | |
| "grad_norm": 0.08149991929531097, | |
| "learning_rate": 0.0001212446862128036, | |
| "loss": 1.2016, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.09184049531952532, | |
| "grad_norm": 0.1457146853208542, | |
| "learning_rate": 0.0001212445597301515, | |
| "loss": 0.9302, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.09198791184491782, | |
| "grad_norm": 0.24497820436954498, | |
| "learning_rate": 0.00012124443304440259, | |
| "loss": 0.0051, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.09198791184491782, | |
| "eval_1_ratio_diff": 0.02260327357755254, | |
| "eval_accuracy": 0.8339828526890102, | |
| "eval_f1": 0.8375286041189931, | |
| "eval_loss": 0.747604489326477, | |
| "eval_precision": 0.8194029850746268, | |
| "eval_recall": 0.8564742589703588, | |
| "eval_runtime": 1440.6099, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.09213532837031031, | |
| "grad_norm": 0.10772482305765152, | |
| "learning_rate": 0.0001212443061555573, | |
| "loss": 0.0032, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.0922827448957028, | |
| "grad_norm": 3.8056480884552, | |
| "learning_rate": 0.00012124417906361605, | |
| "loss": 0.838, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.09243016142109531, | |
| "grad_norm": 21.590364456176758, | |
| "learning_rate": 0.00012124405176857927, | |
| "loss": 2.5474, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.0925775779464878, | |
| "grad_norm": 21.33682632446289, | |
| "learning_rate": 0.00012124392427044737, | |
| "loss": 2.7454, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.0927249944718803, | |
| "grad_norm": 0.21534398198127747, | |
| "learning_rate": 0.00012124379656922081, | |
| "loss": 0.0068, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.0928724109972728, | |
| "grad_norm": 20.76007843017578, | |
| "learning_rate": 0.0001212436686649, | |
| "loss": 1.2547, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.09301982752266529, | |
| "grad_norm": 20.636024475097656, | |
| "learning_rate": 0.00012124354055748535, | |
| "loss": 1.5976, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.09316724404805779, | |
| "grad_norm": 2.3518083095550537, | |
| "learning_rate": 0.00012124341224697731, | |
| "loss": 0.0369, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.09316724404805779, | |
| "eval_1_ratio_diff": -0.03117692907248637, | |
| "eval_accuracy": 0.8565861262665627, | |
| "eval_f1": 0.8518518518518519, | |
| "eval_loss": 0.43984636664390564, | |
| "eval_precision": 0.8801996672212978, | |
| "eval_recall": 0.8252730109204368, | |
| "eval_runtime": 1440.9991, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.09331466057345028, | |
| "grad_norm": 1.4304808378219604, | |
| "learning_rate": 0.0001212432837333763, | |
| "loss": 0.03, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.09346207709884279, | |
| "grad_norm": 0.6885532736778259, | |
| "learning_rate": 0.00012124315501668278, | |
| "loss": 0.7603, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.09360949362423528, | |
| "grad_norm": 0.5777420997619629, | |
| "learning_rate": 0.00012124302609689715, | |
| "loss": 1.1026, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.09375691014962777, | |
| "grad_norm": 1.5885238647460938, | |
| "learning_rate": 0.00012124289697401986, | |
| "loss": 0.041, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.09390432667502027, | |
| "grad_norm": 0.37640276551246643, | |
| "learning_rate": 0.00012124276764805132, | |
| "loss": 0.0182, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.09405174320041276, | |
| "grad_norm": 25.54754066467285, | |
| "learning_rate": 0.00012124263811899196, | |
| "loss": 1.2952, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.09419915972580527, | |
| "grad_norm": 41.04960632324219, | |
| "learning_rate": 0.00012124250838684226, | |
| "loss": 2.126, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.09434657625119776, | |
| "grad_norm": 0.16556452214717865, | |
| "learning_rate": 0.00012124237845160263, | |
| "loss": 0.0078, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.09434657625119776, | |
| "eval_1_ratio_diff": -0.08573655494933752, | |
| "eval_accuracy": 0.8487918939984411, | |
| "eval_f1": 0.8344709897610921, | |
| "eval_loss": 0.6657168865203857, | |
| "eval_precision": 0.9209039548022598, | |
| "eval_recall": 0.7628705148205929, | |
| "eval_runtime": 1440.6129, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.09449399277659025, | |
| "grad_norm": 17.6622314453125, | |
| "learning_rate": 0.00012124224831327347, | |
| "loss": 0.1561, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.09464140930198275, | |
| "grad_norm": 0.15980716049671173, | |
| "learning_rate": 0.00012124211797185528, | |
| "loss": 0.0042, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.09478882582737524, | |
| "grad_norm": 0.04221845418214798, | |
| "learning_rate": 0.00012124198742734845, | |
| "loss": 1.4535, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.09493624235276775, | |
| "grad_norm": 0.056126296520233154, | |
| "learning_rate": 0.00012124185667975342, | |
| "loss": 0.0031, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.09508365887816024, | |
| "grad_norm": 0.08041621744632721, | |
| "learning_rate": 0.00012124172572907067, | |
| "loss": 0.0018, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.09523107540355275, | |
| "grad_norm": 28.64826011657715, | |
| "learning_rate": 0.00012124159457530059, | |
| "loss": 1.6516, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.09537849192894524, | |
| "grad_norm": 0.31489408016204834, | |
| "learning_rate": 0.00012124146321844365, | |
| "loss": 0.0038, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.09552590845433773, | |
| "grad_norm": 1.7656670808792114, | |
| "learning_rate": 0.00012124133165850026, | |
| "loss": 0.0131, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.09552590845433773, | |
| "eval_1_ratio_diff": 0.04832424006235381, | |
| "eval_accuracy": 0.8176149649259548, | |
| "eval_f1": 0.8258928571428571, | |
| "eval_loss": 0.8926898241043091, | |
| "eval_precision": 0.7894736842105263, | |
| "eval_recall": 0.8658346333853354, | |
| "eval_runtime": 1440.8824, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.09567332497973023, | |
| "grad_norm": 0.3228819668292999, | |
| "learning_rate": 0.00012124119989547089, | |
| "loss": 0.8356, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.09582074150512272, | |
| "grad_norm": 58.03204345703125, | |
| "learning_rate": 0.00012124106792935597, | |
| "loss": 0.5161, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.09596815803051523, | |
| "grad_norm": 0.094666488468647, | |
| "learning_rate": 0.00012124093576015595, | |
| "loss": 0.0014, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.09611557455590772, | |
| "grad_norm": 0.054852358996868134, | |
| "learning_rate": 0.00012124080338787127, | |
| "loss": 0.0025, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.09626299108130021, | |
| "grad_norm": 2.4614083766937256, | |
| "learning_rate": 0.00012124067081250235, | |
| "loss": 0.0231, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.09641040760669271, | |
| "grad_norm": 0.13067440688610077, | |
| "learning_rate": 0.00012124053803404966, | |
| "loss": 0.0019, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.0965578241320852, | |
| "grad_norm": 0.05831296741962433, | |
| "learning_rate": 0.00012124040505251365, | |
| "loss": 1.1599, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.09670524065747771, | |
| "grad_norm": 22.675302505493164, | |
| "learning_rate": 0.00012124027186789477, | |
| "loss": 1.7971, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.09670524065747771, | |
| "eval_1_ratio_diff": -0.07638347622759162, | |
| "eval_accuracy": 0.8207326578332035, | |
| "eval_f1": 0.8057432432432432, | |
| "eval_loss": 0.9711058735847473, | |
| "eval_precision": 0.8784530386740331, | |
| "eval_recall": 0.7441497659906396, | |
| "eval_runtime": 1440.5355, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.0968526571828702, | |
| "grad_norm": 245.76840209960938, | |
| "learning_rate": 0.00012124013848019342, | |
| "loss": 2.4617, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.09700007370826269, | |
| "grad_norm": 21.968021392822266, | |
| "learning_rate": 0.00012124000488941008, | |
| "loss": 1.4503, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.0971474902336552, | |
| "grad_norm": 0.03653848171234131, | |
| "learning_rate": 0.00012123987109554522, | |
| "loss": 0.0015, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.09729490675904769, | |
| "grad_norm": 0.16115568578243256, | |
| "learning_rate": 0.00012123973709859925, | |
| "loss": 0.0201, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.09744232328444019, | |
| "grad_norm": 34.74784851074219, | |
| "learning_rate": 0.00012123960289857264, | |
| "loss": 1.092, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.09758973980983268, | |
| "grad_norm": 17.326068878173828, | |
| "learning_rate": 0.00012123946849546582, | |
| "loss": 0.0826, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.09773715633522517, | |
| "grad_norm": 22.532522201538086, | |
| "learning_rate": 0.00012123933388927926, | |
| "loss": 2.0905, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.09788457286061768, | |
| "grad_norm": 0.09820098429918289, | |
| "learning_rate": 0.0001212391990800134, | |
| "loss": 0.002, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.09788457286061768, | |
| "eval_1_ratio_diff": 0.05689789555728764, | |
| "eval_accuracy": 0.8106001558846454, | |
| "eval_f1": 0.8206642066420664, | |
| "eval_loss": 0.7345473170280457, | |
| "eval_precision": 0.7787114845938375, | |
| "eval_recall": 0.8673946957878315, | |
| "eval_runtime": 1439.7279, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.09803198938601017, | |
| "grad_norm": 0.11757276207208633, | |
| "learning_rate": 0.00012123906406766871, | |
| "loss": 0.0079, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.09817940591140267, | |
| "grad_norm": 24.76763153076172, | |
| "learning_rate": 0.00012123892885224563, | |
| "loss": 1.3389, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.09832682243679516, | |
| "grad_norm": 0.0959400087594986, | |
| "learning_rate": 0.0001212387934337446, | |
| "loss": 0.9421, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.09847423896218765, | |
| "grad_norm": 0.3935282826423645, | |
| "learning_rate": 0.00012123865781216609, | |
| "loss": 0.0104, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.09862165548758016, | |
| "grad_norm": 22.505558013916016, | |
| "learning_rate": 0.00012123852198751054, | |
| "loss": 0.7555, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.09876907201297265, | |
| "grad_norm": 1.3673774003982544, | |
| "learning_rate": 0.00012123838595977844, | |
| "loss": 0.0409, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.09891648853836515, | |
| "grad_norm": 0.6889051198959351, | |
| "learning_rate": 0.0001212382497289702, | |
| "loss": 0.0269, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.09906390506375765, | |
| "grad_norm": 0.2218835949897766, | |
| "learning_rate": 0.0001212381132950863, | |
| "loss": 0.9572, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.09906390506375765, | |
| "eval_1_ratio_diff": 0.07560405300077944, | |
| "eval_accuracy": 0.8277474668745128, | |
| "eval_f1": 0.8397389412617839, | |
| "eval_loss": 0.7541435360908508, | |
| "eval_precision": 0.7845528455284553, | |
| "eval_recall": 0.9032761310452418, | |
| "eval_runtime": 1440.149, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.09921132158915015, | |
| "grad_norm": 0.08860000967979431, | |
| "learning_rate": 0.0001212379766581272, | |
| "loss": 0.0038, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.09935873811454264, | |
| "grad_norm": 0.1549777388572693, | |
| "learning_rate": 0.00012123783981809338, | |
| "loss": 0.6904, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.09950615463993513, | |
| "grad_norm": 0.4857753813266754, | |
| "learning_rate": 0.00012123770277498524, | |
| "loss": 0.005, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.09965357116532764, | |
| "grad_norm": 0.5475670099258423, | |
| "learning_rate": 0.00012123756552880328, | |
| "loss": 0.0057, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.09980098769072013, | |
| "grad_norm": 0.8644952178001404, | |
| "learning_rate": 0.00012123742807954794, | |
| "loss": 2.7045, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.09994840421611263, | |
| "grad_norm": 0.15051943063735962, | |
| "learning_rate": 0.0001212372904272197, | |
| "loss": 0.7707, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.10009582074150512, | |
| "grad_norm": 0.04434569925069809, | |
| "learning_rate": 0.00012123715257181902, | |
| "loss": 0.0007, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.10024323726689761, | |
| "grad_norm": 0.03767779842019081, | |
| "learning_rate": 0.00012123701451334634, | |
| "loss": 1.7987, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.10024323726689761, | |
| "eval_1_ratio_diff": 0.10054559625876847, | |
| "eval_accuracy": 0.8589243959469992, | |
| "eval_f1": 0.8717221828490432, | |
| "eval_loss": 0.7392542958259583, | |
| "eval_precision": 0.7987012987012987, | |
| "eval_recall": 0.9594383775351014, | |
| "eval_runtime": 1439.9484, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.10039065379229012, | |
| "grad_norm": 0.032404810190200806, | |
| "learning_rate": 0.00012123687625180216, | |
| "loss": 1.3724, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.10053807031768261, | |
| "grad_norm": 0.02649116888642311, | |
| "learning_rate": 0.00012123673778718691, | |
| "loss": 1.3162, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.10068548684307511, | |
| "grad_norm": 0.120023712515831, | |
| "learning_rate": 0.00012123659911950106, | |
| "loss": 0.0026, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.1008329033684676, | |
| "grad_norm": 0.28818804025650024, | |
| "learning_rate": 0.00012123646024874507, | |
| "loss": 0.0048, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.1009803198938601, | |
| "grad_norm": 0.5911560654640198, | |
| "learning_rate": 0.00012123632117491944, | |
| "loss": 0.0142, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.1011277364192526, | |
| "grad_norm": 22.85379409790039, | |
| "learning_rate": 0.00012123618189802459, | |
| "loss": 1.8439, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.10127515294464509, | |
| "grad_norm": 0.37168049812316895, | |
| "learning_rate": 0.00012123604241806102, | |
| "loss": 0.0065, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.1014225694700376, | |
| "grad_norm": 0.10927151888608932, | |
| "learning_rate": 0.00012123590273502919, | |
| "loss": 1.1801, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.1014225694700376, | |
| "eval_1_ratio_diff": 0.021823850350740415, | |
| "eval_accuracy": 0.8862042088854248, | |
| "eval_f1": 0.8885496183206106, | |
| "eval_loss": 0.5426926612854004, | |
| "eval_precision": 0.8699551569506726, | |
| "eval_recall": 0.9079563182527302, | |
| "eval_runtime": 1440.2334, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.10156998599543009, | |
| "grad_norm": 0.038460321724414825, | |
| "learning_rate": 0.00012123576284892955, | |
| "loss": 0.0101, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.10171740252082258, | |
| "grad_norm": 25.498838424682617, | |
| "learning_rate": 0.00012123562275976258, | |
| "loss": 1.3981, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.10186481904621508, | |
| "grad_norm": 159.9862060546875, | |
| "learning_rate": 0.00012123548246752878, | |
| "loss": 1.2495, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.10201223557160757, | |
| "grad_norm": 0.06094611436128616, | |
| "learning_rate": 0.00012123534197222857, | |
| "loss": 0.0046, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.10215965209700008, | |
| "grad_norm": 26.12101173400879, | |
| "learning_rate": 0.00012123520127386245, | |
| "loss": 1.3714, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.10230706862239257, | |
| "grad_norm": 48.13339614868164, | |
| "learning_rate": 0.00012123506037243086, | |
| "loss": 0.0869, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.10245448514778506, | |
| "grad_norm": 0.5880022644996643, | |
| "learning_rate": 0.00012123491926793433, | |
| "loss": 0.6204, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.10260190167317756, | |
| "grad_norm": 24.889034271240234, | |
| "learning_rate": 0.00012123477796037328, | |
| "loss": 0.9381, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.10260190167317756, | |
| "eval_1_ratio_diff": 0.09664848012470773, | |
| "eval_accuracy": 0.8487918939984411, | |
| "eval_f1": 0.8620199146514936, | |
| "eval_loss": 0.5980536937713623, | |
| "eval_precision": 0.792156862745098, | |
| "eval_recall": 0.9453978159126365, | |
| "eval_runtime": 1440.6605, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.10274931819857006, | |
| "grad_norm": 23.767898559570312, | |
| "learning_rate": 0.00012123463644974822, | |
| "loss": 1.3434, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.10289673472396256, | |
| "grad_norm": 0.05240378528833389, | |
| "learning_rate": 0.0001212344947360596, | |
| "loss": 0.006, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.10304415124935505, | |
| "grad_norm": 0.05574984475970268, | |
| "learning_rate": 0.00012123435281930789, | |
| "loss": 0.0062, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.10319156777474756, | |
| "grad_norm": 25.049999237060547, | |
| "learning_rate": 0.00012123421069949359, | |
| "loss": 0.7515, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.10333898430014005, | |
| "grad_norm": 0.6514810919761658, | |
| "learning_rate": 0.00012123406837661717, | |
| "loss": 0.0286, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.10348640082553254, | |
| "grad_norm": 25.315319061279297, | |
| "learning_rate": 0.00012123392585067908, | |
| "loss": 0.6189, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.10363381735092504, | |
| "grad_norm": 24.714847564697266, | |
| "learning_rate": 0.00012123378312167983, | |
| "loss": 0.7992, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.10378123387631753, | |
| "grad_norm": 21.79236602783203, | |
| "learning_rate": 0.00012123364018961989, | |
| "loss": 1.8653, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.10378123387631753, | |
| "eval_1_ratio_diff": -0.018706157443491855, | |
| "eval_accuracy": 0.8752922837100545, | |
| "eval_f1": 0.8728139904610492, | |
| "eval_loss": 0.573785662651062, | |
| "eval_precision": 0.8897893030794165, | |
| "eval_recall": 0.8564742589703588, | |
| "eval_runtime": 1440.8628, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.10392865040171004, | |
| "grad_norm": 21.346384048461914, | |
| "learning_rate": 0.00012123349705449974, | |
| "loss": 1.923, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.10407606692710253, | |
| "grad_norm": 10.506868362426758, | |
| "learning_rate": 0.00012123335371631985, | |
| "loss": 0.5301, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.10422348345249502, | |
| "grad_norm": 1.1288862228393555, | |
| "learning_rate": 0.00012123321017508069, | |
| "loss": 0.0411, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.10437089997788752, | |
| "grad_norm": 0.11825437843799591, | |
| "learning_rate": 0.00012123306643078279, | |
| "loss": 0.0026, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.10451831650328001, | |
| "grad_norm": 0.14662548899650574, | |
| "learning_rate": 0.00012123292248342657, | |
| "loss": 1.3863, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.10466573302867252, | |
| "grad_norm": 1.1349258422851562, | |
| "learning_rate": 0.00012123277833301255, | |
| "loss": 0.0148, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.10481314955406501, | |
| "grad_norm": 20.21559715270996, | |
| "learning_rate": 0.00012123263397954121, | |
| "loss": 2.3576, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.1049605660794575, | |
| "grad_norm": 27.789064407348633, | |
| "learning_rate": 0.00012123248942301302, | |
| "loss": 1.3553, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.1049605660794575, | |
| "eval_1_ratio_diff": -0.18082618862042088, | |
| "eval_accuracy": 0.7833203429462198, | |
| "eval_f1": 0.7352380952380952, | |
| "eval_loss": 0.8213497400283813, | |
| "eval_precision": 0.9437652811735942, | |
| "eval_recall": 0.6021840873634945, | |
| "eval_runtime": 1440.567, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.10510798260485, | |
| "grad_norm": 0.472825288772583, | |
| "learning_rate": 0.00012123234466342849, | |
| "loss": 0.0161, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.1052553991302425, | |
| "grad_norm": 44.982635498046875, | |
| "learning_rate": 0.00012123219970078806, | |
| "loss": 0.216, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.105402815655635, | |
| "grad_norm": 20.85587501525879, | |
| "learning_rate": 0.00012123205453509228, | |
| "loss": 1.7555, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.10555023218102749, | |
| "grad_norm": 19.432729721069336, | |
| "learning_rate": 0.00012123190916634158, | |
| "loss": 0.9614, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.10569764870641998, | |
| "grad_norm": 1.2885982990264893, | |
| "learning_rate": 0.00012123176359453646, | |
| "loss": 0.7221, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.10584506523181249, | |
| "grad_norm": 39.255924224853516, | |
| "learning_rate": 0.00012123161781967742, | |
| "loss": 0.7135, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.10599248175720498, | |
| "grad_norm": 1.8398678302764893, | |
| "learning_rate": 0.00012123147184176495, | |
| "loss": 1.7681, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.10613989828259748, | |
| "grad_norm": 0.04480309039354324, | |
| "learning_rate": 0.00012123132566079952, | |
| "loss": 0.0198, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.10613989828259748, | |
| "eval_1_ratio_diff": 0.013250194855806696, | |
| "eval_accuracy": 0.8495713172252534, | |
| "eval_f1": 0.8514241724403387, | |
| "eval_loss": 0.5520654916763306, | |
| "eval_precision": 0.8404255319148937, | |
| "eval_recall": 0.8627145085803433, | |
| "eval_runtime": 1441.2669, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.10628731480798997, | |
| "grad_norm": 0.10228992253541946, | |
| "learning_rate": 0.00012123117927678164, | |
| "loss": 0.0767, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.10643473133338248, | |
| "grad_norm": 0.14043979346752167, | |
| "learning_rate": 0.0001212310326897118, | |
| "loss": 0.062, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.10658214785877497, | |
| "grad_norm": 18.650835037231445, | |
| "learning_rate": 0.00012123088589959048, | |
| "loss": 0.5735, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 0.10672956438416746, | |
| "grad_norm": 18.65635871887207, | |
| "learning_rate": 0.00012123073890641816, | |
| "loss": 0.697, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.10687698090955997, | |
| "grad_norm": 24.889253616333008, | |
| "learning_rate": 0.00012123059171019538, | |
| "loss": 1.1449, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.10702439743495246, | |
| "grad_norm": 0.32461315393447876, | |
| "learning_rate": 0.00012123044431092258, | |
| "loss": 0.0108, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.10717181396034496, | |
| "grad_norm": 0.195255309343338, | |
| "learning_rate": 0.00012123029670860029, | |
| "loss": 0.0082, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 0.10731923048573745, | |
| "grad_norm": 0.3942672312259674, | |
| "learning_rate": 0.00012123014890322897, | |
| "loss": 0.0278, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.10731923048573745, | |
| "eval_1_ratio_diff": -0.05455962587685115, | |
| "eval_accuracy": 0.8487918939984411, | |
| "eval_f1": 0.8399339933993399, | |
| "eval_loss": 0.6235100626945496, | |
| "eval_precision": 0.8914185639229422, | |
| "eval_recall": 0.7940717628705148, | |
| "eval_runtime": 1441.051, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.10746664701112994, | |
| "grad_norm": 20.8675537109375, | |
| "learning_rate": 0.00012123000089480917, | |
| "loss": 2.0488, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 0.10761406353652245, | |
| "grad_norm": 19.674894332885742, | |
| "learning_rate": 0.00012122985268334132, | |
| "loss": 0.9135, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.10776148006191494, | |
| "grad_norm": 0.16670210659503937, | |
| "learning_rate": 0.00012122970426882597, | |
| "loss": 0.0074, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 0.10790889658730744, | |
| "grad_norm": 20.293106079101562, | |
| "learning_rate": 0.00012122955565126358, | |
| "loss": 1.0217, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.10805631311269993, | |
| "grad_norm": 0.6973972916603088, | |
| "learning_rate": 0.00012122940683065467, | |
| "loss": 0.9069, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 0.10820372963809242, | |
| "grad_norm": 25.440162658691406, | |
| "learning_rate": 0.00012122925780699975, | |
| "loss": 1.5865, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 0.10835114616348493, | |
| "grad_norm": 4.310685157775879, | |
| "learning_rate": 0.00012122910858029928, | |
| "loss": 0.4176, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.10849856268887742, | |
| "grad_norm": 0.3989110291004181, | |
| "learning_rate": 0.00012122895915055379, | |
| "loss": 1.2954, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.10849856268887742, | |
| "eval_1_ratio_diff": 0.021823850350740415, | |
| "eval_accuracy": 0.8581449727201871, | |
| "eval_f1": 0.8610687022900764, | |
| "eval_loss": 0.4833138585090637, | |
| "eval_precision": 0.8430493273542601, | |
| "eval_recall": 0.8798751950078003, | |
| "eval_runtime": 1440.7462, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.10864597921426992, | |
| "grad_norm": 0.3485046923160553, | |
| "learning_rate": 0.00012122880951776379, | |
| "loss": 0.0092, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 0.10879339573966242, | |
| "grad_norm": 31.38138198852539, | |
| "learning_rate": 0.00012122865968192974, | |
| "loss": 2.2038, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 0.1089408122650549, | |
| "grad_norm": 0.1756962537765503, | |
| "learning_rate": 0.00012122850964305218, | |
| "loss": 0.0039, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 0.10908822879044741, | |
| "grad_norm": 0.4892203211784363, | |
| "learning_rate": 0.0001212283594011316, | |
| "loss": 1.2883, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.1092356453158399, | |
| "grad_norm": 0.38502997159957886, | |
| "learning_rate": 0.00012122820895616849, | |
| "loss": 0.015, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 0.1093830618412324, | |
| "grad_norm": 0.3273461163043976, | |
| "learning_rate": 0.00012122805830816339, | |
| "loss": 0.0328, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 0.1095304783666249, | |
| "grad_norm": 53.52883529663086, | |
| "learning_rate": 0.00012122790745711678, | |
| "loss": 1.4843, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 0.10967789489201739, | |
| "grad_norm": 0.2854032814502716, | |
| "learning_rate": 0.00012122775640302914, | |
| "loss": 0.0227, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.10967789489201739, | |
| "eval_1_ratio_diff": 0.014809041309431059, | |
| "eval_accuracy": 0.8448947778643804, | |
| "eval_f1": 0.8470407378939278, | |
| "eval_loss": 0.6297035217285156, | |
| "eval_precision": 0.8348484848484848, | |
| "eval_recall": 0.859594383775351, | |
| "eval_runtime": 1441.3865, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.1098253114174099, | |
| "grad_norm": 0.2311754673719406, | |
| "learning_rate": 0.00012122760514590104, | |
| "loss": 0.0063, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.10997272794280238, | |
| "grad_norm": 21.77858543395996, | |
| "learning_rate": 0.00012122745368573293, | |
| "loss": 1.6042, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 0.11012014446819489, | |
| "grad_norm": 0.12185559421777725, | |
| "learning_rate": 0.00012122730202252534, | |
| "loss": 0.0054, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 0.11026756099358738, | |
| "grad_norm": 0.07674361765384674, | |
| "learning_rate": 0.00012122715015627879, | |
| "loss": 1.2277, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 0.11041497751897988, | |
| "grad_norm": 1.0588175058364868, | |
| "learning_rate": 0.00012122699808699376, | |
| "loss": 0.0121, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 0.11056239404437238, | |
| "grad_norm": 148.854248046875, | |
| "learning_rate": 0.00012122684581467078, | |
| "loss": 1.6651, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.11070981056976487, | |
| "grad_norm": 0.07673851400613785, | |
| "learning_rate": 0.00012122669333931036, | |
| "loss": 0.0037, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 0.11085722709515737, | |
| "grad_norm": 0.14825621247291565, | |
| "learning_rate": 0.00012122654066091301, | |
| "loss": 0.0033, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.11085722709515737, | |
| "eval_1_ratio_diff": 0.0, | |
| "eval_accuracy": 0.8394388152766953, | |
| "eval_f1": 0.8393135725429017, | |
| "eval_loss": 0.7106738686561584, | |
| "eval_precision": 0.8393135725429017, | |
| "eval_recall": 0.8393135725429017, | |
| "eval_runtime": 1440.7668, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.11100464362054986, | |
| "grad_norm": 0.3325727880001068, | |
| "learning_rate": 0.00012122638777947923, | |
| "loss": 0.0043, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 0.11115206014594237, | |
| "grad_norm": 0.16898727416992188, | |
| "learning_rate": 0.00012122623469500956, | |
| "loss": 1.3778, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 0.11129947667133486, | |
| "grad_norm": 24.855741500854492, | |
| "learning_rate": 0.00012122608140750447, | |
| "loss": 1.1577, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.11144689319672735, | |
| "grad_norm": 0.15268811583518982, | |
| "learning_rate": 0.0001212259279169645, | |
| "loss": 0.0057, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.11159430972211985, | |
| "grad_norm": 37.5292854309082, | |
| "learning_rate": 0.00012122577422339017, | |
| "loss": 2.6301, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 0.11174172624751234, | |
| "grad_norm": 0.23876796662807465, | |
| "learning_rate": 0.000121225620326782, | |
| "loss": 0.0067, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 0.11188914277290485, | |
| "grad_norm": 0.14355158805847168, | |
| "learning_rate": 0.00012122546622714046, | |
| "loss": 0.0082, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 0.11203655929829734, | |
| "grad_norm": 0.14837191998958588, | |
| "learning_rate": 0.00012122531192446613, | |
| "loss": 1.1954, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.11203655929829734, | |
| "eval_1_ratio_diff": -0.006235385814497285, | |
| "eval_accuracy": 0.8456742010911925, | |
| "eval_f1": 0.8445839874411303, | |
| "eval_loss": 0.5836101174354553, | |
| "eval_precision": 0.8499210110584519, | |
| "eval_recall": 0.8393135725429017, | |
| "eval_runtime": 1440.5652, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.11218397582368983, | |
| "grad_norm": 1.0671629905700684, | |
| "learning_rate": 0.0001212251574187595, | |
| "loss": 0.0128, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 0.11233139234908233, | |
| "grad_norm": 22.311914443969727, | |
| "learning_rate": 0.00012122500271002106, | |
| "loss": 1.1378, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 0.11247880887447483, | |
| "grad_norm": 24.98206329345703, | |
| "learning_rate": 0.00012122484779825135, | |
| "loss": 1.4429, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 0.11262622539986733, | |
| "grad_norm": 0.10400061309337616, | |
| "learning_rate": 0.00012122469268345093, | |
| "loss": 0.8205, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 0.11277364192525982, | |
| "grad_norm": 0.1311234086751938, | |
| "learning_rate": 0.00012122453736562024, | |
| "loss": 0.0052, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.11292105845065231, | |
| "grad_norm": 24.459693908691406, | |
| "learning_rate": 0.00012122438184475986, | |
| "loss": 0.8169, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 0.11306847497604482, | |
| "grad_norm": 0.6599878072738647, | |
| "learning_rate": 0.0001212242261208703, | |
| "loss": 0.0172, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 0.11321589150143731, | |
| "grad_norm": 0.7011798024177551, | |
| "learning_rate": 0.00012122407019395205, | |
| "loss": 0.0101, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.11321589150143731, | |
| "eval_1_ratio_diff": -0.011691348402182389, | |
| "eval_accuracy": 0.852689010132502, | |
| "eval_f1": 0.850828729281768, | |
| "eval_loss": 0.616263747215271, | |
| "eval_precision": 0.8610223642172524, | |
| "eval_recall": 0.8408736349453978, | |
| "eval_runtime": 1440.4808, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.11336330802682981, | |
| "grad_norm": 0.11136188358068466, | |
| "learning_rate": 0.00012122391406400568, | |
| "loss": 0.0043, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 0.1135107245522223, | |
| "grad_norm": 0.09410673379898071, | |
| "learning_rate": 0.00012122375773103169, | |
| "loss": 0.0029, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.1136581410776148, | |
| "grad_norm": 0.0886264443397522, | |
| "learning_rate": 0.00012122360119503061, | |
| "loss": 0.0027, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 0.1138055576030073, | |
| "grad_norm": 0.06019139662384987, | |
| "learning_rate": 0.00012122344445600295, | |
| "loss": 0.0012, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 0.11395297412839979, | |
| "grad_norm": 24.27945327758789, | |
| "learning_rate": 0.00012122328751394924, | |
| "loss": 1.2476, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 0.1141003906537923, | |
| "grad_norm": 0.07040827721357346, | |
| "learning_rate": 0.00012122313036887001, | |
| "loss": 1.05, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 0.11424780717918478, | |
| "grad_norm": 21.743165969848633, | |
| "learning_rate": 0.00012122297302076579, | |
| "loss": 3.2561, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.11439522370457729, | |
| "grad_norm": 0.21815018355846405, | |
| "learning_rate": 0.00012122281546963711, | |
| "loss": 0.0085, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 0.11439522370457729, | |
| "eval_1_ratio_diff": 0.05845674201091189, | |
| "eval_accuracy": 0.8620420888542478, | |
| "eval_f1": 0.8695652173913043, | |
| "eval_loss": 0.5588727593421936, | |
| "eval_precision": 0.8240223463687151, | |
| "eval_recall": 0.9204368174726989, | |
| "eval_runtime": 1440.4266, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 0.11454264022996978, | |
| "grad_norm": 0.3978158235549927, | |
| "learning_rate": 0.0001212226577154845, | |
| "loss": 0.0087, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 0.11469005675536227, | |
| "grad_norm": 0.07042258977890015, | |
| "learning_rate": 0.00012122249975830848, | |
| "loss": 0.0021, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 0.11483747328075478, | |
| "grad_norm": 0.16607695817947388, | |
| "learning_rate": 0.00012122234159810957, | |
| "loss": 0.0024, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 0.11498488980614727, | |
| "grad_norm": 0.11605281382799149, | |
| "learning_rate": 0.00012122218323488832, | |
| "loss": 0.0026, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.11513230633153977, | |
| "grad_norm": 24.77876091003418, | |
| "learning_rate": 0.00012122202466864525, | |
| "loss": 1.4127, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 0.11527972285693226, | |
| "grad_norm": 0.17567309737205505, | |
| "learning_rate": 0.00012122186589938088, | |
| "loss": 0.0037, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 0.11542713938232475, | |
| "grad_norm": 0.19481156766414642, | |
| "learning_rate": 0.00012122170692709576, | |
| "loss": 0.6267, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 0.11557455590771726, | |
| "grad_norm": 24.115211486816406, | |
| "learning_rate": 0.00012122154775179043, | |
| "loss": 0.8964, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.11557455590771726, | |
| "eval_1_ratio_diff": 0.03975058456742009, | |
| "eval_accuracy": 0.8713951675759938, | |
| "eval_f1": 0.8762190547636909, | |
| "eval_loss": 0.5382638573646545, | |
| "eval_precision": 0.8439306358381503, | |
| "eval_recall": 0.9110764430577223, | |
| "eval_runtime": 1441.1253, | |
| "eval_samples_per_second": 0.89, | |
| "eval_steps_per_second": 0.445, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.11572197243310975, | |
| "grad_norm": 0.140619158744812, | |
| "learning_rate": 0.0001212213883734654, | |
| "loss": 0.0054, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.11586938895850225, | |
| "grad_norm": 0.12547695636749268, | |
| "learning_rate": 0.00012122122879212122, | |
| "loss": 0.3549, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 0.11601680548389474, | |
| "grad_norm": 0.12592053413391113, | |
| "learning_rate": 0.00012122106900775843, | |
| "loss": 0.0105, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 0.11616422200928724, | |
| "grad_norm": 0.11613775789737701, | |
| "learning_rate": 0.00012122090902037755, | |
| "loss": 0.0044, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 0.11631163853467974, | |
| "grad_norm": 0.06327944993972778, | |
| "learning_rate": 0.00012122074882997911, | |
| "loss": 0.0052, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 0.11645905506007223, | |
| "grad_norm": 0.26552170515060425, | |
| "learning_rate": 0.00012122058843656367, | |
| "loss": 0.0049, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.11660647158546474, | |
| "grad_norm": 0.05181106925010681, | |
| "learning_rate": 0.00012122042784013175, | |
| "loss": 0.8965, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 0.11675388811085723, | |
| "grad_norm": 0.07022108882665634, | |
| "learning_rate": 0.0001212202670406839, | |
| "loss": 1.4149, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 0.11675388811085723, | |
| "eval_1_ratio_diff": 0.014029618082618822, | |
| "eval_accuracy": 0.8565861262665627, | |
| "eval_f1": 0.8584615384615385, | |
| "eval_loss": 0.6103407144546509, | |
| "eval_precision": 0.8467374810318664, | |
| "eval_recall": 0.8705148205928237, | |
| "eval_runtime": 1439.8898, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 0.11690130463624972, | |
| "grad_norm": 0.20126762986183167, | |
| "learning_rate": 0.00012122010603822065, | |
| "loss": 0.0077, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 0.11704872116164222, | |
| "grad_norm": 0.09971367567777634, | |
| "learning_rate": 0.00012121994483274255, | |
| "loss": 0.0049, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 0.11719613768703471, | |
| "grad_norm": 0.06467089802026749, | |
| "learning_rate": 0.00012121978342425012, | |
| "loss": 0.005, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.11734355421242722, | |
| "grad_norm": 0.06981782615184784, | |
| "learning_rate": 0.00012121962181274392, | |
| "loss": 0.0028, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 0.11749097073781971, | |
| "grad_norm": 0.12012193351984024, | |
| "learning_rate": 0.00012121945999822448, | |
| "loss": 0.0022, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 0.11763838726321221, | |
| "grad_norm": 24.71665382385254, | |
| "learning_rate": 0.00012121929798069236, | |
| "loss": 1.756, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 0.1177858037886047, | |
| "grad_norm": 0.31951653957366943, | |
| "learning_rate": 0.0001212191357601481, | |
| "loss": 0.004, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 0.1179332203139972, | |
| "grad_norm": 0.03907225281000137, | |
| "learning_rate": 0.0001212189733365922, | |
| "loss": 0.0018, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.1179332203139972, | |
| "eval_1_ratio_diff": 0.003117692907248615, | |
| "eval_accuracy": 0.8628215120810601, | |
| "eval_f1": 0.8631415241057543, | |
| "eval_loss": 0.6979319453239441, | |
| "eval_precision": 0.8604651162790697, | |
| "eval_recall": 0.8658346333853354, | |
| "eval_runtime": 1440.2188, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.1180806368393897, | |
| "grad_norm": 0.14489419758319855, | |
| "learning_rate": 0.00012121881071002525, | |
| "loss": 0.004, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 0.11822805336478219, | |
| "grad_norm": 0.02964833378791809, | |
| "learning_rate": 0.00012121864788044781, | |
| "loss": 0.0014, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 0.1183754698901747, | |
| "grad_norm": 0.1308467835187912, | |
| "learning_rate": 0.00012121848484786039, | |
| "loss": 1.2428, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 0.11852288641556719, | |
| "grad_norm": 0.012196216732263565, | |
| "learning_rate": 0.00012121832161226353, | |
| "loss": 0.0039, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 0.11867030294095968, | |
| "grad_norm": 26.82729721069336, | |
| "learning_rate": 0.0001212181581736578, | |
| "loss": 0.9557, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.11881771946635218, | |
| "grad_norm": 55.06840515136719, | |
| "learning_rate": 0.00012121799453204374, | |
| "loss": 1.341, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 0.11896513599174467, | |
| "grad_norm": 0.10571928322315216, | |
| "learning_rate": 0.0001212178306874219, | |
| "loss": 0.0018, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 0.11911255251713718, | |
| "grad_norm": 23.6888427734375, | |
| "learning_rate": 0.00012121766663979284, | |
| "loss": 2.8349, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 0.11911255251713718, | |
| "eval_1_ratio_diff": 0.001558846453624252, | |
| "eval_accuracy": 0.8612626656274357, | |
| "eval_f1": 0.8613707165109035, | |
| "eval_loss": 0.6912267804145813, | |
| "eval_precision": 0.8600311041990669, | |
| "eval_recall": 0.8627145085803433, | |
| "eval_runtime": 1439.8805, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 0.11925996904252967, | |
| "grad_norm": 0.013893804512917995, | |
| "learning_rate": 0.00012121750238915708, | |
| "loss": 0.0039, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 0.11940738556792216, | |
| "grad_norm": 0.0326993353664875, | |
| "learning_rate": 0.00012121733793551521, | |
| "loss": 0.0071, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.11955480209331466, | |
| "grad_norm": 0.021896235644817352, | |
| "learning_rate": 0.00012121717327886775, | |
| "loss": 0.4694, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 0.11970221861870715, | |
| "grad_norm": 2.5759835243225098, | |
| "learning_rate": 0.00012121700841921524, | |
| "loss": 0.8411, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 0.11984963514409966, | |
| "grad_norm": 6.512516021728516, | |
| "learning_rate": 0.00012121684335655828, | |
| "loss": 1.2897, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 0.11999705166949215, | |
| "grad_norm": 1.0826752185821533, | |
| "learning_rate": 0.00012121667809089738, | |
| "loss": 0.067, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 0.12014446819488464, | |
| "grad_norm": 0.5020477771759033, | |
| "learning_rate": 0.00012121651262223313, | |
| "loss": 0.0061, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.12029188472027715, | |
| "grad_norm": 1.0385483503341675, | |
| "learning_rate": 0.00012121634695056605, | |
| "loss": 0.0162, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 0.12029188472027715, | |
| "eval_1_ratio_diff": 0.024162120031176904, | |
| "eval_accuracy": 0.8854247856586126, | |
| "eval_f1": 0.8880426504188881, | |
| "eval_loss": 0.4667970538139343, | |
| "eval_precision": 0.8675595238095238, | |
| "eval_recall": 0.9095163806552262, | |
| "eval_runtime": 1440.04, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 0.12043930124566964, | |
| "grad_norm": 0.3257231116294861, | |
| "learning_rate": 0.00012121618107589671, | |
| "loss": 0.0073, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 0.12058671777106214, | |
| "grad_norm": 0.17591340839862823, | |
| "learning_rate": 0.00012121601499822568, | |
| "loss": 0.7197, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 0.12073413429645463, | |
| "grad_norm": 20.77132797241211, | |
| "learning_rate": 0.0001212158487175535, | |
| "loss": 1.5072, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 0.12088155082184712, | |
| "grad_norm": 0.013665467500686646, | |
| "learning_rate": 0.00012121568223388071, | |
| "loss": 0.0014, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.12102896734723963, | |
| "grad_norm": 0.368145614862442, | |
| "learning_rate": 0.00012121551554720792, | |
| "loss": 1.0871, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 0.12117638387263212, | |
| "grad_norm": 0.2764877378940582, | |
| "learning_rate": 0.00012121534865753563, | |
| "loss": 0.0044, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 0.12132380039802462, | |
| "grad_norm": 0.15803444385528564, | |
| "learning_rate": 0.00012121518156486446, | |
| "loss": 0.0058, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 0.12147121692341711, | |
| "grad_norm": 21.269418716430664, | |
| "learning_rate": 0.0001212150142691949, | |
| "loss": 1.5637, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 0.12147121692341711, | |
| "eval_1_ratio_diff": -0.03117692907248637, | |
| "eval_accuracy": 0.8877630553390491, | |
| "eval_f1": 0.8840579710144928, | |
| "eval_loss": 0.5637651681900024, | |
| "eval_precision": 0.913477537437604, | |
| "eval_recall": 0.8564742589703588, | |
| "eval_runtime": 1440.5567, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 0.12161863344880962, | |
| "grad_norm": 21.965253829956055, | |
| "learning_rate": 0.00012121484677052757, | |
| "loss": 0.9775, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.12176604997420211, | |
| "grad_norm": 5.706968307495117, | |
| "learning_rate": 0.000121214679068863, | |
| "loss": 1.2593, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 0.1219134664995946, | |
| "grad_norm": 78.91386413574219, | |
| "learning_rate": 0.00012121451116420174, | |
| "loss": 1.8529, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 0.1220608830249871, | |
| "grad_norm": 20.03242301940918, | |
| "learning_rate": 0.00012121434305654442, | |
| "loss": 3.822, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 0.1222082995503796, | |
| "grad_norm": 18.92554473876953, | |
| "learning_rate": 0.00012121417474589151, | |
| "loss": 1.7478, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 0.1223557160757721, | |
| "grad_norm": 18.513463973999023, | |
| "learning_rate": 0.00012121400623224365, | |
| "loss": 0.9207, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.12250313260116459, | |
| "grad_norm": 2.1414077281951904, | |
| "learning_rate": 0.00012121383751560137, | |
| "loss": 0.0559, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 0.12265054912655708, | |
| "grad_norm": 1.9082714319229126, | |
| "learning_rate": 0.00012121366859596523, | |
| "loss": 0.0867, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.12265054912655708, | |
| "eval_1_ratio_diff": -0.11223694466095091, | |
| "eval_accuracy": 0.8316445830085737, | |
| "eval_f1": 0.8101933216168717, | |
| "eval_loss": 0.46373099088668823, | |
| "eval_precision": 0.9275653923541247, | |
| "eval_recall": 0.719188767550702, | |
| "eval_runtime": 1439.8045, | |
| "eval_samples_per_second": 0.891, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.12279796565194959, | |
| "grad_norm": 17.66658592224121, | |
| "learning_rate": 0.0001212134994733358, | |
| "loss": 0.694, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 0.12294538217734208, | |
| "grad_norm": 0.5736209750175476, | |
| "learning_rate": 0.00012121333014771369, | |
| "loss": 0.5414, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 0.12309279870273458, | |
| "grad_norm": 16.726125717163086, | |
| "learning_rate": 0.0001212131606190994, | |
| "loss": 2.7414, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.12324021522812707, | |
| "grad_norm": 1.1649620532989502, | |
| "learning_rate": 0.00012121299088749353, | |
| "loss": 0.0285, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 0.12338763175351956, | |
| "grad_norm": 18.4560604095459, | |
| "learning_rate": 0.00012121282095289665, | |
| "loss": 0.9068, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 0.12353504827891207, | |
| "grad_norm": 0.3899083137512207, | |
| "learning_rate": 0.00012121265081530934, | |
| "loss": 0.0192, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 0.12368246480430456, | |
| "grad_norm": 0.6309532523155212, | |
| "learning_rate": 0.00012121248047473215, | |
| "loss": 0.0398, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 0.12382988132969706, | |
| "grad_norm": 25.81404685974121, | |
| "learning_rate": 0.00012121230993116564, | |
| "loss": 0.9268, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.12382988132969706, | |
| "eval_1_ratio_diff": -0.05222135619641466, | |
| "eval_accuracy": 0.8620420888542478, | |
| "eval_f1": 0.854320987654321, | |
| "eval_loss": 0.49660980701446533, | |
| "eval_precision": 0.9041811846689896, | |
| "eval_recall": 0.8096723868954758, | |
| "eval_runtime": 1439.1088, | |
| "eval_samples_per_second": 0.892, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.12397729785508955, | |
| "grad_norm": 0.17194198071956635, | |
| "learning_rate": 0.00012121213918461043, | |
| "loss": 0.0091, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 0.12412471438048205, | |
| "grad_norm": 0.1233774870634079, | |
| "learning_rate": 0.00012121196823506704, | |
| "loss": 0.033, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 0.12427213090587455, | |
| "grad_norm": 0.12911829352378845, | |
| "learning_rate": 0.00012121179708253609, | |
| "loss": 0.9894, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 0.12441954743126704, | |
| "grad_norm": 12.796908378601074, | |
| "learning_rate": 0.00012121162572701811, | |
| "loss": 0.2167, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 0.12456696395665955, | |
| "grad_norm": 19.411853790283203, | |
| "learning_rate": 0.0001212114541685137, | |
| "loss": 1.1343, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.12471438048205204, | |
| "grad_norm": 2.125748872756958, | |
| "learning_rate": 0.00012121128240702341, | |
| "loss": 0.0167, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 0.12486179700744453, | |
| "grad_norm": 0.23534013330936432, | |
| "learning_rate": 0.00012121111044254785, | |
| "loss": 0.0099, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 0.12500921353283703, | |
| "grad_norm": 0.2723231911659241, | |
| "learning_rate": 0.00012121093827508758, | |
| "loss": 0.0222, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 0.12500921353283703, | |
| "eval_1_ratio_diff": -0.05144193296960253, | |
| "eval_accuracy": 0.8721745908028059, | |
| "eval_f1": 0.8651315789473685, | |
| "eval_loss": 0.608511209487915, | |
| "eval_precision": 0.9147826086956522, | |
| "eval_recall": 0.8205928237129485, | |
| "eval_runtime": 1438.9329, | |
| "eval_samples_per_second": 0.892, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 0.12515663005822952, | |
| "grad_norm": 0.1575896292924881, | |
| "learning_rate": 0.00012121076590464316, | |
| "loss": 0.0045, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 0.12530404658362201, | |
| "grad_norm": 25.341609954833984, | |
| "learning_rate": 0.00012121059333121521, | |
| "loss": 2.9943, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.12545146310901453, | |
| "grad_norm": 0.13375264406204224, | |
| "learning_rate": 0.00012121042055480427, | |
| "loss": 0.0033, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 0.12559887963440702, | |
| "grad_norm": 0.06750854849815369, | |
| "learning_rate": 0.00012121024757541094, | |
| "loss": 0.0024, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 0.12574629615979951, | |
| "grad_norm": 0.05674993619322777, | |
| "learning_rate": 0.00012121007439303577, | |
| "loss": 1.2325, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 0.125893712685192, | |
| "grad_norm": 0.06746107339859009, | |
| "learning_rate": 0.00012120990100767938, | |
| "loss": 0.0016, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 0.1260411292105845, | |
| "grad_norm": 18.890642166137695, | |
| "learning_rate": 0.00012120972741934233, | |
| "loss": 1.5509, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.12618854573597701, | |
| "grad_norm": 0.0601690337061882, | |
| "learning_rate": 0.00012120955362802522, | |
| "loss": 0.0042, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 0.12618854573597701, | |
| "eval_1_ratio_diff": -0.21278254091971943, | |
| "eval_accuracy": 0.7575993764614185, | |
| "eval_f1": 0.6917740336967294, | |
| "eval_loss": 1.260048747062683, | |
| "eval_precision": 0.9483695652173914, | |
| "eval_recall": 0.5444617784711389, | |
| "eval_runtime": 1438.8451, | |
| "eval_samples_per_second": 0.892, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 0.1263359622613695, | |
| "grad_norm": 31.62714385986328, | |
| "learning_rate": 0.00012120937963372859, | |
| "loss": 2.3397, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 0.126483378786762, | |
| "grad_norm": 0.09423007071018219, | |
| "learning_rate": 0.00012120920543645306, | |
| "loss": 0.0056, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 0.1266307953121545, | |
| "grad_norm": 18.73729133605957, | |
| "learning_rate": 0.0001212090310361992, | |
| "loss": 1.3417, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 0.12677821183754698, | |
| "grad_norm": 0.16277751326560974, | |
| "learning_rate": 0.0001212088564329676, | |
| "loss": 0.0088, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.1269256283629395, | |
| "grad_norm": 18.30181884765625, | |
| "learning_rate": 0.00012120868162675886, | |
| "loss": 0.966, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 0.127073044888332, | |
| "grad_norm": 0.3613678812980652, | |
| "learning_rate": 0.00012120850661757353, | |
| "loss": 1.0053, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 0.12722046141372448, | |
| "grad_norm": 0.7345402836799622, | |
| "learning_rate": 0.00012120833140541222, | |
| "loss": 1.4195, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 0.12736787793911697, | |
| "grad_norm": 1.3485078811645508, | |
| "learning_rate": 0.00012120815599027552, | |
| "loss": 0.0247, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 0.12736787793911697, | |
| "eval_1_ratio_diff": -0.04130943102104445, | |
| "eval_accuracy": 0.8651597817614964, | |
| "eval_f1": 0.8592351505288853, | |
| "eval_loss": 0.4965825080871582, | |
| "eval_precision": 0.8979591836734694, | |
| "eval_recall": 0.8237129485179407, | |
| "eval_runtime": 1438.3328, | |
| "eval_samples_per_second": 0.892, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 0.12751529446450946, | |
| "grad_norm": 17.94972801208496, | |
| "learning_rate": 0.000121207980372164, | |
| "loss": 0.7518, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.12766271098990198, | |
| "grad_norm": 1.6150920391082764, | |
| "learning_rate": 0.00012120780455107827, | |
| "loss": 0.0328, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 0.12781012751529447, | |
| "grad_norm": 0.22876843810081482, | |
| "learning_rate": 0.00012120762852701892, | |
| "loss": 0.0105, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 0.12795754404068696, | |
| "grad_norm": 0.1126691922545433, | |
| "learning_rate": 0.0001212074522999865, | |
| "loss": 0.0038, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 0.12810496056607945, | |
| "grad_norm": 0.5277115702629089, | |
| "learning_rate": 0.00012120727586998164, | |
| "loss": 0.0094, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 0.12825237709147194, | |
| "grad_norm": 0.11928611248731613, | |
| "learning_rate": 0.00012120709923700492, | |
| "loss": 0.0054, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.12839979361686446, | |
| "grad_norm": 22.84393310546875, | |
| "learning_rate": 0.00012120692240105693, | |
| "loss": 1.7358, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 0.12854721014225695, | |
| "grad_norm": 0.08426441997289658, | |
| "learning_rate": 0.0001212067453621383, | |
| "loss": 0.0029, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 0.12854721014225695, | |
| "eval_1_ratio_diff": -0.014029618082618878, | |
| "eval_accuracy": 0.8784099766173032, | |
| "eval_f1": 0.8765822784810127, | |
| "eval_loss": 0.6492618322372437, | |
| "eval_precision": 0.8892455858747994, | |
| "eval_recall": 0.8642745709828393, | |
| "eval_runtime": 1438.7827, | |
| "eval_samples_per_second": 0.892, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 0.12869462666764944, | |
| "grad_norm": 22.079143524169922, | |
| "learning_rate": 0.00012120656812024955, | |
| "loss": 1.2809, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 0.12884204319304193, | |
| "grad_norm": 21.899768829345703, | |
| "learning_rate": 0.00012120639067539131, | |
| "loss": 3.0657, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 0.12898945971843442, | |
| "grad_norm": 0.1824941784143448, | |
| "learning_rate": 0.0001212062130275642, | |
| "loss": 0.0032, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.12913687624382694, | |
| "grad_norm": 0.1769951432943344, | |
| "learning_rate": 0.00012120603517676877, | |
| "loss": 1.2614, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 0.12928429276921943, | |
| "grad_norm": 21.305864334106445, | |
| "learning_rate": 0.00012120585712300566, | |
| "loss": 1.0725, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 0.12943170929461192, | |
| "grad_norm": 0.44233354926109314, | |
| "learning_rate": 0.00012120567886627544, | |
| "loss": 0.9641, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 0.12957912582000441, | |
| "grad_norm": 0.2779258191585541, | |
| "learning_rate": 0.00012120550040657871, | |
| "loss": 0.0096, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 0.1297265423453969, | |
| "grad_norm": 22.293994903564453, | |
| "learning_rate": 0.00012120532174391606, | |
| "loss": 0.9558, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.1297265423453969, | |
| "eval_1_ratio_diff": 0.031956352299298496, | |
| "eval_accuracy": 0.8901013250194856, | |
| "eval_f1": 0.8934240362811792, | |
| "eval_loss": 0.45321086049079895, | |
| "eval_precision": 0.8665689149560117, | |
| "eval_recall": 0.921996879875195, | |
| "eval_runtime": 1438.3028, | |
| "eval_samples_per_second": 0.892, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.12987395887078942, | |
| "grad_norm": 0.15532580018043518, | |
| "learning_rate": 0.00012120514287828811, | |
| "loss": 0.0082, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 0.13002137539618192, | |
| "grad_norm": 27.137800216674805, | |
| "learning_rate": 0.00012120496380969545, | |
| "loss": 0.8253, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 0.1301687919215744, | |
| "grad_norm": 0.12127237766981125, | |
| "learning_rate": 0.00012120478453813868, | |
| "loss": 0.007, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 0.1303162084469669, | |
| "grad_norm": 0.12471210211515427, | |
| "learning_rate": 0.00012120460506361839, | |
| "loss": 0.0118, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 0.1304636249723594, | |
| "grad_norm": 45.0229377746582, | |
| "learning_rate": 0.0001212044253861352, | |
| "loss": 3.5846, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.1306110414977519, | |
| "grad_norm": 0.4128153622150421, | |
| "learning_rate": 0.0001212042455056897, | |
| "loss": 0.0073, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 0.1307584580231444, | |
| "grad_norm": 0.40481987595558167, | |
| "learning_rate": 0.0001212040654222825, | |
| "loss": 0.0072, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 0.1309058745485369, | |
| "grad_norm": 0.11055589467287064, | |
| "learning_rate": 0.00012120388513591419, | |
| "loss": 1.0826, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 0.1309058745485369, | |
| "eval_1_ratio_diff": 0.1200311769290725, | |
| "eval_accuracy": 0.8332034294621979, | |
| "eval_f1": 0.850974930362117, | |
| "eval_loss": 0.6285108923912048, | |
| "eval_precision": 0.7685534591194969, | |
| "eval_recall": 0.953198127925117, | |
| "eval_runtime": 1438.3285, | |
| "eval_samples_per_second": 0.892, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 0.13105329107392938, | |
| "grad_norm": 0.12451104074716568, | |
| "learning_rate": 0.0001212037046465854, | |
| "loss": 1.0074, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 0.1312007075993219, | |
| "grad_norm": 0.27884507179260254, | |
| "learning_rate": 0.0001212035239542967, | |
| "loss": 0.0129, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.1313481241247144, | |
| "grad_norm": 0.992557168006897, | |
| "learning_rate": 0.00012120334305904872, | |
| "loss": 1.4174, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 0.13149554065010688, | |
| "grad_norm": 0.9067917466163635, | |
| "learning_rate": 0.00012120316196084206, | |
| "loss": 1.435, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 0.13164295717549937, | |
| "grad_norm": 20.08501625061035, | |
| "learning_rate": 0.00012120298065967733, | |
| "loss": 1.7277, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 0.13179037370089186, | |
| "grad_norm": 0.20194768905639648, | |
| "learning_rate": 0.00012120279915555515, | |
| "loss": 0.005, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 0.13193779022628438, | |
| "grad_norm": 0.29110512137413025, | |
| "learning_rate": 0.0001212026174484761, | |
| "loss": 0.0065, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.13208520675167687, | |
| "grad_norm": 0.3067338764667511, | |
| "learning_rate": 0.00012120243553844079, | |
| "loss": 0.006, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 0.13208520675167687, | |
| "eval_1_ratio_diff": -0.002338269680436489, | |
| "eval_accuracy": 0.8978955572876072, | |
| "eval_f1": 0.8975762314308053, | |
| "eval_loss": 0.42508459091186523, | |
| "eval_precision": 0.8996865203761756, | |
| "eval_recall": 0.8954758190327613, | |
| "eval_runtime": 1439.0957, | |
| "eval_samples_per_second": 0.892, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1792 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 108536, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 8, | |
| "save_steps": 64, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 1000, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.5936070605815808e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |