{
  "best_global_step": 2754,
  "best_metric": 1.0,
  "best_model_checkpoint": "./results/wallet_deberta_v10/checkpoint-2754",
  "epoch": 0.4001452960406829,
  "eval_steps": 1377,
  "global_step": 2754,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.000726480203414457,
      "grad_norm": 1.474841833114624,
      "learning_rate": 5.809731299927378e-07,
      "loss": 0.6929,
      "step": 5
    },
    {
      "epoch": 0.001452960406828914,
      "grad_norm": 0.7840715646743774,
      "learning_rate": 1.3071895424836602e-06,
      "loss": 0.6982,
      "step": 10
    },
    {
      "epoch": 0.002179440610243371,
      "grad_norm": 0.6173088550567627,
      "learning_rate": 2.0334059549745822e-06,
      "loss": 0.6906,
      "step": 15
    },
    {
      "epoch": 0.002905920813657828,
      "grad_norm": 1.7541619539260864,
      "learning_rate": 2.759622367465505e-06,
      "loss": 0.6917,
      "step": 20
    },
    {
      "epoch": 0.003632401017072285,
      "grad_norm": 1.0843828916549683,
      "learning_rate": 3.4858387799564276e-06,
      "loss": 0.6885,
      "step": 25
    },
    {
      "epoch": 0.004358881220486742,
      "grad_norm": 0.6747203469276428,
      "learning_rate": 4.212055192447349e-06,
      "loss": 0.6854,
      "step": 30
    },
    {
      "epoch": 0.005085361423901199,
      "grad_norm": 2.215040445327759,
      "learning_rate": 4.938271604938272e-06,
      "loss": 0.6895,
      "step": 35
    },
    {
      "epoch": 0.005811841627315656,
      "grad_norm": 1.2605243921279907,
      "learning_rate": 5.664488017429194e-06,
      "loss": 0.6914,
      "step": 40
    },
    {
      "epoch": 0.006538321830730113,
      "grad_norm": 1.5572800636291504,
      "learning_rate": 6.390704429920116e-06,
      "loss": 0.6913,
      "step": 45
    },
    {
      "epoch": 0.00726480203414457,
      "grad_norm": 0.5582659244537354,
      "learning_rate": 7.116920842411038e-06,
      "loss": 0.7015,
      "step": 50
    },
    {
      "epoch": 0.007991282237559027,
      "grad_norm": 2.64322829246521,
      "learning_rate": 7.84313725490196e-06,
      "loss": 0.6883,
      "step": 55
    },
    {
      "epoch": 0.008717762440973484,
      "grad_norm": 0.4942118525505066,
      "learning_rate": 8.569353667392883e-06,
      "loss": 0.6863,
      "step": 60
    },
    {
      "epoch": 0.00944424264438794,
      "grad_norm": 1.1258032321929932,
      "learning_rate": 9.295570079883805e-06,
      "loss": 0.6999,
      "step": 65
    },
    {
      "epoch": 0.010170722847802398,
      "grad_norm": 1.727752923965454,
      "learning_rate": 1.0021786492374727e-05,
      "loss": 0.6932,
      "step": 70
    },
    {
      "epoch": 0.010897203051216855,
      "grad_norm": 1.4421207904815674,
      "learning_rate": 1.0748002904865651e-05,
      "loss": 0.6898,
      "step": 75
    },
    {
      "epoch": 0.011623683254631312,
      "grad_norm": 1.5667537450790405,
      "learning_rate": 1.1474219317356574e-05,
      "loss": 0.6965,
      "step": 80
    },
    {
      "epoch": 0.012350163458045769,
      "grad_norm": 0.44926533102989197,
      "learning_rate": 1.2200435729847496e-05,
      "loss": 0.6929,
      "step": 85
    },
    {
      "epoch": 0.013076643661460226,
      "grad_norm": 0.425881028175354,
      "learning_rate": 1.2926652142338416e-05,
      "loss": 0.6907,
      "step": 90
    },
    {
      "epoch": 0.013803123864874683,
      "grad_norm": 0.4951478838920593,
      "learning_rate": 1.365286855482934e-05,
      "loss": 0.6993,
      "step": 95
    },
    {
      "epoch": 0.01452960406828914,
      "grad_norm": 0.3694448471069336,
      "learning_rate": 1.4379084967320261e-05,
      "loss": 0.6831,
      "step": 100
    },
    {
      "epoch": 0.015256084271703597,
      "grad_norm": 1.17753005027771,
      "learning_rate": 1.5105301379811185e-05,
      "loss": 0.68,
      "step": 105
    },
    {
      "epoch": 0.015982564475118054,
      "grad_norm": 0.611096978187561,
      "learning_rate": 1.5831517792302105e-05,
      "loss": 0.6921,
      "step": 110
    },
    {
      "epoch": 0.01670904467853251,
      "grad_norm": 1.563934087753296,
      "learning_rate": 1.655773420479303e-05,
      "loss": 0.6865,
      "step": 115
    },
    {
      "epoch": 0.017435524881946968,
      "grad_norm": 0.928711473941803,
      "learning_rate": 1.728395061728395e-05,
      "loss": 0.6861,
      "step": 120
    },
    {
      "epoch": 0.018162005085361425,
      "grad_norm": 1.1964377164840698,
      "learning_rate": 1.8010167029774874e-05,
      "loss": 0.7021,
      "step": 125
    },
    {
      "epoch": 0.01888848528877588,
      "grad_norm": 0.3896273970603943,
      "learning_rate": 1.8736383442265794e-05,
      "loss": 0.6918,
      "step": 130
    },
    {
      "epoch": 0.01961496549219034,
      "grad_norm": 0.4799005091190338,
      "learning_rate": 1.946259985475672e-05,
      "loss": 0.6954,
      "step": 135
    },
    {
      "epoch": 0.020341445695604796,
      "grad_norm": 0.6113623380661011,
      "learning_rate": 2.018881626724764e-05,
      "loss": 0.6886,
      "step": 140
    },
    {
      "epoch": 0.021067925899019253,
      "grad_norm": 0.6965861320495605,
      "learning_rate": 2.0915032679738563e-05,
      "loss": 0.6814,
      "step": 145
    },
    {
      "epoch": 0.02179440610243371,
      "grad_norm": 0.46387240290641785,
      "learning_rate": 2.1641249092229483e-05,
      "loss": 0.681,
      "step": 150
    },
    {
      "epoch": 0.022520886305848167,
      "grad_norm": 0.4296594560146332,
      "learning_rate": 2.2367465504720407e-05,
      "loss": 0.6853,
      "step": 155
    },
    {
      "epoch": 0.023247366509262624,
      "grad_norm": 0.8854900002479553,
      "learning_rate": 2.3093681917211328e-05,
      "loss": 0.6784,
      "step": 160
    },
    {
      "epoch": 0.02397384671267708,
      "grad_norm": 0.7150282263755798,
      "learning_rate": 2.3819898329702252e-05,
      "loss": 0.6774,
      "step": 165
    },
    {
      "epoch": 0.024700326916091538,
      "grad_norm": 0.8739128708839417,
      "learning_rate": 2.4546114742193176e-05,
      "loss": 0.6662,
      "step": 170
    },
    {
      "epoch": 0.025426807119505995,
      "grad_norm": 0.7919905781745911,
      "learning_rate": 2.5272331154684096e-05,
      "loss": 0.6601,
      "step": 175
    },
    {
      "epoch": 0.02615328732292045,
      "grad_norm": 0.6220109462738037,
      "learning_rate": 2.599854756717502e-05,
      "loss": 0.649,
      "step": 180
    },
    {
      "epoch": 0.02687976752633491,
      "grad_norm": 0.8708673715591431,
      "learning_rate": 2.672476397966594e-05,
      "loss": 0.6318,
      "step": 185
    },
    {
      "epoch": 0.027606247729749366,
      "grad_norm": 1.0253198146820068,
      "learning_rate": 2.7450980392156865e-05,
      "loss": 0.6025,
      "step": 190
    },
    {
      "epoch": 0.028332727933163823,
      "grad_norm": 1.1449552774429321,
      "learning_rate": 2.817719680464779e-05,
      "loss": 0.5564,
      "step": 195
    },
    {
      "epoch": 0.02905920813657828,
      "grad_norm": 3.2159643173217773,
      "learning_rate": 2.890341321713871e-05,
      "loss": 0.515,
      "step": 200
    },
    {
      "epoch": 0.029785688339992736,
      "grad_norm": 1.912434458732605,
      "learning_rate": 2.962962962962963e-05,
      "loss": 0.4635,
      "step": 205
    },
    {
      "epoch": 0.030512168543407193,
      "grad_norm": 1.9808599948883057,
      "learning_rate": 3.0355846042120557e-05,
      "loss": 0.4233,
      "step": 210
    },
    {
      "epoch": 0.03123864874682165,
      "grad_norm": 1.929961085319519,
      "learning_rate": 3.1082062454611474e-05,
      "loss": 0.3505,
      "step": 215
    },
    {
      "epoch": 0.03196512895023611,
      "grad_norm": 2.4213836193084717,
      "learning_rate": 3.1808278867102395e-05,
      "loss": 0.3079,
      "step": 220
    },
    {
      "epoch": 0.03269160915365056,
      "grad_norm": 8.767487525939941,
      "learning_rate": 3.2534495279593316e-05,
      "loss": 0.2805,
      "step": 225
    },
    {
      "epoch": 0.03341808935706502,
      "grad_norm": 6.868554592132568,
      "learning_rate": 3.326071169208424e-05,
      "loss": 0.2501,
      "step": 230
    },
    {
      "epoch": 0.034144569560479475,
      "grad_norm": 10.502647399902344,
      "learning_rate": 3.3986928104575163e-05,
      "loss": 0.2505,
      "step": 235
    },
    {
      "epoch": 0.034871049763893935,
      "grad_norm": 2.8313727378845215,
      "learning_rate": 3.471314451706609e-05,
      "loss": 0.2265,
      "step": 240
    },
    {
      "epoch": 0.03559752996730839,
      "grad_norm": 4.173934459686279,
      "learning_rate": 3.543936092955701e-05,
      "loss": 0.2045,
      "step": 245
    },
    {
      "epoch": 0.03632401017072285,
      "grad_norm": 1.775830626487732,
      "learning_rate": 3.616557734204793e-05,
      "loss": 0.1584,
      "step": 250
    },
    {
      "epoch": 0.0370504903741373,
      "grad_norm": 3.129055976867676,
      "learning_rate": 3.689179375453886e-05,
      "loss": 0.131,
      "step": 255
    },
    {
      "epoch": 0.03777697057755176,
      "grad_norm": 4.053362846374512,
      "learning_rate": 3.761801016702978e-05,
      "loss": 0.1333,
      "step": 260
    },
    {
      "epoch": 0.03850345078096622,
      "grad_norm": 3.1923694610595703,
      "learning_rate": 3.83442265795207e-05,
      "loss": 0.1734,
      "step": 265
    },
    {
      "epoch": 0.03922993098438068,
      "grad_norm": 4.751387119293213,
      "learning_rate": 3.907044299201162e-05,
      "loss": 0.1296,
      "step": 270
    },
    {
      "epoch": 0.03995641118779513,
      "grad_norm": 1.9991039037704468,
      "learning_rate": 3.979665940450255e-05,
      "loss": 0.1297,
      "step": 275
    },
    {
      "epoch": 0.04068289139120959,
      "grad_norm": 5.077785015106201,
      "learning_rate": 4.052287581699347e-05,
      "loss": 0.1127,
      "step": 280
    },
    {
      "epoch": 0.041409371594624045,
      "grad_norm": 3.6270077228546143,
      "learning_rate": 4.124909222948439e-05,
      "loss": 0.1125,
      "step": 285
    },
    {
      "epoch": 0.042135851798038505,
      "grad_norm": 1.017204999923706,
      "learning_rate": 4.197530864197531e-05,
      "loss": 0.0802,
      "step": 290
    },
    {
      "epoch": 0.04286233200145296,
      "grad_norm": 1.492018222808838,
      "learning_rate": 4.270152505446624e-05,
      "loss": 0.1008,
      "step": 295
    },
    {
      "epoch": 0.04358881220486742,
      "grad_norm": 1.7372925281524658,
      "learning_rate": 4.342774146695716e-05,
      "loss": 0.1172,
      "step": 300
    },
    {
      "epoch": 0.04431529240828187,
      "grad_norm": 2.817929983139038,
      "learning_rate": 4.415395787944808e-05,
      "loss": 0.1096,
      "step": 305
    },
    {
      "epoch": 0.04504177261169633,
      "grad_norm": 0.9688124656677246,
      "learning_rate": 4.4880174291939e-05,
      "loss": 0.0663,
      "step": 310
    },
    {
      "epoch": 0.04576825281511079,
      "grad_norm": 3.9759960174560547,
      "learning_rate": 4.5606390704429926e-05,
      "loss": 0.0706,
      "step": 315
    },
    {
      "epoch": 0.04649473301852525,
      "grad_norm": 5.014834880828857,
      "learning_rate": 4.633260711692085e-05,
      "loss": 0.0681,
      "step": 320
    },
    {
      "epoch": 0.0472212132219397,
      "grad_norm": 3.1871182918548584,
      "learning_rate": 4.705882352941177e-05,
      "loss": 0.0934,
      "step": 325
    },
    {
      "epoch": 0.04794769342535416,
      "grad_norm": 5.145167350769043,
      "learning_rate": 4.778503994190269e-05,
      "loss": 0.0777,
      "step": 330
    },
    {
      "epoch": 0.048674173628768615,
      "grad_norm": 6.0013275146484375,
      "learning_rate": 4.8511256354393615e-05,
      "loss": 0.0825,
      "step": 335
    },
    {
      "epoch": 0.049400653832183075,
      "grad_norm": 0.9712790250778198,
      "learning_rate": 4.9237472766884536e-05,
      "loss": 0.0417,
      "step": 340
    },
    {
      "epoch": 0.05012713403559753,
      "grad_norm": 1.4921551942825317,
      "learning_rate": 4.9963689179375456e-05,
      "loss": 0.0316,
      "step": 345
    },
    {
      "epoch": 0.05085361423901199,
      "grad_norm": 4.556818008422852,
      "learning_rate": 5.0689905591866384e-05,
      "loss": 0.0592,
      "step": 350
    },
    {
      "epoch": 0.05158009444242644,
      "grad_norm": 2.8250820636749268,
      "learning_rate": 5.1416122004357304e-05,
      "loss": 0.0591,
      "step": 355
    },
    {
      "epoch": 0.0523065746458409,
      "grad_norm": 2.345460891723633,
      "learning_rate": 5.2142338416848225e-05,
      "loss": 0.073,
      "step": 360
    },
    {
      "epoch": 0.05303305484925536,
      "grad_norm": 0.44890037178993225,
      "learning_rate": 5.2868554829339145e-05,
      "loss": 0.057,
      "step": 365
    },
    {
      "epoch": 0.05375953505266982,
      "grad_norm": 1.1340672969818115,
      "learning_rate": 5.3594771241830066e-05,
      "loss": 0.0466,
      "step": 370
    },
    {
      "epoch": 0.05448601525608427,
      "grad_norm": 0.7642996311187744,
      "learning_rate": 5.4320987654320986e-05,
      "loss": 0.0437,
      "step": 375
    },
    {
      "epoch": 0.05521249545949873,
      "grad_norm": 4.866988658905029,
      "learning_rate": 5.504720406681191e-05,
      "loss": 0.0654,
      "step": 380
    },
    {
      "epoch": 0.055938975662913185,
      "grad_norm": 0.9396504163742065,
      "learning_rate": 5.577342047930284e-05,
      "loss": 0.0184,
      "step": 385
    },
    {
      "epoch": 0.056665455866327645,
      "grad_norm": 0.5176196098327637,
      "learning_rate": 5.649963689179376e-05,
      "loss": 0.0208,
      "step": 390
    },
    {
      "epoch": 0.0573919360697421,
      "grad_norm": 4.328392028808594,
      "learning_rate": 5.722585330428468e-05,
      "loss": 0.0186,
      "step": 395
    },
    {
      "epoch": 0.05811841627315656,
      "grad_norm": 6.72576379776001,
      "learning_rate": 5.79520697167756e-05,
      "loss": 0.0311,
      "step": 400
    },
    {
      "epoch": 0.05884489647657101,
      "grad_norm": 10.39561653137207,
      "learning_rate": 5.8678286129266523e-05,
      "loss": 0.0453,
      "step": 405
    },
    {
      "epoch": 0.05957137667998547,
      "grad_norm": 8.84882926940918,
      "learning_rate": 5.9404502541757444e-05,
      "loss": 0.0821,
      "step": 410
    },
    {
      "epoch": 0.060297856883399926,
      "grad_norm": 0.5676841735839844,
      "learning_rate": 6.0130718954248365e-05,
      "loss": 0.1021,
      "step": 415
    },
    {
      "epoch": 0.06102433708681439,
      "grad_norm": 3.4484715461730957,
      "learning_rate": 6.0856935366739285e-05,
      "loss": 0.0397,
      "step": 420
    },
    {
      "epoch": 0.06175081729022884,
      "grad_norm": 8.414154052734375,
      "learning_rate": 6.158315177923021e-05,
      "loss": 0.0864,
      "step": 425
    },
    {
      "epoch": 0.0624772974936433,
      "grad_norm": 5.502734184265137,
      "learning_rate": 6.230936819172115e-05,
      "loss": 0.0313,
      "step": 430
    },
    {
      "epoch": 0.06320377769705776,
      "grad_norm": 6.950675964355469,
      "learning_rate": 6.303558460421207e-05,
      "loss": 0.0271,
      "step": 435
    },
    {
      "epoch": 0.06393025790047221,
      "grad_norm": 2.7828145027160645,
      "learning_rate": 6.376180101670299e-05,
      "loss": 0.016,
      "step": 440
    },
    {
      "epoch": 0.06465673810388667,
      "grad_norm": 4.585832118988037,
      "learning_rate": 6.448801742919391e-05,
      "loss": 0.0402,
      "step": 445
    },
    {
      "epoch": 0.06538321830730112,
      "grad_norm": 5.096743106842041,
      "learning_rate": 6.521423384168483e-05,
      "loss": 0.0719,
      "step": 450
    },
    {
      "epoch": 0.06610969851071559,
      "grad_norm": 5.883689880371094,
      "learning_rate": 6.594045025417575e-05,
      "loss": 0.0691,
      "step": 455
    },
    {
      "epoch": 0.06683617871413004,
      "grad_norm": 1.7454990148544312,
      "learning_rate": 6.666666666666667e-05,
      "loss": 0.0504,
      "step": 460
    },
    {
      "epoch": 0.0675626589175445,
      "grad_norm": 2.231943368911743,
      "learning_rate": 6.739288307915759e-05,
      "loss": 0.0184,
      "step": 465
    },
    {
      "epoch": 0.06828913912095895,
      "grad_norm": 4.1820268630981445,
      "learning_rate": 6.811909949164852e-05,
      "loss": 0.025,
      "step": 470
    },
    {
      "epoch": 0.06901561932437342,
      "grad_norm": 0.06752662360668182,
      "learning_rate": 6.884531590413945e-05,
      "loss": 0.0061,
      "step": 475
    },
    {
      "epoch": 0.06974209952778787,
      "grad_norm": 0.034968651831150055,
      "learning_rate": 6.957153231663037e-05,
      "loss": 0.0246,
      "step": 480
    },
    {
      "epoch": 0.07046857973120232,
      "grad_norm": 4.133062839508057,
      "learning_rate": 7.029774872912129e-05,
      "loss": 0.0483,
      "step": 485
    },
    {
      "epoch": 0.07119505993461678,
      "grad_norm": 0.14520829916000366,
      "learning_rate": 7.10239651416122e-05,
      "loss": 0.0242,
      "step": 490
    },
    {
      "epoch": 0.07192154013803125,
      "grad_norm": 0.08248770982027054,
      "learning_rate": 7.175018155410313e-05,
      "loss": 0.0389,
      "step": 495
    },
    {
      "epoch": 0.0726480203414457,
      "grad_norm": 0.09677606076002121,
      "learning_rate": 7.247639796659405e-05,
      "loss": 0.0813,
      "step": 500
    },
    {
      "epoch": 0.07337450054486015,
      "grad_norm": 2.2317094802856445,
      "learning_rate": 7.320261437908497e-05,
      "loss": 0.0425,
      "step": 505
    },
    {
      "epoch": 0.0741009807482746,
      "grad_norm": 0.9524332284927368,
      "learning_rate": 7.39288307915759e-05,
      "loss": 0.0165,
      "step": 510
    },
    {
      "epoch": 0.07482746095168907,
      "grad_norm": 1.2688440084457397,
      "learning_rate": 7.465504720406682e-05,
      "loss": 0.0376,
      "step": 515
    },
    {
      "epoch": 0.07555394115510353,
      "grad_norm": 0.5410459637641907,
      "learning_rate": 7.538126361655774e-05,
      "loss": 0.0132,
      "step": 520
    },
    {
      "epoch": 0.07628042135851798,
      "grad_norm": 1.0646350383758545,
      "learning_rate": 7.610748002904866e-05,
      "loss": 0.0357,
      "step": 525
    },
    {
      "epoch": 0.07700690156193243,
      "grad_norm": 0.05422890931367874,
      "learning_rate": 7.683369644153958e-05,
      "loss": 0.0024,
      "step": 530
    },
    {
      "epoch": 0.0777333817653469,
      "grad_norm": 1.7686655521392822,
      "learning_rate": 7.75599128540305e-05,
      "loss": 0.011,
      "step": 535
    },
    {
      "epoch": 0.07845986196876135,
      "grad_norm": 1.7055928707122803,
      "learning_rate": 7.828612926652143e-05,
      "loss": 0.0283,
      "step": 540
    },
    {
      "epoch": 0.07918634217217581,
      "grad_norm": 7.1870245933532715,
      "learning_rate": 7.901234567901235e-05,
      "loss": 0.0255,
      "step": 545
    },
    {
      "epoch": 0.07991282237559026,
      "grad_norm": 4.143937110900879,
      "learning_rate": 7.973856209150328e-05,
      "loss": 0.0163,
      "step": 550
    },
    {
      "epoch": 0.08063930257900472,
      "grad_norm": 2.7253036499023438,
      "learning_rate": 8.04647785039942e-05,
      "loss": 0.0356,
      "step": 555
    },
    {
      "epoch": 0.08136578278241918,
      "grad_norm": 0.1260932832956314,
      "learning_rate": 8.119099491648512e-05,
      "loss": 0.0897,
      "step": 560
    },
    {
      "epoch": 0.08209226298583364,
      "grad_norm": 0.8739075064659119,
      "learning_rate": 8.191721132897604e-05,
      "loss": 0.0212,
      "step": 565
    },
    {
      "epoch": 0.08281874318924809,
      "grad_norm": 0.07976645231246948,
      "learning_rate": 8.264342774146696e-05,
      "loss": 0.0202,
      "step": 570
    },
    {
      "epoch": 0.08354522339266254,
      "grad_norm": 3.089498996734619,
      "learning_rate": 8.336964415395788e-05,
      "loss": 0.0288,
      "step": 575
    },
    {
      "epoch": 0.08427170359607701,
      "grad_norm": 1.1282787322998047,
      "learning_rate": 8.40958605664488e-05,
      "loss": 0.0236,
      "step": 580
    },
    {
      "epoch": 0.08499818379949146,
      "grad_norm": 2.50753116607666,
      "learning_rate": 8.482207697893972e-05,
      "loss": 0.0491,
      "step": 585
    },
    {
      "epoch": 0.08572466400290592,
      "grad_norm": 15.398341178894043,
      "learning_rate": 8.554829339143065e-05,
      "loss": 0.0541,
      "step": 590
    },
    {
      "epoch": 0.08645114420632037,
      "grad_norm": 0.3026963174343109,
      "learning_rate": 8.627450980392158e-05,
      "loss": 0.0053,
      "step": 595
    },
    {
      "epoch": 0.08717762440973484,
      "grad_norm": 0.12404945492744446,
      "learning_rate": 8.70007262164125e-05,
      "loss": 0.0404,
      "step": 600
    },
    {
      "epoch": 0.08790410461314929,
      "grad_norm": 0.9239891767501831,
      "learning_rate": 8.772694262890342e-05,
      "loss": 0.0221,
      "step": 605
    },
    {
      "epoch": 0.08863058481656375,
      "grad_norm": 1.404173493385315,
      "learning_rate": 8.845315904139434e-05,
      "loss": 0.0122,
      "step": 610
    },
    {
      "epoch": 0.0893570650199782,
      "grad_norm": 3.049877405166626,
      "learning_rate": 8.917937545388526e-05,
      "loss": 0.02,
      "step": 615
    },
    {
      "epoch": 0.09008354522339267,
      "grad_norm": 0.6122508645057678,
      "learning_rate": 8.990559186637618e-05,
      "loss": 0.0191,
      "step": 620
    },
    {
      "epoch": 0.09081002542680712,
      "grad_norm": 0.021131640300154686,
      "learning_rate": 9.06318082788671e-05,
      "loss": 0.0257,
      "step": 625
    },
    {
      "epoch": 0.09153650563022157,
      "grad_norm": 1.1997209787368774,
      "learning_rate": 9.135802469135802e-05,
      "loss": 0.0067,
      "step": 630
    },
    {
      "epoch": 0.09226298583363603,
      "grad_norm": 2.1676833629608154,
      "learning_rate": 9.208424110384896e-05,
      "loss": 0.0078,
      "step": 635
    },
    {
      "epoch": 0.0929894660370505,
      "grad_norm": 0.29366588592529297,
      "learning_rate": 9.281045751633988e-05,
      "loss": 0.0052,
      "step": 640
    },
    {
      "epoch": 0.09371594624046495,
      "grad_norm": 0.6021141409873962,
      "learning_rate": 9.35366739288308e-05,
      "loss": 0.0147,
      "step": 645
    },
    {
      "epoch": 0.0944424264438794,
      "grad_norm": 0.05590349808335304,
      "learning_rate": 9.426289034132172e-05,
      "loss": 0.0041,
      "step": 650
    },
    {
      "epoch": 0.09516890664729385,
      "grad_norm": 0.010648532770574093,
      "learning_rate": 9.498910675381264e-05,
      "loss": 0.0004,
      "step": 655
    },
    {
      "epoch": 0.09589538685070832,
      "grad_norm": 0.6512329578399658,
      "learning_rate": 9.571532316630356e-05,
      "loss": 0.0057,
      "step": 660
    },
    {
      "epoch": 0.09662186705412278,
      "grad_norm": 0.040556080639362335,
      "learning_rate": 9.644153957879448e-05,
      "loss": 0.0006,
      "step": 665
    },
    {
      "epoch": 0.09734834725753723,
      "grad_norm": 0.03349559009075165,
      "learning_rate": 9.71677559912854e-05,
      "loss": 0.0025,
      "step": 670
    },
    {
      "epoch": 0.09807482746095168,
      "grad_norm": 0.22619083523750305,
      "learning_rate": 9.789397240377634e-05,
      "loss": 0.0008,
      "step": 675
    },
    {
      "epoch": 0.09880130766436615,
      "grad_norm": 0.005620414856821299,
      "learning_rate": 9.862018881626726e-05,
      "loss": 0.0004,
      "step": 680
    },
    {
      "epoch": 0.0995277878677806,
      "grad_norm": 0.05560583993792534,
      "learning_rate": 9.934640522875818e-05,
      "loss": 0.0115,
      "step": 685
    },
    {
      "epoch": 0.10025426807119506,
      "grad_norm": 0.003737515537068248,
      "learning_rate": 0.00010007262164124908,
      "loss": 0.006,
      "step": 690
    },
    {
      "epoch": 0.10098074827460951,
      "grad_norm": 4.636546611785889,
      "learning_rate": 0.00010079883805374002,
      "loss": 0.0039,
      "step": 695
    },
    {
      "epoch": 0.10170722847802398,
      "grad_norm": 0.0036786955315619707,
      "learning_rate": 0.00010152505446623095,
      "loss": 0.0193,
      "step": 700
    },
    {
      "epoch": 0.10243370868143843,
      "grad_norm": 1.0555495023727417,
      "learning_rate": 0.00010225127087872186,
      "loss": 0.024,
      "step": 705
    },
    {
      "epoch": 0.10316018888485289,
      "grad_norm": 0.32569730281829834,
      "learning_rate": 0.00010297748729121279,
      "loss": 0.0168,
      "step": 710
    },
    {
      "epoch": 0.10388666908826734,
      "grad_norm": 0.6908342242240906,
      "learning_rate": 0.0001037037037037037,
      "loss": 0.0359,
      "step": 715
    },
    {
      "epoch": 0.1046131492916818,
      "grad_norm": 0.044849053025245667,
      "learning_rate": 0.00010442992011619463,
      "loss": 0.0009,
      "step": 720
    },
    {
      "epoch": 0.10533962949509626,
      "grad_norm": 0.011313475668430328,
      "learning_rate": 0.00010515613652868554,
      "loss": 0.0003,
      "step": 725
    },
    {
      "epoch": 0.10606610969851071,
      "grad_norm": 0.011058060452342033,
      "learning_rate": 0.00010588235294117647,
      "loss": 0.0047,
      "step": 730
    },
    {
      "epoch": 0.10679258990192517,
      "grad_norm": 0.013103635981678963,
      "learning_rate": 0.00010660856935366741,
      "loss": 0.0005,
      "step": 735
    },
    {
      "epoch": 0.10751907010533963,
      "grad_norm": 0.0076889158226549625,
      "learning_rate": 0.00010733478576615832,
      "loss": 0.0122,
      "step": 740
    },
    {
      "epoch": 0.10824555030875409,
      "grad_norm": 0.6621626019477844,
      "learning_rate": 0.00010806100217864925,
      "loss": 0.0027,
      "step": 745
    },
    {
      "epoch": 0.10897203051216854,
      "grad_norm": 1.037239670753479,
      "learning_rate": 0.00010878721859114016,
      "loss": 0.0385,
      "step": 750
    },
    {
      "epoch": 0.109698510715583,
      "grad_norm": 0.11858850717544556,
      "learning_rate": 0.00010951343500363109,
      "loss": 0.0522,
      "step": 755
    },
    {
      "epoch": 0.11042499091899746,
      "grad_norm": 3.902498245239258,
      "learning_rate": 0.000110239651416122,
      "loss": 0.0037,
      "step": 760
    },
    {
      "epoch": 0.11115147112241192,
      "grad_norm": 0.03190886229276657,
      "learning_rate": 0.00011096586782861293,
      "loss": 0.0092,
      "step": 765
    },
    {
      "epoch": 0.11187795132582637,
      "grad_norm": 0.028368664905428886,
      "learning_rate": 0.00011169208424110384,
      "loss": 0.0011,
      "step": 770
    },
    {
      "epoch": 0.11260443152924082,
      "grad_norm": 1.0788954496383667,
      "learning_rate": 0.00011241830065359477,
      "loss": 0.0205,
      "step": 775
    },
    {
      "epoch": 0.11333091173265529,
      "grad_norm": 0.10793304443359375,
      "learning_rate": 0.00011314451706608571,
      "loss": 0.0128,
      "step": 780
    },
    {
      "epoch": 0.11405739193606974,
      "grad_norm": 1.4622502326965332,
      "learning_rate": 0.00011387073347857661,
      "loss": 0.0525,
      "step": 785
    },
    {
      "epoch": 0.1147838721394842,
      "grad_norm": 0.43396472930908203,
      "learning_rate": 0.00011459694989106755,
      "loss": 0.0016,
      "step": 790
    },
    {
      "epoch": 0.11551035234289865,
      "grad_norm": 2.9861035346984863,
      "learning_rate": 0.00011532316630355846,
      "loss": 0.0138,
      "step": 795
    },
    {
      "epoch": 0.11623683254631312,
      "grad_norm": 14.586094856262207,
      "learning_rate": 0.00011604938271604939,
      "loss": 0.1007,
      "step": 800
    },
    {
      "epoch": 0.11696331274972757,
      "grad_norm": 0.014536268077790737,
      "learning_rate": 0.0001167755991285403,
      "loss": 0.0147,
      "step": 805
    },
    {
      "epoch": 0.11768979295314203,
      "grad_norm": 0.05354047194123268,
      "learning_rate": 0.00011750181554103123,
      "loss": 0.0176,
      "step": 810
    },
    {
      "epoch": 0.11841627315655648,
      "grad_norm": 0.3078368902206421,
      "learning_rate": 0.00011822803195352215,
      "loss": 0.0049,
      "step": 815
    },
    {
      "epoch": 0.11914275335997095,
      "grad_norm": 0.011014469899237156,
      "learning_rate": 0.00011895424836601307,
      "loss": 0.0035,
      "step": 820
    },
    {
      "epoch": 0.1198692335633854,
      "grad_norm": 0.5486952662467957,
      "learning_rate": 0.000119680464778504,
      "loss": 0.0168,
      "step": 825
    },
    {
      "epoch": 0.12059571376679985,
      "grad_norm": 0.02629510499536991,
      "learning_rate": 0.00012040668119099491,
      "loss": 0.001,
      "step": 830
    },
    {
      "epoch": 0.1213221939702143,
      "grad_norm": 0.019840385764837265,
      "learning_rate": 0.00012113289760348585,
      "loss": 0.0037,
      "step": 835
    },
    {
      "epoch": 0.12204867417362877,
      "grad_norm": 1.3649095296859741,
      "learning_rate": 0.00012185911401597675,
      "loss": 0.0133,
      "step": 840
    },
    {
      "epoch": 0.12277515437704323,
      "grad_norm": 0.025183813646435738,
      "learning_rate": 0.0001225853304284677,
      "loss": 0.0006,
      "step": 845
    },
    {
      "epoch": 0.12350163458045768,
      "grad_norm": 0.07554338127374649,
      "learning_rate": 0.0001233115468409586,
      "loss": 0.0041,
      "step": 850
    },
    {
      "epoch": 0.12422811478387213,
      "grad_norm": 0.04600398242473602,
      "learning_rate": 0.00012403776325344953,
      "loss": 0.0022,
      "step": 855
    },
    {
      "epoch": 0.1249545949872866,
      "grad_norm": 4.709814548492432,
      "learning_rate": 0.00012476397966594048,
      "loss": 0.0037,
      "step": 860
    },
    {
      "epoch": 0.12568107519070104,
      "grad_norm": 0.020981954410672188,
      "learning_rate": 0.00012549019607843137,
      "loss": 0.0121,
      "step": 865
    },
    {
      "epoch": 0.12640755539411552,
      "grad_norm": 0.3170248866081238,
      "learning_rate": 0.00012621641249092232,
      "loss": 0.0027,
      "step": 870
    },
    {
      "epoch": 0.12713403559752998,
      "grad_norm": 0.01148161105811596,
      "learning_rate": 0.0001269426289034132,
      "loss": 0.0005,
      "step": 875
    },
    {
      "epoch": 0.12786051580094443,
      "grad_norm": 0.005348953418433666,
      "learning_rate": 0.00012766884531590416,
      "loss": 0.0002,
      "step": 880
    },
    {
      "epoch": 0.12858699600435888,
      "grad_norm": 3.101860761642456,
      "learning_rate": 0.00012839506172839505,
      "loss": 0.0038,
      "step": 885
    },
    {
      "epoch": 0.12931347620777334,
      "grad_norm": 2.680506706237793,
      "learning_rate": 0.000129121278140886,
      "loss": 0.0472,
      "step": 890
    },
    {
      "epoch": 0.1300399564111878,
      "grad_norm": 22.780397415161133,
      "learning_rate": 0.0001298474945533769,
      "loss": 0.0232,
      "step": 895
    },
    {
      "epoch": 0.13076643661460224,
      "grad_norm": 0.08615617454051971,
      "learning_rate": 0.00013057371096586784,
      "loss": 0.0278,
      "step": 900
    },
    {
      "epoch": 0.1314929168180167,
      "grad_norm": 0.4959210455417633,
      "learning_rate": 0.00013129992737835876,
      "loss": 0.0007,
      "step": 905
    },
    {
      "epoch": 0.13221939702143118,
      "grad_norm": 0.0067051006481051445,
      "learning_rate": 0.00013202614379084968,
      "loss": 0.0167,
      "step": 910
    },
    {
      "epoch": 0.13294587722484563,
      "grad_norm": 10.88768482208252,
      "learning_rate": 0.0001327523602033406,
      "loss": 0.0682,
      "step": 915
    },
    {
      "epoch": 0.13367235742826009,
      "grad_norm": 0.007390011567622423,
      "learning_rate": 0.00013347857661583152,
      "loss": 0.0003,
      "step": 920
    },
    {
      "epoch": 0.13439883763167454,
      "grad_norm": 0.12825822830200195,
      "learning_rate": 0.00013420479302832244,
      "loss": 0.0244,
      "step": 925
    },
    {
      "epoch": 0.135125317835089,
      "grad_norm": 0.8949776291847229,
      "learning_rate": 0.00013493100944081336,
      "loss": 0.0214,
      "step": 930
    },
    {
      "epoch": 0.13585179803850345,
      "grad_norm": 0.007870912551879883,
      "learning_rate": 0.00013565722585330429,
      "loss": 0.0004,
      "step": 935
    },
    {
      "epoch": 0.1365782782419179,
      "grad_norm": 0.013382726348936558,
      "learning_rate": 0.0001363834422657952,
      "loss": 0.0004,
      "step": 940
    },
    {
      "epoch": 0.13730475844533235,
      "grad_norm": 0.037289395928382874,
      "learning_rate": 0.00013710965867828613,
      "loss": 0.0012,
      "step": 945
    },
    {
      "epoch": 0.13803123864874683,
      "grad_norm": 0.9494091272354126,
      "learning_rate": 0.00013783587509077707,
      "loss": 0.0249,
      "step": 950
    },
    {
      "epoch": 0.1387577188521613,
      "grad_norm": 2.1269211769104004,
      "learning_rate": 0.00013856209150326797,
      "loss": 0.0041,
      "step": 955
    },
    {
      "epoch": 0.13948419905557574,
      "grad_norm": 0.03475005179643631,
      "learning_rate": 0.00013928830791575892,
      "loss": 0.0039,
      "step": 960
    },
    {
      "epoch": 0.1402106792589902,
      "grad_norm": 0.778325080871582,
      "learning_rate": 0.0001400145243282498,
      "loss": 0.0044,
      "step": 965
    },
    {
      "epoch": 0.14093715946240465,
      "grad_norm": 0.06391960382461548,
      "learning_rate": 0.00014074074074074076,
      "loss": 0.0011,
      "step": 970
    },
    {
      "epoch": 0.1416636396658191,
      "grad_norm": 0.015311076305806637,
      "learning_rate": 0.00014146695715323165,
      "loss": 0.0156,
      "step": 975
    },
    {
      "epoch": 0.14239011986923356,
      "grad_norm": 0.005620781797915697,
      "learning_rate": 0.0001421931735657226,
      "loss": 0.0005,
      "step": 980
    },
    {
      "epoch": 0.143116600072648,
      "grad_norm": 0.006361651234328747,
      "learning_rate": 0.00014291938997821352,
      "loss": 0.0173,
      "step": 985
    },
    {
      "epoch": 0.1438430802760625,
      "grad_norm": 0.6632714867591858,
      "learning_rate": 0.00014364560639070444,
      "loss": 0.0008,
      "step": 990
    },
    {
      "epoch": 0.14456956047947694,
      "grad_norm": 3.7890255451202393,
      "learning_rate": 0.00014437182280319536,
      "loss": 0.0591,
      "step": 995
    },
    {
      "epoch": 0.1452960406828914,
      "grad_norm": 4.573298454284668,
      "learning_rate": 0.00014509803921568628,
      "loss": 0.0182,
      "step": 1000
    },
    {
      "epoch": 0.14602252088630585,
      "grad_norm": 0.18653298914432526,
      "learning_rate": 0.0001458242556281772,
      "loss": 0.011,
      "step": 1005
    },
    {
      "epoch": 0.1467490010897203,
      "grad_norm": 0.0030135358683764935,
      "learning_rate": 0.00014655047204066812,
      "loss": 0.017,
      "step": 1010
    },
    {
      "epoch": 0.14747548129313476,
      "grad_norm": 13.294329643249512,
      "learning_rate": 0.00014727668845315904,
      "loss": 0.0359,
      "step": 1015
    },
    {
      "epoch": 0.1482019614965492,
      "grad_norm": 1.0047153234481812,
      "learning_rate": 0.00014800290486564996,
      "loss": 0.0014,
      "step": 1020
    },
    {
      "epoch": 0.14892844169996367,
      "grad_norm": 0.0042244489304721355,
      "learning_rate": 0.00014872912127814088,
      "loss": 0.0008,
      "step": 1025
    },
    {
      "epoch": 0.14965492190337815,
      "grad_norm": 0.005744027905166149,
      "learning_rate": 0.00014945533769063183,
      "loss": 0.0005,
      "step": 1030
    },
    {
      "epoch": 0.1503814021067926,
      "grad_norm": 0.0027218873146921396,
      "learning_rate": 0.00015018155410312272,
      "loss": 0.0009,
      "step": 1035
    },
    {
      "epoch": 0.15110788231020705,
      "grad_norm": 1.5683510303497314,
      "learning_rate": 0.00015090777051561367,
      "loss": 0.0009,
      "step": 1040
    },
    {
      "epoch": 0.1518343625136215,
      "grad_norm": 0.0024358402006328106,
      "learning_rate": 0.00015163398692810456,
      "loss": 0.0008,
      "step": 1045
    },
    {
      "epoch": 0.15256084271703596,
      "grad_norm": 0.0035784540232270956,
      "learning_rate": 0.0001523602033405955,
      "loss": 0.0217,
      "step": 1050
    },
    {
      "epoch": 0.15328732292045041,
      "grad_norm": 0.017342494800686836,
      "learning_rate": 0.0001530864197530864,
      "loss": 0.0002,
      "step": 1055
    },
    {
      "epoch": 0.15401380312386487,
      "grad_norm": 0.0023592431098222733,
      "learning_rate": 0.00015381263616557735,
      "loss": 0.0001,
      "step": 1060
    },
    {
      "epoch": 0.15474028332727932,
      "grad_norm": 0.0029132033232599497,
      "learning_rate": 0.00015453885257806827,
      "loss": 0.0002,
      "step": 1065
    },
    {
      "epoch": 0.1554667635306938,
      "grad_norm": 5.089969158172607,
      "learning_rate": 0.0001552650689905592,
      "loss": 0.0124,
      "step": 1070
    },
    {
      "epoch": 0.15619324373410826,
      "grad_norm": 0.0020955149084329605,
      "learning_rate": 0.00015599128540305012,
      "loss": 0.0002,
      "step": 1075
    },
    {
      "epoch": 0.1569197239375227,
      "grad_norm": 0.001827805070206523,
      "learning_rate": 0.00015671750181554104,
      "loss": 0.0026,
      "step": 1080
    },
    {
      "epoch": 0.15764620414093716,
      "grad_norm": 0.0018593213753774762,
      "learning_rate": 0.00015744371822803196,
      "loss": 0.0001,
      "step": 1085
    },
    {
      "epoch": 0.15837268434435162,
      "grad_norm": 8.548373222351074,
      "learning_rate": 0.00015816993464052288,
      "loss": 0.0116,
      "step": 1090
    },
    {
      "epoch": 0.15909916454776607,
      "grad_norm": 0.003052167361602187,
      "learning_rate": 0.0001588961510530138,
      "loss": 0.023,
      "step": 1095
    },
    {
      "epoch": 0.15982564475118052,
      "grad_norm": 0.01510961726307869,
      "learning_rate": 0.00015962236746550472,
      "loss": 0.0003,
      "step": 1100
    },
    {
      "epoch": 0.16055212495459498,
      "grad_norm": 0.006872969213873148,
      "learning_rate": 0.00016034858387799564,
      "loss": 0.0008,
      "step": 1105
    },
    {
      "epoch": 0.16127860515800943,
      "grad_norm": 0.0075376201421022415,
      "learning_rate": 0.00016107480029048659,
      "loss": 0.001,
      "step": 1110
    },
    {
      "epoch": 0.1620050853614239,
      "grad_norm": 1.308592438697815,
      "learning_rate": 0.00016180101670297748,
      "loss": 0.0206,
      "step": 1115
    },
    {
      "epoch": 0.16273156556483837,
      "grad_norm": 0.01441910769790411,
      "learning_rate": 0.00016252723311546843,
      "loss": 0.0003,
      "step": 1120
    },
    {
      "epoch": 0.16345804576825282,
      "grad_norm": 4.73635721206665,
      "learning_rate": 0.00016325344952795935,
      "loss": 0.0048,
      "step": 1125
    },
    {
      "epoch": 0.16418452597166727,
      "grad_norm": 0.07317811995744705,
      "learning_rate": 0.00016397966594045027,
      "loss": 0.0079,
      "step": 1130
    },
    {
      "epoch": 0.16491100617508173,
      "grad_norm": 3.066941976547241,
      "learning_rate": 0.0001647058823529412,
      "loss": 0.0245,
      "step": 1135
    },
    {
      "epoch": 0.16563748637849618,
      "grad_norm": 0.20101045072078705,
      "learning_rate": 0.0001654320987654321,
      "loss": 0.0538,
      "step": 1140
    },
    {
      "epoch": 0.16636396658191063,
      "grad_norm": 0.03498254343867302,
      "learning_rate": 0.00016615831517792303,
      "loss": 0.0009,
      "step": 1145
    },
    {
      "epoch": 0.1670904467853251,
      "grad_norm": 0.044696319848299026,
      "learning_rate": 0.00016688453159041395,
      "loss": 0.001,
      "step": 1150
    },
    {
      "epoch": 0.16781692698873957,
      "grad_norm": 0.005176996346563101,
      "learning_rate": 0.00016761074800290487,
      "loss": 0.001,
      "step": 1155
    },
    {
      "epoch": 0.16854340719215402,
      "grad_norm": 0.0034458874724805355,
      "learning_rate": 0.0001683369644153958,
      "loss": 0.0295,
      "step": 1160
    },
    {
      "epoch": 0.16926988739556847,
      "grad_norm": 0.01240626908838749,
      "learning_rate": 0.0001690631808278867,
      "loss": 0.0055,
      "step": 1165
    },
    {
      "epoch": 0.16999636759898293,
      "grad_norm": 0.0073911272920668125,
      "learning_rate": 0.00016978939724037763,
      "loss": 0.0002,
      "step": 1170
    },
    {
      "epoch": 0.17072284780239738,
      "grad_norm": 0.010020995512604713,
      "learning_rate": 0.00017051561365286855,
      "loss": 0.0002,
      "step": 1175
    },
    {
      "epoch": 0.17144932800581184,
      "grad_norm": 0.0028329354245215654,
      "learning_rate": 0.00017124183006535947,
      "loss": 0.0002,
      "step": 1180
    },
    {
      "epoch": 0.1721758082092263,
      "grad_norm": 0.009768263436853886,
      "learning_rate": 0.00017196804647785042,
      "loss": 0.0001,
      "step": 1185
    },
    {
      "epoch": 0.17290228841264074,
      "grad_norm": 0.006985844578593969,
      "learning_rate": 0.00017269426289034134,
      "loss": 0.0001,
      "step": 1190
    },
    {
      "epoch": 0.17362876861605522,
      "grad_norm": 0.003910423722118139,
      "learning_rate": 0.00017342047930283226,
      "loss": 0.0001,
      "step": 1195
    },
    {
      "epoch": 0.17435524881946968,
      "grad_norm": 0.0018550670938566327,
      "learning_rate": 0.00017414669571532318,
      "loss": 0.0001,
      "step": 1200
    },
    {
      "epoch": 0.17508172902288413,
      "grad_norm": 0.003561209188774228,
      "learning_rate": 0.0001748729121278141,
      "loss": 0.0001,
      "step": 1205
    },
    {
      "epoch": 0.17580820922629858,
      "grad_norm": 0.0017712870612740517,
      "learning_rate": 0.00017559912854030502,
      "loss": 0.0001,
      "step": 1210
    },
    {
      "epoch": 0.17653468942971304,
      "grad_norm": 0.002323460765182972,
      "learning_rate": 0.00017632534495279595,
      "loss": 0.0001,
      "step": 1215
    },
    {
      "epoch": 0.1772611696331275,
      "grad_norm": 0.0017775703454390168,
      "learning_rate": 0.00017705156136528687,
      "loss": 0.0004,
      "step": 1220
    },
    {
      "epoch": 0.17798764983654194,
      "grad_norm": 0.003454179735854268,
      "learning_rate": 0.00017777777777777779,
      "loss": 0.0001,
      "step": 1225
    },
    {
      "epoch": 0.1787141300399564,
      "grad_norm": 0.003128621494397521,
      "learning_rate": 0.0001785039941902687,
      "loss": 0.0001,
      "step": 1230
    },
    {
      "epoch": 0.17944061024337088,
      "grad_norm": 0.013285885564982891,
      "learning_rate": 0.00017923021060275963,
      "loss": 0.0001,
      "step": 1235
    },
    {
      "epoch": 0.18016709044678533,
      "grad_norm": 0.0012834910303354263,
      "learning_rate": 0.00017995642701525055,
      "loss": 0.0004,
      "step": 1240
    },
    {
      "epoch": 0.1808935706501998,
      "grad_norm": 0.0010866275988519192,
      "learning_rate": 0.0001806826434277415,
      "loss": 0.0001,
      "step": 1245
    },
    {
      "epoch": 0.18162005085361424,
      "grad_norm": 0.0010630824835970998,
      "learning_rate": 0.0001814088598402324,
      "loss": 0.0,
      "step": 1250
    },
    {
      "epoch": 0.1823465310570287,
      "grad_norm": 0.0011757917236536741,
      "learning_rate": 0.00018213507625272334,
      "loss": 0.0,
      "step": 1255
    },
    {
      "epoch": 0.18307301126044315,
      "grad_norm": 0.0009444226743653417,
      "learning_rate": 0.00018286129266521423,
      "loss": 0.008,
      "step": 1260
    },
    {
      "epoch": 0.1837994914638576,
      "grad_norm": 0.0011839661747217178,
      "learning_rate": 0.00018358750907770518,
      "loss": 0.0,
      "step": 1265
    },
    {
      "epoch": 0.18452597166727205,
      "grad_norm": 0.0008903779671527445,
      "learning_rate": 0.00018431372549019607,
      "loss": 0.0,
      "step": 1270
    },
    {
      "epoch": 0.18525245187068654,
      "grad_norm": 0.0010285211028531194,
      "learning_rate": 0.00018503994190268702,
      "loss": 0.0001,
      "step": 1275
    },
    {
      "epoch": 0.185978932074101,
      "grad_norm": 0.0016522291116416454,
      "learning_rate": 0.00018576615831517794,
      "loss": 0.0338,
      "step": 1280
    },
    {
      "epoch": 0.18670541227751544,
      "grad_norm": 0.001982170157134533,
      "learning_rate": 0.00018649237472766886,
      "loss": 0.0009,
      "step": 1285
    },
    {
      "epoch": 0.1874318924809299,
      "grad_norm": 0.002856120467185974,
      "learning_rate": 0.00018721859114015978,
      "loss": 0.0001,
      "step": 1290
    },
    {
      "epoch": 0.18815837268434435,
      "grad_norm": 6.8815484046936035,
      "learning_rate": 0.0001879448075526507,
      "loss": 0.0064,
      "step": 1295
    },
    {
      "epoch": 0.1888848528877588,
      "grad_norm": 0.002711124252527952,
      "learning_rate": 0.00018867102396514162,
      "loss": 0.0001,
      "step": 1300
    },
    {
      "epoch": 0.18961133309117326,
      "grad_norm": 0.01453580055385828,
      "learning_rate": 0.00018939724037763254,
      "loss": 0.0002,
      "step": 1305
    },
    {
      "epoch": 0.1903378132945877,
      "grad_norm": 0.004619908984750509,
      "learning_rate": 0.00019012345679012346,
      "loss": 0.0108,
      "step": 1310
    },
    {
      "epoch": 0.1910642934980022,
      "grad_norm": 0.002147579798474908,
      "learning_rate": 0.0001908496732026144,
      "loss": 0.0002,
      "step": 1315
    },
    {
      "epoch": 0.19179077370141664,
      "grad_norm": 0.006444690283387899,
      "learning_rate": 0.0001915758896151053,
      "loss": 0.0013,
      "step": 1320
    },
    {
      "epoch": 0.1925172539048311,
      "grad_norm": 0.0015877482946962118,
      "learning_rate": 0.00019230210602759625,
      "loss": 0.0011,
      "step": 1325
    },
    {
      "epoch": 0.19324373410824555,
      "grad_norm": 2.8192436695098877,
      "learning_rate": 0.00019302832244008715,
      "loss": 0.0018,
      "step": 1330
    },
    {
      "epoch": 0.19397021431166,
      "grad_norm": 6.506179332733154,
      "learning_rate": 0.0001937545388525781,
      "loss": 0.0067,
      "step": 1335
    },
    {
      "epoch": 0.19469669451507446,
      "grad_norm": 0.0016660846304148436,
      "learning_rate": 0.00019448075526506899,
      "loss": 0.0001,
      "step": 1340
    },
    {
      "epoch": 0.1954231747184889,
      "grad_norm": 0.0011433791369199753,
      "learning_rate": 0.00019520697167755993,
      "loss": 0.0141,
      "step": 1345
    },
    {
      "epoch": 0.19614965492190337,
      "grad_norm": 0.001556798000819981,
      "learning_rate": 0.00019593318809005083,
      "loss": 0.0,
      "step": 1350
    },
    {
      "epoch": 0.19687613512531785,
      "grad_norm": 0.0035784265492111444,
      "learning_rate": 0.00019665940450254178,
      "loss": 0.0001,
      "step": 1355
    },
    {
      "epoch": 0.1976026153287323,
      "grad_norm": 4.246982097625732,
      "learning_rate": 0.0001973856209150327,
      "loss": 0.0068,
      "step": 1360
    },
    {
      "epoch": 0.19832909553214675,
      "grad_norm": 0.001589273801073432,
      "learning_rate": 0.00019811183732752362,
      "loss": 0.012,
      "step": 1365
    },
    {
      "epoch": 0.1990555757355612,
      "grad_norm": 0.0008454394992440939,
      "learning_rate": 0.00019883805374001454,
      "loss": 0.0001,
      "step": 1370
    },
    {
      "epoch": 0.19978205593897566,
      "grad_norm": 0.0013743549352511764,
      "learning_rate": 0.00019956427015250546,
      "loss": 0.0001,
      "step": 1375
    },
    {
      "epoch": 0.20007264802034144,
      "eval_accuracy": 0.9996397787212145,
      "eval_f1": 0.9995310825294748,
      "eval_loss": 0.0019488565158098936,
      "eval_precision": 0.9990626046200201,
      "eval_recall": 1.0,
      "eval_runtime": 124.3971,
      "eval_samples_per_second": 312.427,
      "eval_steps_per_second": 2.444,
      "step": 1377
    },
    {
      "epoch": 0.20050853614239011,
      "grad_norm": 0.002287400420755148,
      "learning_rate": 0.00019996771329405116,
      "loss": 0.0093,
      "step": 1380
    },
    {
      "epoch": 0.20123501634580457,
      "grad_norm": 0.004998628981411457,
      "learning_rate": 0.00019988699652917914,
      "loss": 0.0159,
      "step": 1385
    },
    {
      "epoch": 0.20196149654921902,
      "grad_norm": 0.003076898632571101,
      "learning_rate": 0.00019980627976430706,
      "loss": 0.0073,
      "step": 1390
    },
    {
      "epoch": 0.2026879767526335,
      "grad_norm": 0.7471761107444763,
      "learning_rate": 0.00019972556299943498,
      "loss": 0.0033,
      "step": 1395
    },
    {
      "epoch": 0.20341445695604796,
      "grad_norm": 0.1988172084093094,
      "learning_rate": 0.00019964484623456293,
      "loss": 0.0009,
      "step": 1400
    },
    {
      "epoch": 0.2041409371594624,
      "grad_norm": 0.005002092570066452,
      "learning_rate": 0.00019956412946969088,
      "loss": 0.0009,
      "step": 1405
    },
    {
      "epoch": 0.20486741736287686,
      "grad_norm": 0.24179202318191528,
      "learning_rate": 0.0001994834127048188,
      "loss": 0.0182,
      "step": 1410
    },
    {
      "epoch": 0.20559389756629132,
      "grad_norm": 4.140319347381592,
      "learning_rate": 0.00019940269593994673,
      "loss": 0.0052,
      "step": 1415
    },
    {
      "epoch": 0.20632037776970577,
      "grad_norm": 0.0015831501223146915,
      "learning_rate": 0.00019932197917507468,
      "loss": 0.0,
      "step": 1420
    },
    {
      "epoch": 0.20704685797312022,
      "grad_norm": 0.0023513727355748415,
      "learning_rate": 0.0001992412624102026,
      "loss": 0.0001,
      "step": 1425
    },
    {
      "epoch": 0.20777333817653468,
      "grad_norm": 0.0018358491361141205,
      "learning_rate": 0.00019916054564533055,
      "loss": 0.0217,
      "step": 1430
    },
    {
      "epoch": 0.20849981837994916,
      "grad_norm": 2.7655224800109863,
      "learning_rate": 0.00019907982888045847,
      "loss": 0.0393,
      "step": 1435
    },
    {
      "epoch": 0.2092262985833636,
      "grad_norm": 0.00683799060061574,
      "learning_rate": 0.00019899911211558642,
      "loss": 0.0002,
      "step": 1440
    },
    {
      "epoch": 0.20995277878677807,
      "grad_norm": 0.011541269719600677,
      "learning_rate": 0.00019891839535071435,
      "loss": 0.0045,
      "step": 1445
    },
    {
      "epoch": 0.21067925899019252,
      "grad_norm": 0.013042348437011242,
      "learning_rate": 0.00019883767858584227,
      "loss": 0.0003,
      "step": 1450
    },
    {
      "epoch": 0.21140573919360697,
      "grad_norm": 0.01146721187978983,
      "learning_rate": 0.00019875696182097025,
      "loss": 0.0009,
      "step": 1455
    },
    {
      "epoch": 0.21213221939702143,
      "grad_norm": 0.0053860582411289215,
      "learning_rate": 0.00019867624505609817,
      "loss": 0.003,
      "step": 1460
    },
    {
      "epoch": 0.21285869960043588,
      "grad_norm": 0.35763925313949585,
      "learning_rate": 0.0001985955282912261,
      "loss": 0.0066,
      "step": 1465
    },
    {
      "epoch": 0.21358517980385033,
      "grad_norm": 0.003207879839465022,
      "learning_rate": 0.00019851481152635401,
      "loss": 0.0002,
      "step": 1470
    },
    {
      "epoch": 0.21431166000726481,
      "grad_norm": 0.004152906127274036,
      "learning_rate": 0.00019843409476148196,
      "loss": 0.0001,
      "step": 1475
    },
    {
      "epoch": 0.21503814021067927,
      "grad_norm": 0.003981268033385277,
      "learning_rate": 0.00019835337799660991,
      "loss": 0.0002,
      "step": 1480
    },
    {
      "epoch": 0.21576462041409372,
      "grad_norm": 0.0030321148224174976,
      "learning_rate": 0.00019827266123173784,
      "loss": 0.0004,
      "step": 1485
    },
    {
      "epoch": 0.21649110061750818,
      "grad_norm": 0.0033642794005572796,
      "learning_rate": 0.00019819194446686579,
      "loss": 0.0002,
      "step": 1490
    },
    {
      "epoch": 0.21721758082092263,
      "grad_norm": 0.0015044253086671233,
      "learning_rate": 0.0001981112277019937,
      "loss": 0.0,
      "step": 1495
    },
    {
      "epoch": 0.21794406102433708,
      "grad_norm": 0.0013194256462156773,
      "learning_rate": 0.00019803051093712166,
      "loss": 0.0064,
      "step": 1500
    },
    {
      "epoch": 0.21867054122775154,
      "grad_norm": 0.003604642581194639,
      "learning_rate": 0.00019794979417224958,
      "loss": 0.0001,
      "step": 1505
    },
    {
      "epoch": 0.219397021431166,
      "grad_norm": 0.002144684549421072,
      "learning_rate": 0.00019786907740737753,
      "loss": 0.0011,
      "step": 1510
    },
    {
      "epoch": 0.22012350163458047,
      "grad_norm": 0.00234671076759696,
      "learning_rate": 0.00019778836064250545,
      "loss": 0.0001,
      "step": 1515
    },
    {
      "epoch": 0.22084998183799492,
      "grad_norm": 0.027411388233304024,
      "learning_rate": 0.00019770764387763338,
      "loss": 0.0002,
      "step": 1520
    },
    {
      "epoch": 0.22157646204140938,
      "grad_norm": 0.00431784288957715,
      "learning_rate": 0.00019762692711276135,
      "loss": 0.0016,
      "step": 1525
    },
    {
      "epoch": 0.22230294224482383,
      "grad_norm": 0.007216178812086582,
      "learning_rate": 0.00019754621034788928,
      "loss": 0.0031,
      "step": 1530
    },
    {
      "epoch": 0.22302942244823828,
      "grad_norm": 0.0020561974961310625,
      "learning_rate": 0.0001974654935830172,
      "loss": 0.0,
      "step": 1535
    },
    {
      "epoch": 0.22375590265165274,
      "grad_norm": 0.003935552202165127,
      "learning_rate": 0.00019738477681814512,
      "loss": 0.0,
      "step": 1540
    },
    {
      "epoch": 0.2244823828550672,
      "grad_norm": 0.0017273337580263615,
      "learning_rate": 0.00019730406005327307,
      "loss": 0.0018,
      "step": 1545
    },
    {
      "epoch": 0.22520886305848165,
      "grad_norm": 0.0009397296234965324,
      "learning_rate": 0.00019722334328840102,
      "loss": 0.0071,
      "step": 1550
    },
    {
      "epoch": 0.22593534326189613,
      "grad_norm": 4.2714738845825195,
      "learning_rate": 0.00019714262652352894,
      "loss": 0.0043,
      "step": 1555
    },
    {
      "epoch": 0.22666182346531058,
      "grad_norm": 0.008737271651625633,
      "learning_rate": 0.0001970619097586569,
      "loss": 0.0,
      "step": 1560
    },
    {
      "epoch": 0.22738830366872503,
      "grad_norm": 0.0011167083866894245,
      "learning_rate": 0.00019698119299378482,
      "loss": 0.0195,
      "step": 1565
    },
    {
      "epoch": 0.2281147838721395,
      "grad_norm": 0.0015777769731357694,
      "learning_rate": 0.00019690047622891274,
      "loss": 0.0093,
      "step": 1570
    },
    {
      "epoch": 0.22884126407555394,
      "grad_norm": 1.3581019639968872,
      "learning_rate": 0.0001968197594640407,
      "loss": 0.0371,
      "step": 1575
    },
    {
      "epoch": 0.2295677442789684,
      "grad_norm": 0.005585103295743465,
      "learning_rate": 0.00019673904269916864,
      "loss": 0.0003,
      "step": 1580
    },
    {
      "epoch": 0.23029422448238285,
      "grad_norm": 0.013055490329861641,
      "learning_rate": 0.00019665832593429656,
      "loss": 0.0009,
      "step": 1585
    },
    {
      "epoch": 0.2310207046857973,
      "grad_norm": 0.012752565555274487,
      "learning_rate": 0.00019657760916942448,
      "loss": 0.0057,
      "step": 1590
    },
    {
      "epoch": 0.23174718488921178,
      "grad_norm": 0.016765527427196503,
      "learning_rate": 0.00019649689240455243,
      "loss": 0.0053,
      "step": 1595
    },
    {
      "epoch": 0.23247366509262624,
      "grad_norm": 0.04566654935479164,
      "learning_rate": 0.00019641617563968038,
      "loss": 0.0031,
      "step": 1600
    },
    {
      "epoch": 0.2332001452960407,
      "grad_norm": 0.05058443173766136,
      "learning_rate": 0.0001963354588748083,
      "loss": 0.0064,
      "step": 1605
    },
    {
      "epoch": 0.23392662549945514,
      "grad_norm": 0.006236894056200981,
      "learning_rate": 0.00019625474210993623,
      "loss": 0.0202,
      "step": 1610
    },
    {
      "epoch": 0.2346531057028696,
      "grad_norm": 0.00453936355188489,
      "learning_rate": 0.00019617402534506418,
      "loss": 0.0002,
      "step": 1615
    },
    {
      "epoch": 0.23537958590628405,
      "grad_norm": 0.01652829721570015,
      "learning_rate": 0.00019609330858019213,
      "loss": 0.0005,
      "step": 1620
    },
    {
      "epoch": 0.2361060661096985,
      "grad_norm": 0.28086262941360474,
      "learning_rate": 0.00019601259181532005,
      "loss": 0.0012,
      "step": 1625
    },
    {
      "epoch": 0.23683254631311296,
      "grad_norm": 0.002964381594210863,
      "learning_rate": 0.000195931875050448,
      "loss": 0.0001,
      "step": 1630
    },
    {
      "epoch": 0.23755902651652744,
      "grad_norm": 0.004744562786072493,
      "learning_rate": 0.00019585115828557592,
      "loss": 0.0001,
      "step": 1635
    },
    {
      "epoch": 0.2382855067199419,
      "grad_norm": 0.002022289205342531,
      "learning_rate": 0.00019577044152070385,
      "loss": 0.0003,
      "step": 1640
    },
    {
      "epoch": 0.23901198692335635,
      "grad_norm": 1.325679063796997,
      "learning_rate": 0.0001956897247558318,
      "loss": 0.0223,
      "step": 1645
    },
    {
      "epoch": 0.2397384671267708,
      "grad_norm": 0.005906618200242519,
      "learning_rate": 0.00019560900799095974,
      "loss": 0.0004,
      "step": 1650
    },
    {
      "epoch": 0.24046494733018525,
      "grad_norm": 0.022973524406552315,
      "learning_rate": 0.00019552829122608767,
      "loss": 0.0004,
      "step": 1655
    },
    {
      "epoch": 0.2411914275335997,
      "grad_norm": 0.017179157584905624,
      "learning_rate": 0.0001954475744612156,
      "loss": 0.0005,
      "step": 1660
    },
    {
      "epoch": 0.24191790773701416,
      "grad_norm": 0.011254935525357723,
      "learning_rate": 0.00019536685769634354,
      "loss": 0.0008,
      "step": 1665
| }, | |
| { | |
| "epoch": 0.2426443879404286, | |
| "grad_norm": 0.004135392606258392, | |
| "learning_rate": 0.0001952861409314715, | |
| "loss": 0.0003, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.24337086814384307, | |
| "grad_norm": 0.002715233713388443, | |
| "learning_rate": 0.0001952054241665994, | |
| "loss": 0.0002, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.24409734834725755, | |
| "grad_norm": 0.00374965975061059, | |
| "learning_rate": 0.00019512470740172734, | |
| "loss": 0.0001, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.244823828550672, | |
| "grad_norm": 0.0033891089260578156, | |
| "learning_rate": 0.00019504399063685528, | |
| "loss": 0.0001, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.24555030875408645, | |
| "grad_norm": 0.001574166351929307, | |
| "learning_rate": 0.0001949632738719832, | |
| "loss": 0.0001, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.2462767889575009, | |
| "grad_norm": 0.001289655570872128, | |
| "learning_rate": 0.00019488255710711116, | |
| "loss": 0.0, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.24700326916091536, | |
| "grad_norm": 0.0012494047405198216, | |
| "learning_rate": 0.0001948018403422391, | |
| "loss": 0.0, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.24772974936432982, | |
| "grad_norm": 0.0028091860003769398, | |
| "learning_rate": 0.00019472112357736703, | |
| "loss": 0.0, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.24845622956774427, | |
| "grad_norm": 0.0020063440315425396, | |
| "learning_rate": 0.00019464040681249495, | |
| "loss": 0.0, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.24918270977115872, | |
| "grad_norm": 0.00732283852994442, | |
| "learning_rate": 0.0001945596900476229, | |
| "loss": 0.0001, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.2499091899745732, | |
| "grad_norm": 0.0009436274995096028, | |
| "learning_rate": 0.00019447897328275085, | |
| "loss": 0.0, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.25063567017798766, | |
| "grad_norm": 0.001065302756614983, | |
| "learning_rate": 0.00019439825651787877, | |
| "loss": 0.0, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.2513621503814021, | |
| "grad_norm": 0.0007398009183816612, | |
| "learning_rate": 0.0001943175397530067, | |
| "loss": 0.0, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.25208863058481656, | |
| "grad_norm": 0.0009731051395647228, | |
| "learning_rate": 0.00019423682298813465, | |
| "loss": 0.0001, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.25281511078823105, | |
| "grad_norm": 0.0006832171930000186, | |
| "learning_rate": 0.0001941561062232626, | |
| "loss": 0.0, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.25354159099164547, | |
| "grad_norm": 0.0011063286801800132, | |
| "learning_rate": 0.00019407538945839052, | |
| "loss": 0.0, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.25426807119505995, | |
| "grad_norm": 0.0012475239345803857, | |
| "learning_rate": 0.00019399467269351844, | |
| "loss": 0.0, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.2549945513984744, | |
| "grad_norm": 0.0008868346340022981, | |
| "learning_rate": 0.0001939139559286464, | |
| "loss": 0.0, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.25572103160188886, | |
| "grad_norm": 0.0013618022203445435, | |
| "learning_rate": 0.00019383323916377431, | |
| "loss": 0.0, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.2564475118053033, | |
| "grad_norm": 0.0008328580879606307, | |
| "learning_rate": 0.00019375252239890226, | |
| "loss": 0.0, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.25717399200871777, | |
| "grad_norm": 0.0017324545187875628, | |
| "learning_rate": 0.00019367180563403021, | |
| "loss": 0.0, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.2579004722121322, | |
| "grad_norm": 0.0010712060611695051, | |
| "learning_rate": 0.00019359108886915814, | |
| "loss": 0.0, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.2586269524155467, | |
| "grad_norm": 0.0005095238448120654, | |
| "learning_rate": 0.00019351037210428606, | |
| "loss": 0.0, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.25935343261896115, | |
| "grad_norm": 0.0014343465445563197, | |
| "learning_rate": 0.00019342965533941398, | |
| "loss": 0.0, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.2600799128223756, | |
| "grad_norm": 0.0007007729145698249, | |
| "learning_rate": 0.00019334893857454196, | |
| "loss": 0.0, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.26080639302579006, | |
| "grad_norm": 0.0005924066063016653, | |
| "learning_rate": 0.00019326822180966988, | |
| "loss": 0.0, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.2615328732292045, | |
| "grad_norm": 0.0004458896000869572, | |
| "learning_rate": 0.0001931875050447978, | |
| "loss": 0.0, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.26225935343261897, | |
| "grad_norm": 0.005087355151772499, | |
| "learning_rate": 0.00019310678827992575, | |
| "loss": 0.0, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.2629858336360334, | |
| "grad_norm": 0.11598234623670578, | |
| "learning_rate": 0.0001930260715150537, | |
| "loss": 0.0004, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.2637123138394479, | |
| "grad_norm": 0.0015027482295408845, | |
| "learning_rate": 0.00019294535475018163, | |
| "loss": 0.0473, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.26443879404286236, | |
| "grad_norm": 0.04484843090176582, | |
| "learning_rate": 0.00019286463798530955, | |
| "loss": 0.0003, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.2651652742462768, | |
| "grad_norm": 0.007797603961080313, | |
| "learning_rate": 0.0001927839212204375, | |
| "loss": 0.0031, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.26589175444969126, | |
| "grad_norm": 0.006486868020147085, | |
| "learning_rate": 0.00019270320445556542, | |
| "loss": 0.0003, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.2666182346531057, | |
| "grad_norm": 0.005536284297704697, | |
| "learning_rate": 0.00019262248769069337, | |
| "loss": 0.0004, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.26734471485652017, | |
| "grad_norm": 0.014443649910390377, | |
| "learning_rate": 0.00019254177092582132, | |
| "loss": 0.0001, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.2680711950599346, | |
| "grad_norm": 0.0030865217559039593, | |
| "learning_rate": 0.00019246105416094924, | |
| "loss": 0.0001, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.2687976752633491, | |
| "grad_norm": 0.15668638050556183, | |
| "learning_rate": 0.00019238033739607717, | |
| "loss": 0.0002, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.2695241554667635, | |
| "grad_norm": 0.04532123729586601, | |
| "learning_rate": 0.0001922996206312051, | |
| "loss": 0.0003, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.270250635670178, | |
| "grad_norm": 0.00196210783906281, | |
| "learning_rate": 0.00019221890386633307, | |
| "loss": 0.0001, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.27097711587359247, | |
| "grad_norm": 0.0017535451333969831, | |
| "learning_rate": 0.000192138187101461, | |
| "loss": 0.0012, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.2717035960770069, | |
| "grad_norm": 0.0014856884954497218, | |
| "learning_rate": 0.0001920574703365889, | |
| "loss": 0.0002, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.2724300762804214, | |
| "grad_norm": 0.004271077457815409, | |
| "learning_rate": 0.00019197675357171686, | |
| "loss": 0.0049, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.2731565564838358, | |
| "grad_norm": 0.009308665059506893, | |
| "learning_rate": 0.00019189603680684478, | |
| "loss": 0.0013, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.2738830366872503, | |
| "grad_norm": 0.001470932038500905, | |
| "learning_rate": 0.00019181532004197273, | |
| "loss": 0.0009, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.2746095168906647, | |
| "grad_norm": 0.0009906482882797718, | |
| "learning_rate": 0.00019173460327710066, | |
| "loss": 0.0074, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.2753359970940792, | |
| "grad_norm": 0.5366028547286987, | |
| "learning_rate": 0.0001916538865122286, | |
| "loss": 0.0004, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.27606247729749367, | |
| "grad_norm": 0.0012202219804748893, | |
| "learning_rate": 0.00019157316974735653, | |
| "loss": 0.0007, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.2767889575009081, | |
| "grad_norm": 0.5043062567710876, | |
| "learning_rate": 0.00019149245298248445, | |
| "loss": 0.0007, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.2775154377043226, | |
| "grad_norm": 0.0006929966621100903, | |
| "learning_rate": 0.0001914117362176124, | |
| "loss": 0.01, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.278241917907737, | |
| "grad_norm": 0.0005868257721886039, | |
| "learning_rate": 0.00019133101945274035, | |
| "loss": 0.0, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.2789683981111515, | |
| "grad_norm": 1.5353443622589111, | |
| "learning_rate": 0.00019125030268786827, | |
| "loss": 0.0012, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.2796948783145659, | |
| "grad_norm": 0.0007161126704886556, | |
| "learning_rate": 0.0001911695859229962, | |
| "loss": 0.0, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.2804213585179804, | |
| "grad_norm": 0.0007424887735396624, | |
| "learning_rate": 0.00019108886915812417, | |
| "loss": 0.0, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.2811478387213948, | |
| "grad_norm": 0.0006449614884331822, | |
| "learning_rate": 0.0001910081523932521, | |
| "loss": 0.0, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.2818743189248093, | |
| "grad_norm": 0.0006138585740700364, | |
| "learning_rate": 0.00019092743562838002, | |
| "loss": 0.0, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.2826007991282238, | |
| "grad_norm": 0.0006936938152648509, | |
| "learning_rate": 0.00019084671886350797, | |
| "loss": 0.0, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.2833272793316382, | |
| "grad_norm": 0.0004829142999369651, | |
| "learning_rate": 0.0001907660020986359, | |
| "loss": 0.0001, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.2840537595350527, | |
| "grad_norm": 0.0005034743226133287, | |
| "learning_rate": 0.00019068528533376384, | |
| "loss": 0.0, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.2847802397384671, | |
| "grad_norm": 0.0004061743093188852, | |
| "learning_rate": 0.00019060456856889176, | |
| "loss": 0.0, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.2855067199418816, | |
| "grad_norm": 0.6731203198432922, | |
| "learning_rate": 0.0001905238518040197, | |
| "loss": 0.0282, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.286233200145296, | |
| "grad_norm": 0.010977654717862606, | |
| "learning_rate": 0.00019044313503914764, | |
| "loss": 0.0002, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.2869596803487105, | |
| "grad_norm": 0.022831691429018974, | |
| "learning_rate": 0.00019036241827427556, | |
| "loss": 0.0006, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.287686160552125, | |
| "grad_norm": 0.026040196418762207, | |
| "learning_rate": 0.0001902817015094035, | |
| "loss": 0.0005, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.2884126407555394, | |
| "grad_norm": 0.011391847394406796, | |
| "learning_rate": 0.00019020098474453146, | |
| "loss": 0.0004, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.2891391209589539, | |
| "grad_norm": 0.013334060087800026, | |
| "learning_rate": 0.00019012026797965938, | |
| "loss": 0.0003, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.2898656011623683, | |
| "grad_norm": 0.0060678147710859776, | |
| "learning_rate": 0.0001900395512147873, | |
| "loss": 0.0002, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.2905920813657828, | |
| "grad_norm": 0.004468259867280722, | |
| "learning_rate": 0.00018995883444991525, | |
| "loss": 0.0002, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.2913185615691972, | |
| "grad_norm": 0.0036872695200145245, | |
| "learning_rate": 0.0001898781176850432, | |
| "loss": 0.0001, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.2920450417726117, | |
| "grad_norm": 0.0026169579941779375, | |
| "learning_rate": 0.00018979740092017113, | |
| "loss": 0.0001, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.29277152197602613, | |
| "grad_norm": 0.0021394200157374144, | |
| "learning_rate": 0.00018971668415529907, | |
| "loss": 0.0001, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.2934980021794406, | |
| "grad_norm": 0.0022201493848115206, | |
| "learning_rate": 0.000189635967390427, | |
| "loss": 0.0001, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.2942244823828551, | |
| "grad_norm": 0.0021840811241418123, | |
| "learning_rate": 0.00018955525062555495, | |
| "loss": 0.0001, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.2949509625862695, | |
| "grad_norm": 0.0016265831654891372, | |
| "learning_rate": 0.00018947453386068287, | |
| "loss": 0.0001, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.295677442789684, | |
| "grad_norm": 0.0015095279086381197, | |
| "learning_rate": 0.00018939381709581082, | |
| "loss": 0.0001, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.2964039229930984, | |
| "grad_norm": 0.0013007308589294553, | |
| "learning_rate": 0.00018931310033093874, | |
| "loss": 0.0, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.2971304031965129, | |
| "grad_norm": 0.0011377567425370216, | |
| "learning_rate": 0.00018923238356606666, | |
| "loss": 0.0, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.29785688339992733, | |
| "grad_norm": 0.0017277223523706198, | |
| "learning_rate": 0.00018915166680119461, | |
| "loss": 0.0, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.2985833636033418, | |
| "grad_norm": 0.0009744380367919803, | |
| "learning_rate": 0.00018907095003632256, | |
| "loss": 0.0158, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.2993098438067563, | |
| "grad_norm": 0.0012453808449208736, | |
| "learning_rate": 0.0001889902332714505, | |
| "loss": 0.0, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.3000363240101707, | |
| "grad_norm": 0.05199315398931503, | |
| "learning_rate": 0.0001889095165065784, | |
| "loss": 0.0002, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.3007628042135852, | |
| "grad_norm": 0.0018118784064427018, | |
| "learning_rate": 0.00018882879974170636, | |
| "loss": 0.0116, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.3014892844169996, | |
| "grad_norm": 0.002479708520695567, | |
| "learning_rate": 0.0001887480829768343, | |
| "loss": 0.0096, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.3022157646204141, | |
| "grad_norm": 0.001789470436051488, | |
| "learning_rate": 0.00018866736621196223, | |
| "loss": 0.0, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.30294224482382853, | |
| "grad_norm": 1.2244577407836914, | |
| "learning_rate": 0.00018858664944709018, | |
| "loss": 0.002, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.303668725027243, | |
| "grad_norm": 0.001510178786702454, | |
| "learning_rate": 0.0001885059326822181, | |
| "loss": 0.0001, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.30439520523065744, | |
| "grad_norm": 0.0012227630941197276, | |
| "learning_rate": 0.00018842521591734603, | |
| "loss": 0.0007, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.3051216854340719, | |
| "grad_norm": 6.986842155456543, | |
| "learning_rate": 0.00018834449915247398, | |
| "loss": 0.0041, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.3058481656374864, | |
| "grad_norm": 0.0014463558327406645, | |
| "learning_rate": 0.00018826378238760193, | |
| "loss": 0.0006, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.30657464584090083, | |
| "grad_norm": 0.0013261119602248073, | |
| "learning_rate": 0.00018818306562272985, | |
| "loss": 0.0058, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.3073011260443153, | |
| "grad_norm": 0.0014859420480206609, | |
| "learning_rate": 0.00018810234885785777, | |
| "loss": 0.0, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.30802760624772973, | |
| "grad_norm": 0.001101717702113092, | |
| "learning_rate": 0.00018802163209298572, | |
| "loss": 0.0, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.3087540864511442, | |
| "grad_norm": 0.0022333369124680758, | |
| "learning_rate": 0.00018794091532811367, | |
| "loss": 0.0003, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.30948056665455864, | |
| "grad_norm": 0.011202757246792316, | |
| "learning_rate": 0.0001878601985632416, | |
| "loss": 0.0001, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.3102070468579731, | |
| "grad_norm": 0.001800977042876184, | |
| "learning_rate": 0.00018777948179836952, | |
| "loss": 0.0218, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.3109335270613876, | |
| "grad_norm": 0.004161295481026173, | |
| "learning_rate": 0.00018769876503349747, | |
| "loss": 0.0002, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.31166000726480203, | |
| "grad_norm": 0.0032398079056292772, | |
| "learning_rate": 0.00018761804826862542, | |
| "loss": 0.0006, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.3123864874682165, | |
| "grad_norm": 0.04649796336889267, | |
| "learning_rate": 0.00018753733150375334, | |
| "loss": 0.0001, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.31311296767163094, | |
| "grad_norm": 0.0010927373077720404, | |
| "learning_rate": 0.0001874566147388813, | |
| "loss": 0.0001, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.3138394478750454, | |
| "grad_norm": 0.002848146017640829, | |
| "learning_rate": 0.0001873758979740092, | |
| "loss": 0.0001, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.31456592807845984, | |
| "grad_norm": 0.001080561545677483, | |
| "learning_rate": 0.00018729518120913713, | |
| "loss": 0.0, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.3152924082818743, | |
| "grad_norm": 0.0011905552819371223, | |
| "learning_rate": 0.00018721446444426508, | |
| "loss": 0.0, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.31601888848528875, | |
| "grad_norm": 0.002129076048731804, | |
| "learning_rate": 0.00018713374767939303, | |
| "loss": 0.0001, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.31674536868870323, | |
| "grad_norm": 0.0015021953731775284, | |
| "learning_rate": 0.00018705303091452096, | |
| "loss": 0.0, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.3174718488921177, | |
| "grad_norm": 0.0011074721114709973, | |
| "learning_rate": 0.00018697231414964888, | |
| "loss": 0.0, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 0.31819832909553214, | |
| "grad_norm": 0.0013954649912193418, | |
| "learning_rate": 0.00018689159738477683, | |
| "loss": 0.0, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.3189248092989466, | |
| "grad_norm": 0.0008435107301920652, | |
| "learning_rate": 0.00018681088061990478, | |
| "loss": 0.0, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 0.31965128950236105, | |
| "grad_norm": 0.0015673066955059767, | |
| "learning_rate": 0.0001867301638550327, | |
| "loss": 0.0001, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.32037776970577553, | |
| "grad_norm": 0.0006937576690688729, | |
| "learning_rate": 0.00018664944709016062, | |
| "loss": 0.0, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.32110424990918995, | |
| "grad_norm": 0.0006967806257307529, | |
| "learning_rate": 0.00018656873032528857, | |
| "loss": 0.0, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.32183073011260444, | |
| "grad_norm": 0.0010916970204561949, | |
| "learning_rate": 0.0001864880135604165, | |
| "loss": 0.0, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 0.32255721031601886, | |
| "grad_norm": 0.054137326776981354, | |
| "learning_rate": 0.00018640729679554445, | |
| "loss": 0.0001, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.32328369051943334, | |
| "grad_norm": 0.0007835402502678335, | |
| "learning_rate": 0.00018632658003067237, | |
| "loss": 0.0, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.3240101707228478, | |
| "grad_norm": 0.0006136346491985023, | |
| "learning_rate": 0.00018624586326580032, | |
| "loss": 0.0, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.32473665092626225, | |
| "grad_norm": 0.0005693508428521454, | |
| "learning_rate": 0.00018616514650092824, | |
| "loss": 0.0, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.32546313112967673, | |
| "grad_norm": 0.001010082894936204, | |
| "learning_rate": 0.0001860844297360562, | |
| "loss": 0.0, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.32618961133309116, | |
| "grad_norm": 0.0006115248543210328, | |
| "learning_rate": 0.00018600371297118414, | |
| "loss": 0.0, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 0.32691609153650564, | |
| "grad_norm": 0.005977267399430275, | |
| "learning_rate": 0.00018592299620631206, | |
| "loss": 0.0, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.32764257173992006, | |
| "grad_norm": 0.0004075188480783254, | |
| "learning_rate": 0.00018584227944143999, | |
| "loss": 0.0, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 0.32836905194333454, | |
| "grad_norm": 0.0005186618654988706, | |
| "learning_rate": 0.00018576156267656794, | |
| "loss": 0.0, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.329095532146749, | |
| "grad_norm": 0.0005320632481016219, | |
| "learning_rate": 0.00018568084591169589, | |
| "loss": 0.0, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.32982201235016345, | |
| "grad_norm": 0.029953761026263237, | |
| "learning_rate": 0.0001856001291468238, | |
| "loss": 0.0, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.33054849255357793, | |
| "grad_norm": 0.0003188280388712883, | |
| "learning_rate": 0.00018551941238195173, | |
| "loss": 0.0, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.33127497275699236, | |
| "grad_norm": 0.0004120226367376745, | |
| "learning_rate": 0.00018543869561707968, | |
| "loss": 0.0, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.33200145296040684, | |
| "grad_norm": 0.0005906698643229902, | |
| "learning_rate": 0.0001853579788522076, | |
| "loss": 0.0, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 0.33272793316382127, | |
| "grad_norm": 0.00045190524542704225, | |
| "learning_rate": 0.00018527726208733555, | |
| "loss": 0.0, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.33345441336723575, | |
| "grad_norm": 0.0008185270125977695, | |
| "learning_rate": 0.00018519654532246348, | |
| "loss": 0.0, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.3341808935706502, | |
| "grad_norm": 0.0003965144860558212, | |
| "learning_rate": 0.00018511582855759143, | |
| "loss": 0.0, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.33490737377406465, | |
| "grad_norm": 0.0003858699928969145, | |
| "learning_rate": 0.00018503511179271935, | |
| "loss": 0.0, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 0.33563385397747914, | |
| "grad_norm": 0.0005558038246817887, | |
| "learning_rate": 0.00018495439502784727, | |
| "loss": 0.0, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.33636033418089356, | |
| "grad_norm": 0.00037957995664328337, | |
| "learning_rate": 0.00018487367826297525, | |
| "loss": 0.0, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 0.33708681438430804, | |
| "grad_norm": 0.0003773049684241414, | |
| "learning_rate": 0.00018479296149810317, | |
| "loss": 0.0, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.33781329458772247, | |
| "grad_norm": 0.0006691055023111403, | |
| "learning_rate": 0.0001847122447332311, | |
| "loss": 0.0, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.33853977479113695, | |
| "grad_norm": 0.000681467994581908, | |
| "learning_rate": 0.00018463152796835904, | |
| "loss": 0.0, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.3392662549945514, | |
| "grad_norm": 0.0005777952610515058, | |
| "learning_rate": 0.000184550811203487, | |
| "loss": 0.0, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 0.33999273519796586, | |
| "grad_norm": 0.0005241065518930554, | |
| "learning_rate": 0.00018447009443861492, | |
| "loss": 0.0, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.34071921540138034, | |
| "grad_norm": 0.00039175679557956755, | |
| "learning_rate": 0.00018438937767374284, | |
| "loss": 0.0, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 0.34144569560479476, | |
| "grad_norm": 0.00041981766116805375, | |
| "learning_rate": 0.0001843086609088708, | |
| "loss": 0.0, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.34217217580820924, | |
| "grad_norm": 0.000371248199371621, | |
| "learning_rate": 0.0001842279441439987, | |
| "loss": 0.0, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 0.34289865601162367, | |
| "grad_norm": 0.00031778172706253827, | |
| "learning_rate": 0.00018414722737912666, | |
| "loss": 0.0, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.34362513621503815, | |
| "grad_norm": 0.00029086892027407885, | |
| "learning_rate": 0.00018406651061425458, | |
| "loss": 0.0, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 0.3443516164184526, | |
| "grad_norm": 0.0002902498235926032, | |
| "learning_rate": 0.00018398579384938253, | |
| "loss": 0.0, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.34507809662186706, | |
| "grad_norm": 0.00040075520519167185, | |
| "learning_rate": 0.00018390507708451045, | |
| "loss": 0.0, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.3458045768252815, | |
| "grad_norm": 0.00024263348313979805, | |
| "learning_rate": 0.00018382436031963838, | |
| "loss": 0.0, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.34653105702869597, | |
| "grad_norm": 0.0003889152139890939, | |
| "learning_rate": 0.00018374364355476635, | |
| "loss": 0.0, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 0.34725753723211045, | |
| "grad_norm": 0.00022724135487806052, | |
| "learning_rate": 0.00018366292678989428, | |
| "loss": 0.0, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.3479840174355249, | |
| "grad_norm": 0.0003505950153339654, | |
| "learning_rate": 0.0001835822100250222, | |
| "loss": 0.0, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 0.34871049763893935, | |
| "grad_norm": 0.27515658736228943, | |
| "learning_rate": 0.00018350149326015015, | |
| "loss": 0.0002, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.3494369778423538, | |
| "grad_norm": 0.0003519939782563597, | |
| "learning_rate": 0.00018342077649527807, | |
| "loss": 0.0, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 0.35016345804576826, | |
| "grad_norm": 0.00033144818735308945, | |
| "learning_rate": 0.00018334005973040602, | |
| "loss": 0.0, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.3508899382491827, | |
| "grad_norm": 0.0003098642046097666, | |
| "learning_rate": 0.00018325934296553394, | |
| "loss": 0.0, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 0.35161641845259717, | |
| "grad_norm": 0.0002381189988227561, | |
| "learning_rate": 0.0001831786262006619, | |
| "loss": 0.0, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.35234289865601165, | |
| "grad_norm": 0.0007972380262799561, | |
| "learning_rate": 0.00018309790943578982, | |
| "loss": 0.0, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.3530693788594261, | |
| "grad_norm": 0.0025481837801635265, | |
| "learning_rate": 0.00018301719267091774, | |
| "loss": 0.0, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.35379585906284056, | |
| "grad_norm": 0.00035965273855254054, | |
| "learning_rate": 0.0001829364759060457, | |
| "loss": 0.0, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 0.354522339266255, | |
| "grad_norm": 0.000297486170893535, | |
| "learning_rate": 0.00018285575914117364, | |
| "loss": 0.0, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.35524881946966946, | |
| "grad_norm": 0.00028157353517599404, | |
| "learning_rate": 0.00018277504237630156, | |
| "loss": 0.0021, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 0.3559752996730839, | |
| "grad_norm": 0.0003479410079307854, | |
| "learning_rate": 0.00018269432561142948, | |
| "loss": 0.0, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.35670177987649837, | |
| "grad_norm": 0.002874035155400634, | |
| "learning_rate": 0.00018261360884655746, | |
| "loss": 0.0003, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 0.3574282600799128, | |
| "grad_norm": 0.00015613746654707938, | |
| "learning_rate": 0.00018253289208168538, | |
| "loss": 0.0004, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.3581547402833273, | |
| "grad_norm": 0.00013312845840118825, | |
| "learning_rate": 0.0001824521753168133, | |
| "loss": 0.0, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 0.35888122048674176, | |
| "grad_norm": 0.0001981378736672923, | |
| "learning_rate": 0.00018237145855194126, | |
| "loss": 0.0, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.3596077006901562, | |
| "grad_norm": 0.00027879534172825515, | |
| "learning_rate": 0.00018229074178706918, | |
| "loss": 0.0, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.36033418089357067, | |
| "grad_norm": 0.00016323383897542953, | |
| "learning_rate": 0.00018221002502219713, | |
| "loss": 0.0, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.3610606610969851, | |
| "grad_norm": 0.0005233317497186363, | |
| "learning_rate": 0.00018212930825732505, | |
| "loss": 0.0, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 0.3617871413003996, | |
| "grad_norm": 0.00013268415932543576, | |
| "learning_rate": 0.000182048591492453, | |
| "loss": 0.0, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.362513621503814, | |
| "grad_norm": 0.01259111799299717, | |
| "learning_rate": 0.00018196787472758092, | |
| "loss": 0.0001, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 0.3632401017072285, | |
| "grad_norm": 0.00014725365326739848, | |
| "learning_rate": 0.00018188715796270885, | |
| "loss": 0.0, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.36396658191064296, | |
| "grad_norm": 0.00021464233577717096, | |
| "learning_rate": 0.0001818064411978368, | |
| "loss": 0.0, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 0.3646930621140574, | |
| "grad_norm": 0.00011434618500061333, | |
| "learning_rate": 0.00018172572443296475, | |
| "loss": 0.0, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.36541954231747187, | |
| "grad_norm": 0.00012706074630841613, | |
| "learning_rate": 0.00018164500766809267, | |
| "loss": 0.0, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 0.3661460225208863, | |
| "grad_norm": 0.00015453774540219456, | |
| "learning_rate": 0.0001815642909032206, | |
| "loss": 0.0, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.3668725027243008, | |
| "grad_norm": 0.00014317889872472733, | |
| "learning_rate": 0.00018148357413834854, | |
| "loss": 0.0, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 0.3675989829277152, | |
| "grad_norm": 0.00014966298476792872, | |
| "learning_rate": 0.0001814028573734765, | |
| "loss": 0.0, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.3683254631311297, | |
| "grad_norm": 0.0001484445674577728, | |
| "learning_rate": 0.00018132214060860441, | |
| "loss": 0.0, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 0.3690519433345441, | |
| "grad_norm": 0.00012702727690339088, | |
| "learning_rate": 0.00018124142384373234, | |
| "loss": 0.0, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.3697784235379586, | |
| "grad_norm": 0.0001310681545874104, | |
| "learning_rate": 0.00018116070707886029, | |
| "loss": 0.0, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 0.37050490374137307, | |
| "grad_norm": 0.0001544792321510613, | |
| "learning_rate": 0.00018107999031398824, | |
| "loss": 0.0, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.3712313839447875, | |
| "grad_norm": 0.0003174188022967428, | |
| "learning_rate": 0.00018099927354911616, | |
| "loss": 0.0, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 0.371957864148202, | |
| "grad_norm": 0.00012976166908629239, | |
| "learning_rate": 0.0001809185567842441, | |
| "loss": 0.0, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.3726843443516164, | |
| "grad_norm": 0.00011333979637129232, | |
| "learning_rate": 0.00018083784001937203, | |
| "loss": 0.0, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 0.3734108245550309, | |
| "grad_norm": 0.00014128838665783405, | |
| "learning_rate": 0.00018075712325449995, | |
| "loss": 0.0, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.3741373047584453, | |
| "grad_norm": 9.816375677473843e-05, | |
| "learning_rate": 0.0001806764064896279, | |
| "loss": 0.0, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 0.3748637849618598, | |
| "grad_norm": 0.00012458849232643843, | |
| "learning_rate": 0.00018059568972475585, | |
| "loss": 0.0, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.3755902651652743, | |
| "grad_norm": 0.00011874383199028671, | |
| "learning_rate": 0.00018051497295988378, | |
| "loss": 0.0, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 0.3763167453686887, | |
| "grad_norm": 0.00010492030560271814, | |
| "learning_rate": 0.0001804342561950117, | |
| "loss": 0.0, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.3770432255721032, | |
| "grad_norm": 0.00012079241423634812, | |
| "learning_rate": 0.00018035353943013965, | |
| "loss": 0.0, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 0.3777697057755176, | |
| "grad_norm": 0.0010301030706614256, | |
| "learning_rate": 0.0001802728226652676, | |
| "loss": 0.0, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.3784961859789321, | |
| "grad_norm": 0.00020237726857885718, | |
| "learning_rate": 0.00018019210590039552, | |
| "loss": 0.0, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 0.3792226661823465, | |
| "grad_norm": 0.00014590570935979486, | |
| "learning_rate": 0.00018011138913552344, | |
| "loss": 0.0, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.379949146385761, | |
| "grad_norm": 0.00012144942593295127, | |
| "learning_rate": 0.0001800306723706514, | |
| "loss": 0.0, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 0.3806756265891754, | |
| "grad_norm": 0.00011861774692079052, | |
| "learning_rate": 0.00017994995560577932, | |
| "loss": 0.0, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.3814021067925899, | |
| "grad_norm": 0.0002795616746880114, | |
| "learning_rate": 0.00017986923884090727, | |
| "loss": 0.0, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.3821285869960044, | |
| "grad_norm": 0.0001514231407782063, | |
| "learning_rate": 0.00017978852207603522, | |
| "loss": 0.0, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.3828550671994188, | |
| "grad_norm": 0.000137203314807266, | |
| "learning_rate": 0.00017970780531116314, | |
| "loss": 0.0, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 0.3835815474028333, | |
| "grad_norm": 0.00011654103582259268, | |
| "learning_rate": 0.00017962708854629106, | |
| "loss": 0.0, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.3843080276062477, | |
| "grad_norm": 0.00011019224621122703, | |
| "learning_rate": 0.000179546371781419, | |
| "loss": 0.0, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 0.3850345078096622, | |
| "grad_norm": 0.00011716793233063072, | |
| "learning_rate": 0.00017946565501654696, | |
| "loss": 0.0, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.3857609880130766, | |
| "grad_norm": 0.00013133355241734535, | |
| "learning_rate": 0.00017938493825167488, | |
| "loss": 0.0, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 0.3864874682164911, | |
| "grad_norm": 0.00010616648069117218, | |
| "learning_rate": 0.0001793042214868028, | |
| "loss": 0.0, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.3872139484199056, | |
| "grad_norm": 0.00012793530186172575, | |
| "learning_rate": 0.00017922350472193076, | |
| "loss": 0.0, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 0.38794042862332, | |
| "grad_norm": 0.00021880699205212295, | |
| "learning_rate": 0.0001791427879570587, | |
| "loss": 0.0, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.3886669088267345, | |
| "grad_norm": 0.0321350060403347, | |
| "learning_rate": 0.00017906207119218663, | |
| "loss": 0.0, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 0.3893933890301489, | |
| "grad_norm": 0.0001054102904163301, | |
| "learning_rate": 0.00017898135442731455, | |
| "loss": 0.0, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.3901198692335634, | |
| "grad_norm": 0.00011370116408215836, | |
| "learning_rate": 0.0001789006376624425, | |
| "loss": 0.0, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 0.3908463494369778, | |
| "grad_norm": 7.921565702417865e-05, | |
| "learning_rate": 0.00017881992089757042, | |
| "loss": 0.0, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.3915728296403923, | |
| "grad_norm": 0.0001325017656199634, | |
| "learning_rate": 0.00017873920413269837, | |
| "loss": 0.0, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 0.39229930984380673, | |
| "grad_norm": 0.00011485354480100796, | |
| "learning_rate": 0.00017865848736782632, | |
| "loss": 0.0, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.3930257900472212, | |
| "grad_norm": 0.0001319620932918042, | |
| "learning_rate": 0.00017857777060295424, | |
| "loss": 0.0, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 0.3937522702506357, | |
| "grad_norm": 0.00011554160300875083, | |
| "learning_rate": 0.00017849705383808217, | |
| "loss": 0.0, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.3944787504540501, | |
| "grad_norm": 0.00011111667845398188, | |
| "learning_rate": 0.00017841633707321012, | |
| "loss": 0.0, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 0.3952052306574646, | |
| "grad_norm": 0.00030816654907539487, | |
| "learning_rate": 0.00017833562030833807, | |
| "loss": 0.0, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.395931710860879, | |
| "grad_norm": 0.00012618518667295575, | |
| "learning_rate": 0.000178254903543466, | |
| "loss": 0.0, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 0.3966581910642935, | |
| "grad_norm": 0.00011036815703846514, | |
| "learning_rate": 0.0001781741867785939, | |
| "loss": 0.0, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.39738467126770793, | |
| "grad_norm": 0.001136181759648025, | |
| "learning_rate": 0.00017809347001372186, | |
| "loss": 0.0, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 0.3981111514711224, | |
| "grad_norm": 9.4526847533416e-05, | |
| "learning_rate": 0.00017801275324884978, | |
| "loss": 0.0, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.3988376316745369, | |
| "grad_norm": 9.693180618342012e-05, | |
| "learning_rate": 0.00017793203648397773, | |
| "loss": 0.0, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 0.3995641118779513, | |
| "grad_norm": 0.00013439155009109527, | |
| "learning_rate": 0.00017785131971910566, | |
| "loss": 0.0, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.4001452960406829, | |
| "eval_accuracy": 1.0, | |
| "eval_f1": 1.0, | |
| "eval_loss": 8.966613904703991e-07, | |
| "eval_precision": 1.0, | |
| "eval_recall": 1.0, | |
| "eval_runtime": 123.9354, | |
| "eval_samples_per_second": 313.591, | |
| "eval_steps_per_second": 2.453, | |
| "step": 2754 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 13766, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 1377, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3618642193367040.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
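
The state above follows the standard Trainer `log_history` layout: training records carry `loss`/`grad_norm`/`learning_rate`, while evaluation records (one per `eval_steps`, here every 1377 steps) carry `eval_*` keys instead. Below is a minimal sketch of reading those records back out; it assumes the JSON has been saved as `trainer_state.json` (the filename and variable names are illustrative, not part of the log itself).

```python
import json

# Load the trainer state dumped alongside a checkpoint
# (hypothetical path; adjust to where this file actually lives).
with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries log "loss"; evaluation entries log "eval_loss" instead,
# so the two record types can be separated by key presence.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]
print(f"{len(steps)} training log points, last loss={losses[-1]}")

print(f"best checkpoint: {state['best_model_checkpoint']} "
      f"(metric={state['best_metric']})")
for e in eval_logs:
    print(f"step {e['step']}: eval_loss={e['eval_loss']:.3g}, "
          f"f1={e.get('eval_f1')}")
```

Nothing here depends on this particular run; the same key-presence split works for any `trainer_state.json` produced by the Transformers `Trainer`.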