| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 533, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.001876172607879925, | |
| "grad_norm": 0.4121309108650883, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 1.6355, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.009380863039399626, | |
| "grad_norm": 0.4145535276322867, | |
| "learning_rate": 1.8518518518518518e-05, | |
| "loss": 1.6509, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01876172607879925, | |
| "grad_norm": 0.499090421739371, | |
| "learning_rate": 3.7037037037037037e-05, | |
| "loss": 1.6194, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.028142589118198873, | |
| "grad_norm": 0.2168002082363555, | |
| "learning_rate": 5.555555555555556e-05, | |
| "loss": 1.6102, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0375234521575985, | |
| "grad_norm": 0.19182026954015316, | |
| "learning_rate": 7.407407407407407e-05, | |
| "loss": 1.5826, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04690431519699812, | |
| "grad_norm": 0.17104214986412603, | |
| "learning_rate": 9.25925925925926e-05, | |
| "loss": 1.4983, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.05628517823639775, | |
| "grad_norm": 0.1314352337398472, | |
| "learning_rate": 0.00011111111111111112, | |
| "loss": 1.4705, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06566604127579738, | |
| "grad_norm": 0.13683731835169527, | |
| "learning_rate": 0.00012962962962962963, | |
| "loss": 1.4351, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.075046904315197, | |
| "grad_norm": 0.12592565344109571, | |
| "learning_rate": 0.00014814814814814815, | |
| "loss": 1.4154, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08442776735459662, | |
| "grad_norm": 0.10747815841874438, | |
| "learning_rate": 0.0001666666666666667, | |
| "loss": 1.3925, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.09380863039399624, | |
| "grad_norm": 0.09736495320865902, | |
| "learning_rate": 0.0001851851851851852, | |
| "loss": 1.3452, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.10318949343339587, | |
| "grad_norm": 0.09026377977279064, | |
| "learning_rate": 0.00019999784921417228, | |
| "loss": 1.3658, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.1125703564727955, | |
| "grad_norm": 0.08191150814769302, | |
| "learning_rate": 0.00019992258142410334, | |
| "loss": 1.3526, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.12195121951219512, | |
| "grad_norm": 0.0987761994127843, | |
| "learning_rate": 0.00019973986684235418, | |
| "loss": 1.3457, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.13133208255159476, | |
| "grad_norm": 0.08470882333862399, | |
| "learning_rate": 0.00019944990194198758, | |
| "loss": 1.3237, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.14071294559099437, | |
| "grad_norm": 0.08215471773448521, | |
| "learning_rate": 0.00019905299852237654, | |
| "loss": 1.3171, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.150093808630394, | |
| "grad_norm": 0.07920641876308229, | |
| "learning_rate": 0.00019854958337392654, | |
| "loss": 1.2913, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.15947467166979362, | |
| "grad_norm": 0.09467087341552041, | |
| "learning_rate": 0.00019794019781914766, | |
| "loss": 1.3156, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.16885553470919323, | |
| "grad_norm": 0.08313358090040693, | |
| "learning_rate": 0.0001972254971305701, | |
| "loss": 1.3032, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.17823639774859287, | |
| "grad_norm": 0.08181396678268484, | |
| "learning_rate": 0.00019640624982612942, | |
| "loss": 1.3047, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.18761726078799248, | |
| "grad_norm": 0.08648614872822097, | |
| "learning_rate": 0.00019548333684277919, | |
| "loss": 1.2938, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.19699812382739212, | |
| "grad_norm": 0.08114011033820992, | |
| "learning_rate": 0.00019445775058921853, | |
| "loss": 1.2919, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.20637898686679174, | |
| "grad_norm": 0.08069454171446046, | |
| "learning_rate": 0.00019333059387875525, | |
| "loss": 1.3007, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.21575984990619138, | |
| "grad_norm": 0.1055930521893841, | |
| "learning_rate": 0.0001921030787434499, | |
| "loss": 1.3022, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.225140712945591, | |
| "grad_norm": 0.1039770749041834, | |
| "learning_rate": 0.0001907765251308173, | |
| "loss": 1.2945, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.23452157598499063, | |
| "grad_norm": 0.08612620245766023, | |
| "learning_rate": 0.0001893523594844865, | |
| "loss": 1.2734, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.24390243902439024, | |
| "grad_norm": 0.09453075060390269, | |
| "learning_rate": 0.00018783211321034534, | |
| "loss": 1.2903, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.25328330206378985, | |
| "grad_norm": 0.09115708333629854, | |
| "learning_rate": 0.00018621742102981905, | |
| "loss": 1.3021, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.2626641651031895, | |
| "grad_norm": 0.09246301111274602, | |
| "learning_rate": 0.0001845100192220537, | |
| "loss": 1.2994, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.27204502814258913, | |
| "grad_norm": 0.08863879008659571, | |
| "learning_rate": 0.00018271174375689454, | |
| "loss": 1.3049, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.28142589118198874, | |
| "grad_norm": 0.09121089281314407, | |
| "learning_rate": 0.00018082452832066687, | |
| "loss": 1.2836, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.29080675422138835, | |
| "grad_norm": 0.08883133842744517, | |
| "learning_rate": 0.00017885040223688235, | |
| "loss": 1.2643, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.300187617260788, | |
| "grad_norm": 0.0889208798307562, | |
| "learning_rate": 0.0001767914882841067, | |
| "loss": 1.2902, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.30956848030018763, | |
| "grad_norm": 0.10746121860126627, | |
| "learning_rate": 0.00017465000041333494, | |
| "loss": 1.2968, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.31894934333958724, | |
| "grad_norm": 0.0832629700388369, | |
| "learning_rate": 0.0001724282413673291, | |
| "loss": 1.3031, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.32833020637898686, | |
| "grad_norm": 0.09626519128659516, | |
| "learning_rate": 0.00017012860020447796, | |
| "loss": 1.3223, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.33771106941838647, | |
| "grad_norm": 0.08434970867620112, | |
| "learning_rate": 0.0001677535497298416, | |
| "loss": 1.2684, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.34709193245778613, | |
| "grad_norm": 0.08136406881743141, | |
| "learning_rate": 0.0001653056438361432, | |
| "loss": 1.265, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.35647279549718575, | |
| "grad_norm": 0.0897096956349179, | |
| "learning_rate": 0.0001627875147575671, | |
| "loss": 1.2936, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.36585365853658536, | |
| "grad_norm": 0.08876653720245482, | |
| "learning_rate": 0.00016020187023931638, | |
| "loss": 1.2961, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.37523452157598497, | |
| "grad_norm": 0.08532928831285669, | |
| "learning_rate": 0.00015755149062597333, | |
| "loss": 1.2634, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 0.09494232614320906, | |
| "learning_rate": 0.00015483922587179386, | |
| "loss": 1.2794, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.39399624765478425, | |
| "grad_norm": 0.0938403596147772, | |
| "learning_rate": 0.00015206799247615037, | |
| "loss": 1.2668, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.40337711069418386, | |
| "grad_norm": 0.09298814066383618, | |
| "learning_rate": 0.00014924077034741924, | |
| "loss": 1.2704, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.41275797373358347, | |
| "grad_norm": 0.09381021867831099, | |
| "learning_rate": 0.0001463605995986836, | |
| "loss": 1.2743, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.42213883677298314, | |
| "grad_norm": 0.10478061496717873, | |
| "learning_rate": 0.000143430577278699, | |
| "loss": 1.3064, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.43151969981238275, | |
| "grad_norm": 0.0967412755428019, | |
| "learning_rate": 0.0001404538540416353, | |
| "loss": 1.2749, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.44090056285178236, | |
| "grad_norm": 0.10245748178767897, | |
| "learning_rate": 0.00013743363075917724, | |
| "loss": 1.2879, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.450281425891182, | |
| "grad_norm": 0.09494585437974697, | |
| "learning_rate": 0.00013437315507862566, | |
| "loss": 1.268, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.4596622889305816, | |
| "grad_norm": 0.10070892164674464, | |
| "learning_rate": 0.0001312757179307012, | |
| "loss": 1.2741, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.46904315196998125, | |
| "grad_norm": 0.08590715074864536, | |
| "learning_rate": 0.000128144649990805, | |
| "loss": 1.28, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.47842401500938087, | |
| "grad_norm": 0.0921370657647381, | |
| "learning_rate": 0.00012498331809754243, | |
| "loss": 1.3099, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.4878048780487805, | |
| "grad_norm": 0.09411198985391388, | |
| "learning_rate": 0.00012179512163235974, | |
| "loss": 1.2402, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.4971857410881801, | |
| "grad_norm": 0.0955041592156138, | |
| "learning_rate": 0.0001185834888641883, | |
| "loss": 1.2637, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.5065666041275797, | |
| "grad_norm": 0.08843739914811306, | |
| "learning_rate": 0.0001153518732630253, | |
| "loss": 1.2524, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5159474671669794, | |
| "grad_norm": 0.08894269736301577, | |
| "learning_rate": 0.00011210374978641631, | |
| "loss": 1.3058, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.525328330206379, | |
| "grad_norm": 0.09583708909351138, | |
| "learning_rate": 0.0001088426111428319, | |
| "loss": 1.2462, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5347091932457786, | |
| "grad_norm": 0.0934542252776399, | |
| "learning_rate": 0.00010557196403595688, | |
| "loss": 1.2641, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.5440900562851783, | |
| "grad_norm": 0.08688918325528941, | |
| "learning_rate": 0.00010229532539393049, | |
| "loss": 1.2593, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5534709193245778, | |
| "grad_norm": 0.08796474911729359, | |
| "learning_rate": 9.901621858759203e-05, | |
| "loss": 1.2639, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.5628517823639775, | |
| "grad_norm": 0.09427974834689387, | |
| "learning_rate": 9.57381696417989e-05, | |
| "loss": 1.257, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5722326454033771, | |
| "grad_norm": 0.08656751800419889, | |
| "learning_rate": 9.246470344389065e-05, | |
| "loss": 1.2545, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.5816135084427767, | |
| "grad_norm": 0.09133977950050422, | |
| "learning_rate": 8.919933995337624e-05, | |
| "loss": 1.2629, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5909943714821764, | |
| "grad_norm": 0.0881803403964572, | |
| "learning_rate": 8.594559041692003e-05, | |
| "loss": 1.2668, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.600375234521576, | |
| "grad_norm": 0.08194022763623, | |
| "learning_rate": 8.270695359269698e-05, | |
| "loss": 1.2426, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6097560975609756, | |
| "grad_norm": 0.08404336869271818, | |
| "learning_rate": 7.948691198817666e-05, | |
| "loss": 1.2608, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.6191369606003753, | |
| "grad_norm": 0.0911226278784459, | |
| "learning_rate": 7.628892811538137e-05, | |
| "loss": 1.2446, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.6285178236397748, | |
| "grad_norm": 0.0866722940291979, | |
| "learning_rate": 7.311644076764564e-05, | |
| "loss": 1.2661, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.6378986866791745, | |
| "grad_norm": 0.08492545007005736, | |
| "learning_rate": 6.997286132188057e-05, | |
| "loss": 1.2476, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6472795497185742, | |
| "grad_norm": 0.08729514750397552, | |
| "learning_rate": 6.68615700703186e-05, | |
| "loss": 1.2471, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.6566604127579737, | |
| "grad_norm": 0.08416435736232936, | |
| "learning_rate": 6.37859125856842e-05, | |
| "loss": 1.2773, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6660412757973734, | |
| "grad_norm": 0.09054925125258262, | |
| "learning_rate": 6.074919612369787e-05, | |
| "loss": 1.2854, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.6754221388367729, | |
| "grad_norm": 0.0840484253491192, | |
| "learning_rate": 5.7754686066783045e-05, | |
| "loss": 1.2321, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.6848030018761726, | |
| "grad_norm": 0.09091692153105359, | |
| "learning_rate": 5.4805602412798906e-05, | |
| "loss": 1.2519, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.6941838649155723, | |
| "grad_norm": 0.0996295825165971, | |
| "learning_rate": 5.1905116312575475e-05, | |
| "loss": 1.2422, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7035647279549718, | |
| "grad_norm": 0.09167775721865827, | |
| "learning_rate": 4.905634665997371e-05, | |
| "loss": 1.274, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.7129455909943715, | |
| "grad_norm": 0.09342003802996314, | |
| "learning_rate": 4.6262356738137937e-05, | |
| "loss": 1.2727, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.7223264540337712, | |
| "grad_norm": 0.08770871963126985, | |
| "learning_rate": 4.3526150925546e-05, | |
| "loss": 1.2558, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.7317073170731707, | |
| "grad_norm": 0.09069696254933435, | |
| "learning_rate": 4.0850671465400144e-05, | |
| "loss": 1.2593, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.7410881801125704, | |
| "grad_norm": 0.08665209061432325, | |
| "learning_rate": 3.823879530183154e-05, | |
| "loss": 1.273, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.7504690431519699, | |
| "grad_norm": 0.08862136722443326, | |
| "learning_rate": 3.569333098632109e-05, | |
| "loss": 1.2613, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7598499061913696, | |
| "grad_norm": 0.08931630016078942, | |
| "learning_rate": 3.3217015657663145e-05, | |
| "loss": 1.255, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 0.08603421106367509, | |
| "learning_rate": 3.081251209871872e-05, | |
| "loss": 1.2956, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7786116322701688, | |
| "grad_norm": 0.08449087840242356, | |
| "learning_rate": 2.848240587312433e-05, | |
| "loss": 1.2554, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.7879924953095685, | |
| "grad_norm": 0.09050119999554507, | |
| "learning_rate": 2.622920254503416e-05, | |
| "loss": 1.2555, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.797373358348968, | |
| "grad_norm": 0.08603303013311027, | |
| "learning_rate": 2.405532498488612e-05, | |
| "loss": 1.2445, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.8067542213883677, | |
| "grad_norm": 0.0917770059937159, | |
| "learning_rate": 2.196311076408808e-05, | |
| "loss": 1.2499, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.8161350844277674, | |
| "grad_norm": 0.09260597797294362, | |
| "learning_rate": 1.995480964142663e-05, | |
| "loss": 1.2643, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.8255159474671669, | |
| "grad_norm": 0.08638741822571576, | |
| "learning_rate": 1.803258114390034e-05, | |
| "loss": 1.2381, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.8348968105065666, | |
| "grad_norm": 0.08438081141056018, | |
| "learning_rate": 1.6198492244579722e-05, | |
| "loss": 1.2422, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.8442776735459663, | |
| "grad_norm": 0.08400750233223092, | |
| "learning_rate": 1.445451513999012e-05, | |
| "loss": 1.2178, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8536585365853658, | |
| "grad_norm": 0.08554689021587018, | |
| "learning_rate": 1.2802525129408038e-05, | |
| "loss": 1.2406, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.8630393996247655, | |
| "grad_norm": 0.08737158991700927, | |
| "learning_rate": 1.1244298598351077e-05, | |
| "loss": 1.2434, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.8724202626641651, | |
| "grad_norm": 0.08730413757640722, | |
| "learning_rate": 9.781511108429909e-06, | |
| "loss": 1.2653, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.8818011257035647, | |
| "grad_norm": 0.0881674821245138, | |
| "learning_rate": 8.415735595616203e-06, | |
| "loss": 1.2524, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.8911819887429644, | |
| "grad_norm": 0.09353121525052323, | |
| "learning_rate": 7.1484406788639215e-06, | |
| "loss": 1.2523, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.900562851782364, | |
| "grad_norm": 0.08427560236702278, | |
| "learning_rate": 5.980989080902721e-06, | |
| "loss": 1.2812, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.9099437148217636, | |
| "grad_norm": 0.08684819671006813, | |
| "learning_rate": 4.914636162901798e-06, | |
| "loss": 1.2549, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.9193245778611632, | |
| "grad_norm": 0.08436176928753658, | |
| "learning_rate": 3.950528574579415e-06, | |
| "loss": 1.2724, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.9287054409005628, | |
| "grad_norm": 0.0881012431213454, | |
| "learning_rate": 3.089703021210033e-06, | |
| "loss": 1.2375, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.9380863039399625, | |
| "grad_norm": 0.10177275096404065, | |
| "learning_rate": 2.333085148854708e-06, | |
| "loss": 1.2587, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9474671669793621, | |
| "grad_norm": 0.08489096282767854, | |
| "learning_rate": 1.6814885490135102e-06, | |
| "loss": 1.2745, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.9568480300187617, | |
| "grad_norm": 0.08633339614238927, | |
| "learning_rate": 1.1356138837702702e-06, | |
| "loss": 1.2495, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.9662288930581614, | |
| "grad_norm": 0.0865458432922912, | |
| "learning_rate": 6.960481323703638e-07, | |
| "loss": 1.2604, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.975609756097561, | |
| "grad_norm": 0.0850043491504774, | |
| "learning_rate": 3.632639600416932e-07, | |
| "loss": 1.2568, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.9849906191369606, | |
| "grad_norm": 0.08694509913799078, | |
| "learning_rate": 1.376192097375495e-07, | |
| "loss": 1.2397, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.9943714821763602, | |
| "grad_norm": 0.08603902256571394, | |
| "learning_rate": 1.9356517347990378e-08, | |
| "loss": 1.2735, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 1.1848469972610474, | |
| "eval_runtime": 1561.8445, | |
| "eval_samples_per_second": 8.566, | |
| "eval_steps_per_second": 0.536, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 533, | |
| "total_flos": 6701415728676864.0, | |
| "train_loss": 1.2950908849208038, | |
| "train_runtime": 12657.6418, | |
| "train_samples_per_second": 2.694, | |
| "train_steps_per_second": 0.042 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 533, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 25, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6701415728676864.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |