{
  "best_metric": 1.1137409210205078,
  "best_model_checkpoint": "./trained-age/checkpoint-5000",
  "epoch": 1.1066027296200664,
  "eval_steps": 1000,
  "global_step": 6000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 0.000199907783105865,
      "loss": 2.1543,
      "step": 10
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00019981556621172998,
      "loss": 2.0322,
      "step": 20
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.000199723349317595,
      "loss": 1.9709,
      "step": 30
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00019963113242345997,
      "loss": 1.9224,
      "step": 40
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00019953891552932499,
      "loss": 1.9126,
      "step": 50
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00019944669863518997,
      "loss": 1.9034,
      "step": 60
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00019935448174105498,
      "loss": 1.8643,
      "step": 70
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00019926226484691997,
      "loss": 1.7807,
      "step": 80
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00019917004795278495,
      "loss": 1.7977,
      "step": 90
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00019907783105864993,
      "loss": 1.9388,
      "step": 100
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00019898561416451495,
      "loss": 1.9642,
      "step": 110
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00019889339727037993,
      "loss": 1.9164,
      "step": 120
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00019880118037624494,
      "loss": 1.8912,
      "step": 130
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00019870896348210993,
      "loss": 1.7802,
      "step": 140
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00019861674658797494,
      "loss": 1.7922,
      "step": 150
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00019852452969383992,
      "loss": 1.8446,
      "step": 160
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0001984323127997049,
      "loss": 1.8217,
      "step": 170
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0001983400959055699,
      "loss": 1.8406,
      "step": 180
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0001982478790114349,
      "loss": 1.8379,
      "step": 190
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0001981556621172999,
      "loss": 1.7822,
      "step": 200
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0001980634452231649,
      "loss": 1.8287,
      "step": 210
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00019797122832902988,
      "loss": 1.8191,
      "step": 220
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0001978790114348949,
      "loss": 1.7958,
      "step": 230
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00019778679454075988,
      "loss": 1.7708,
      "step": 240
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00019769457764662486,
      "loss": 1.7059,
      "step": 250
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00019760236075248985,
      "loss": 1.7921,
      "step": 260
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00019751014385835486,
      "loss": 1.8458,
      "step": 270
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00019741792696421984,
      "loss": 1.7092,
      "step": 280
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00019732571007008486,
      "loss": 1.7765,
      "step": 290
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00019723349317594984,
      "loss": 1.7186,
      "step": 300
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00019714127628181485,
      "loss": 1.7108,
      "step": 310
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00019704905938767984,
      "loss": 1.5981,
      "step": 320
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00019695684249354482,
      "loss": 1.7238,
      "step": 330
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0001968646255994098,
      "loss": 1.677,
      "step": 340
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00019677240870527482,
      "loss": 1.6465,
      "step": 350
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0001966801918111398,
      "loss": 1.6533,
      "step": 360
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0001965879749170048,
      "loss": 1.5514,
      "step": 370
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0001964957580228698,
      "loss": 1.5717,
      "step": 380
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0001964035411287348,
      "loss": 1.6208,
      "step": 390
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0001963113242345998,
      "loss": 1.7294,
      "step": 400
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019621910734046478,
      "loss": 1.6014,
      "step": 410
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019612689044632976,
      "loss": 1.7207,
      "step": 420
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019603467355219477,
      "loss": 1.6452,
      "step": 430
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019594245665805976,
      "loss": 1.5979,
      "step": 440
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019585023976392477,
      "loss": 1.5809,
      "step": 450
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019575802286978975,
      "loss": 1.5626,
      "step": 460
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00019566580597565476,
      "loss": 1.4701,
      "step": 470
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00019557358908151975,
      "loss": 1.6025,
      "step": 480
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00019548137218738473,
      "loss": 1.4946,
      "step": 490
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00019538915529324972,
      "loss": 1.6431,
      "step": 500
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00019529693839911473,
      "loss": 1.5584,
      "step": 510
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00019520472150497971,
      "loss": 1.6565,
      "step": 520
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00019511250461084473,
      "loss": 1.5616,
      "step": 530
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0001950202877167097,
      "loss": 1.5366,
      "step": 540
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00019492807082257472,
      "loss": 1.6421,
      "step": 550
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0001948358539284397,
      "loss": 1.5919,
      "step": 560
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0001947436370343047,
      "loss": 1.594,
      "step": 570
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019465142014016967,
      "loss": 1.5947,
      "step": 580
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019455920324603469,
      "loss": 1.612,
      "step": 590
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019446698635189967,
      "loss": 1.4975,
      "step": 600
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019437476945776468,
      "loss": 1.5175,
      "step": 610
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019428255256362967,
      "loss": 1.3869,
      "step": 620
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019419033566949468,
      "loss": 1.494,
      "step": 630
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019409811877535966,
      "loss": 1.557,
      "step": 640
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019400590188122465,
      "loss": 1.4168,
      "step": 650
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019391368498708963,
      "loss": 1.4237,
      "step": 660
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019382146809295462,
      "loss": 1.4369,
      "step": 670
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019372925119881963,
      "loss": 1.399,
      "step": 680
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019363703430468464,
      "loss": 1.4145,
      "step": 690
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019354481741054962,
      "loss": 1.5005,
      "step": 700
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0001934526005164146,
      "loss": 1.5179,
      "step": 710
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019336038362227962,
      "loss": 1.4201,
      "step": 720
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0001932681667281446,
      "loss": 1.5546,
      "step": 730
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0001931759498340096,
      "loss": 1.5961,
      "step": 740
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00019308373293987457,
      "loss": 1.4922,
      "step": 750
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00019299151604573958,
      "loss": 1.4039,
      "step": 760
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0001928992991516046,
      "loss": 1.4541,
      "step": 770
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00019280708225746958,
      "loss": 1.4385,
      "step": 780
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019271486536333456,
      "loss": 1.4652,
      "step": 790
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019262264846919958,
      "loss": 1.4903,
      "step": 800
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019253043157506456,
      "loss": 1.5028,
      "step": 810
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019243821468092954,
      "loss": 1.4647,
      "step": 820
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019234599778679453,
      "loss": 1.4421,
      "step": 830
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019225378089265954,
      "loss": 1.483,
      "step": 840
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00019216156399852455,
      "loss": 1.5329,
      "step": 850
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00019206934710438954,
      "loss": 1.4918,
      "step": 860
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00019197713021025452,
      "loss": 1.4519,
      "step": 870
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00019188491331611953,
      "loss": 1.2952,
      "step": 880
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00019179269642198452,
      "loss": 1.4764,
      "step": 890
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0001917004795278495,
      "loss": 1.419,
      "step": 900
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00019160826263371449,
      "loss": 1.4126,
      "step": 910
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0001915160457395795,
      "loss": 1.4495,
      "step": 920
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0001914238288454445,
      "loss": 1.4355,
      "step": 930
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0001913316119513095,
      "loss": 1.3425,
      "step": 940
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00019123939505717448,
      "loss": 1.3806,
      "step": 950
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0001911471781630395,
      "loss": 1.3833,
      "step": 960
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00019105496126890447,
      "loss": 1.4533,
      "step": 970
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00019096274437476946,
      "loss": 1.4369,
      "step": 980
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00019087052748063444,
      "loss": 1.4233,
      "step": 990
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00019077831058649945,
      "loss": 1.3347,
      "step": 1000
    },
    {
      "epoch": 0.18,
      "eval_accuracy": 0.4296147526017893,
      "eval_loss": 1.3819409608840942,
      "eval_runtime": 98.0284,
      "eval_samples_per_second": 111.743,
      "eval_steps_per_second": 13.976,
      "step": 1000
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00019068609369236446,
      "loss": 1.3295,
      "step": 1010
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00019059387679822945,
      "loss": 1.4622,
      "step": 1020
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00019050165990409443,
      "loss": 1.3483,
      "step": 1030
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00019040944300995944,
      "loss": 1.4762,
      "step": 1040
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00019031722611582443,
      "loss": 1.462,
      "step": 1050
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00019022500922168941,
      "loss": 1.5086,
      "step": 1060
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0001901327923275544,
      "loss": 1.4992,
      "step": 1070
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0001900405754334194,
      "loss": 1.3563,
      "step": 1080
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00018994835853928442,
      "loss": 1.3295,
      "step": 1090
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0001898561416451494,
      "loss": 1.3227,
      "step": 1100
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0001897639247510144,
      "loss": 1.4465,
      "step": 1110
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0001896717078568794,
      "loss": 1.4442,
      "step": 1120
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00018957949096274439,
      "loss": 1.3804,
      "step": 1130
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00018948727406860937,
      "loss": 1.3868,
      "step": 1140
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00018939505717447435,
      "loss": 1.4905,
      "step": 1150
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00018930284028033937,
      "loss": 1.3099,
      "step": 1160
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00018921062338620438,
      "loss": 1.4865,
      "step": 1170
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00018911840649206936,
      "loss": 1.4155,
      "step": 1180
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00018902618959793435,
      "loss": 1.4095,
      "step": 1190
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00018893397270379936,
      "loss": 1.3217,
      "step": 1200
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00018884175580966434,
      "loss": 1.5028,
      "step": 1210
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00018874953891552933,
      "loss": 1.4024,
      "step": 1220
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0001886573220213943,
      "loss": 1.4699,
      "step": 1230
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00018856510512725932,
      "loss": 1.4158,
      "step": 1240
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00018847288823312433,
      "loss": 1.4024,
      "step": 1250
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00018838067133898932,
      "loss": 1.4017,
      "step": 1260
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0001882884544448543,
      "loss": 1.3641,
      "step": 1270
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0001881962375507193,
      "loss": 1.4008,
      "step": 1280
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0001881040206565843,
      "loss": 1.3428,
      "step": 1290
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00018801180376244928,
      "loss": 1.3389,
      "step": 1300
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00018791958686831427,
      "loss": 1.3816,
      "step": 1310
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00018782736997417928,
      "loss": 1.3573,
      "step": 1320
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0001877351530800443,
      "loss": 1.3648,
      "step": 1330
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018764293618590928,
      "loss": 1.4559,
      "step": 1340
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018755071929177426,
      "loss": 1.3925,
      "step": 1350
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018745850239763924,
      "loss": 1.3426,
      "step": 1360
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018736628550350426,
      "loss": 1.3808,
      "step": 1370
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018727406860936924,
      "loss": 1.4028,
      "step": 1380
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00018718185171523422,
      "loss": 1.4092,
      "step": 1390
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00018708963482109924,
      "loss": 1.3941,
      "step": 1400
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00018699741792696425,
      "loss": 1.3246,
      "step": 1410
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00018690520103282923,
      "loss": 1.3828,
      "step": 1420
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00018681298413869422,
      "loss": 1.3388,
      "step": 1430
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0001867207672445592,
      "loss": 1.3951,
      "step": 1440
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0001866285503504242,
      "loss": 1.3484,
      "step": 1450
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0001865363334562892,
      "loss": 1.5975,
      "step": 1460
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00018644411656215418,
      "loss": 1.4879,
      "step": 1470
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0001863518996680192,
      "loss": 1.3751,
      "step": 1480
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0001862596827738842,
      "loss": 1.3561,
      "step": 1490
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0001861674658797492,
      "loss": 1.3283,
      "step": 1500
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00018607524898561417,
      "loss": 1.4457,
      "step": 1510
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00018598303209147916,
      "loss": 1.4338,
      "step": 1520
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00018589081519734417,
      "loss": 1.313,
      "step": 1530
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00018579859830320915,
      "loss": 1.4286,
      "step": 1540
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00018570638140907414,
      "loss": 1.3152,
      "step": 1550
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00018561416451493915,
      "loss": 1.3988,
      "step": 1560
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00018552194762080416,
      "loss": 1.3082,
      "step": 1570
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00018542973072666915,
      "loss": 1.2954,
      "step": 1580
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00018533751383253413,
      "loss": 1.2728,
      "step": 1590
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00018524529693839911,
      "loss": 1.3098,
      "step": 1600
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00018515308004426413,
      "loss": 1.3343,
      "step": 1610
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0001850608631501291,
      "loss": 1.3702,
      "step": 1620
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0001849686462559941,
      "loss": 1.3413,
      "step": 1630
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0001848764293618591,
      "loss": 1.3541,
      "step": 1640
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00018478421246772412,
      "loss": 1.3882,
      "step": 1650
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0001846919955735891,
      "loss": 1.338,
      "step": 1660
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00018459977867945409,
      "loss": 1.3311,
      "step": 1670
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00018450756178531907,
      "loss": 1.3407,
      "step": 1680
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00018441534489118408,
      "loss": 1.4169,
      "step": 1690
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00018432312799704907,
      "loss": 1.2783,
      "step": 1700
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00018423091110291405,
      "loss": 1.3291,
      "step": 1710
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00018413869420877906,
      "loss": 1.4276,
      "step": 1720
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00018404647731464407,
      "loss": 1.4504,
      "step": 1730
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00018395426042050906,
      "loss": 1.356,
      "step": 1740
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00018386204352637404,
      "loss": 1.3567,
      "step": 1750
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00018376982663223903,
      "loss": 1.2416,
      "step": 1760
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00018367760973810404,
      "loss": 1.2971,
      "step": 1770
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00018358539284396902,
      "loss": 1.2429,
      "step": 1780
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.000183493175949834,
      "loss": 1.3004,
      "step": 1790
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00018340095905569902,
      "loss": 1.3765,
      "step": 1800
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00018330874216156403,
      "loss": 1.4921,
      "step": 1810
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00018321652526742901,
      "loss": 1.3344,
      "step": 1820
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.000183124308373294,
      "loss": 1.4283,
      "step": 1830
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00018303209147915898,
      "loss": 1.2761,
      "step": 1840
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.000182939874585024,
      "loss": 1.3461,
      "step": 1850
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00018284765769088898,
      "loss": 1.2124,
      "step": 1860
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00018275544079675396,
      "loss": 1.3249,
      "step": 1870
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00018266322390261898,
      "loss": 1.3656,
      "step": 1880
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.000182571007008484,
      "loss": 1.3039,
      "step": 1890
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00018247879011434897,
      "loss": 1.3722,
      "step": 1900
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00018238657322021396,
      "loss": 1.1888,
      "step": 1910
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00018229435632607894,
      "loss": 1.3604,
      "step": 1920
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00018220213943194392,
      "loss": 1.4169,
      "step": 1930
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00018210992253780894,
      "loss": 1.2332,
      "step": 1940
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00018201770564367392,
      "loss": 1.2774,
      "step": 1950
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00018192548874953893,
      "loss": 1.4248,
      "step": 1960
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00018183327185540392,
      "loss": 1.3928,
      "step": 1970
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00018174105496126893,
      "loss": 1.2566,
      "step": 1980
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0001816488380671339,
      "loss": 1.2803,
      "step": 1990
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0001815566211729989,
      "loss": 1.3071,
      "step": 2000
    },
    {
      "epoch": 0.37,
      "eval_accuracy": 0.464213985758627,
      "eval_loss": 1.279906988143921,
      "eval_runtime": 97.8688,
      "eval_samples_per_second": 111.925,
      "eval_steps_per_second": 13.998,
      "step": 2000
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00018146440427886388,
      "loss": 1.3159,
      "step": 2010
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0001813721873847289,
      "loss": 1.4002,
      "step": 2020
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00018127997049059388,
      "loss": 1.2217,
      "step": 2030
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0001811877535964589,
      "loss": 1.2869,
      "step": 2040
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00018109553670232387,
      "loss": 1.2793,
      "step": 2050
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00018100331980818888,
      "loss": 1.3284,
      "step": 2060
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00018091110291405387,
      "loss": 1.1991,
      "step": 2070
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00018081888601991885,
      "loss": 1.1353,
      "step": 2080
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00018072666912578384,
      "loss": 1.3133,
      "step": 2090
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00018063445223164885,
      "loss": 1.3075,
      "step": 2100
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00018054223533751383,
      "loss": 1.4026,
      "step": 2110
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00018045001844337885,
      "loss": 1.3367,
      "step": 2120
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00018035780154924383,
      "loss": 1.3156,
      "step": 2130
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00018026558465510884,
      "loss": 1.2022,
      "step": 2140
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00018017336776097383,
      "loss": 1.3409,
      "step": 2150
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0001800811508668388,
      "loss": 1.2041,
      "step": 2160
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0001799889339727038,
      "loss": 1.3062,
      "step": 2170
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0001798967170785688,
      "loss": 1.2372,
      "step": 2180
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0001798045001844338,
      "loss": 1.2237,
      "step": 2190
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0001797122832902988,
      "loss": 1.2524,
      "step": 2200
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00017962006639616379,
      "loss": 1.4144,
      "step": 2210
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0001795278495020288,
      "loss": 1.1676,
      "step": 2220
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00017943563260789378,
      "loss": 1.2909,
      "step": 2230
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00017934341571375877,
      "loss": 1.372,
      "step": 2240
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00017925119881962375,
      "loss": 1.2902,
      "step": 2250
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00017915898192548876,
      "loss": 1.2131,
      "step": 2260
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00017906676503135375,
      "loss": 1.198,
      "step": 2270
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00017897454813721876,
      "loss": 1.2151,
      "step": 2280
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00017888233124308374,
      "loss": 1.3141,
      "step": 2290
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00017879011434894875,
      "loss": 1.3788,
      "step": 2300
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00017869789745481374,
      "loss": 1.2766,
      "step": 2310
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00017860568056067872,
      "loss": 1.2166,
      "step": 2320
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0001785134636665437,
      "loss": 1.3074,
      "step": 2330
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00017842124677240872,
      "loss": 1.2592,
      "step": 2340
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0001783290298782737,
      "loss": 1.2932,
      "step": 2350
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00017823681298413871,
      "loss": 1.2729,
      "step": 2360
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0001781445960900037,
      "loss": 1.256,
      "step": 2370
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0001780523791958687,
      "loss": 1.284,
      "step": 2380
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0001779601623017337,
      "loss": 1.2473,
      "step": 2390
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00017786794540759868,
      "loss": 1.3336,
      "step": 2400
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00017777572851346366,
      "loss": 1.3713,
      "step": 2410
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00017768351161932868,
      "loss": 1.3888,
      "step": 2420
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00017759129472519366,
      "loss": 1.2189,
      "step": 2430
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00017749907783105864,
      "loss": 1.3783,
      "step": 2440
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00017740686093692366,
      "loss": 1.2576,
      "step": 2450
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00017731464404278867,
      "loss": 1.2524,
      "step": 2460
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00017722242714865365,
      "loss": 1.1614,
      "step": 2470
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00017713021025451864,
      "loss": 1.2631,
      "step": 2480
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00017703799336038362,
      "loss": 1.2215,
      "step": 2490
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00017694577646624863,
      "loss": 1.266,
      "step": 2500
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00017685355957211362,
      "loss": 1.1883,
      "step": 2510
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0001767613426779786,
      "loss": 1.2202,
      "step": 2520
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0001766691257838436,
      "loss": 1.238,
      "step": 2530
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0001765769088897086,
      "loss": 1.2836,
      "step": 2540
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0001764846919955736,
      "loss": 1.2879,
      "step": 2550
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0001763924751014386,
      "loss": 1.17,
      "step": 2560
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00017630025820730358,
      "loss": 1.1675,
      "step": 2570
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00017620804131316856,
      "loss": 1.1862,
      "step": 2580
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00017611582441903357,
      "loss": 1.2787,
      "step": 2590
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00017602360752489856,
      "loss": 1.1561,
      "step": 2600
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00017593139063076357,
      "loss": 1.3317,
      "step": 2610
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00017583917373662855,
      "loss": 1.2562,
      "step": 2620
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00017574695684249356,
      "loss": 1.3731,
      "step": 2630
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00017565473994835855,
      "loss": 1.304,
      "step": 2640
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00017556252305422353,
      "loss": 1.2217,
      "step": 2650
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00017547030616008852,
      "loss": 1.2968,
      "step": 2660
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00017537808926595353,
      "loss": 1.2932,
      "step": 2670
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00017528587237181851,
      "loss": 1.3466,
      "step": 2680
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00017519365547768353,
      "loss": 1.272,
      "step": 2690
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0001751014385835485,
      "loss": 1.1447,
      "step": 2700
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00017500922168941352,
      "loss": 1.2865,
      "step": 2710
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0001749170047952785,
      "loss": 1.287,
      "step": 2720
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0001748247879011435,
      "loss": 1.2388,
      "step": 2730
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00017473257100700847,
      "loss": 1.2865,
      "step": 2740
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00017464035411287349,
      "loss": 1.2697,
      "step": 2750
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00017454813721873847,
      "loss": 1.2994,
      "step": 2760
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00017445592032460348,
      "loss": 1.3243,
      "step": 2770
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00017436370343046847,
      "loss": 1.1788,
      "step": 2780
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00017427148653633348,
      "loss": 1.3052,
      "step": 2790
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00017417926964219846,
      "loss": 1.2467,
      "step": 2800
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00017408705274806345,
      "loss": 1.2646,
      "step": 2810
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00017399483585392843,
      "loss": 1.3098,
      "step": 2820
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00017390261895979344,
      "loss": 1.3249,
      "step": 2830
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00017381040206565843,
      "loss": 1.3779,
      "step": 2840
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00017371818517152344,
      "loss": 1.3639,
      "step": 2850
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00017362596827738842,
      "loss": 1.2494,
      "step": 2860
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00017353375138325343,
      "loss": 1.2869,
      "step": 2870
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00017344153448911842,
      "loss": 1.2766,
      "step": 2880
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0001733493175949834,
      "loss": 1.1126,
      "step": 2890
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0001732571007008484,
      "loss": 1.1705,
      "step": 2900
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.0001731648838067134,
      "loss": 1.2194,
      "step": 2910
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00017307266691257838,
      "loss": 1.2963,
      "step": 2920
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.0001729804500184434,
      "loss": 1.2211,
      "step": 2930
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00017288823312430838,
      "loss": 1.1875,
      "step": 2940
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.0001727960162301734,
      "loss": 1.3088,
      "step": 2950
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00017270379933603838,
      "loss": 1.3143,
      "step": 2960
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00017261158244190336,
      "loss": 1.2684,
      "step": 2970
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00017251936554776834,
      "loss": 1.1308,
      "step": 2980
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00017242714865363336,
      "loss": 1.2508,
      "step": 2990
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00017233493175949834,
      "loss": 1.297,
      "step": 3000
    },
    {
      "epoch": 0.55,
      "eval_accuracy": 0.4720649990870915,
      "eval_loss": 1.250298261642456,
      "eval_runtime": 98.4328,
      "eval_samples_per_second": 111.284,
      "eval_steps_per_second": 13.918,
      "step": 3000
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00017224271486536335,
      "loss": 1.2519,
      "step": 3010
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00017215049797122834,
      "loss": 1.1632,
      "step": 3020
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00017205828107709335,
      "loss": 1.2063,
      "step": 3030
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00017196606418295833,
      "loss": 1.2205,
      "step": 3040
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00017187384728882332,
      "loss": 1.2787,
      "step": 3050
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.0001717816303946883,
      "loss": 1.1501,
      "step": 3060
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0001716894135005533,
      "loss": 1.4336,
      "step": 3070
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0001715971966064183,
      "loss": 1.228,
      "step": 3080
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0001715049797122833,
      "loss": 1.2311,
      "step": 3090
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0001714127628181483,
      "loss": 1.2151,
      "step": 3100
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0001713205459240133,
      "loss": 1.324,
      "step": 3110
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0001712283290298783,
      "loss": 1.2236,
      "step": 3120
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00017113611213574327,
      "loss": 1.1156,
      "step": 3130
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00017104389524160826,
      "loss": 1.1789,
      "step": 3140
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00017095167834747327,
      "loss": 1.1482,
      "step": 3150
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00017085946145333825,
      "loss": 1.2907,
      "step": 3160
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00017076724455920326,
      "loss": 1.255,
      "step": 3170
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017067502766506825,
      "loss": 1.1669,
      "step": 3180
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017058281077093323,
      "loss": 1.2531,
      "step": 3190
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017049059387679825,
      "loss": 1.3122,
      "step": 3200
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017039837698266323,
      "loss": 1.2077,
      "step": 3210
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017030616008852821,
      "loss": 1.2112,
      "step": 3220
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0001702139431943932,
      "loss": 1.2232,
      "step": 3230
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0001701217263002582,
      "loss": 1.2612,
      "step": 3240
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.00017002950940612322,
      "loss": 1.2819,
      "step": 3250
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0001699372925119882,
      "loss": 1.3206,
      "step": 3260
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0001698450756178532,
      "loss": 1.1989,
      "step": 3270
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0001697528587237182,
      "loss": 1.204,
      "step": 3280
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00016966064182958319,
      "loss": 1.1277,
      "step": 3290
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00016956842493544817,
      "loss": 1.1263,
      "step": 3300
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00016947620804131316,
      "loss": 1.1812,
      "step": 3310
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00016938399114717817,
      "loss": 1.2092,
      "step": 3320
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00016929177425304318,
      "loss": 1.1473,
      "step": 3330
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00016919955735890816,
      "loss": 1.3631,
      "step": 3340
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00016910734046477315,
      "loss": 1.2097,
      "step": 3350
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00016901512357063816,
      "loss": 1.1183,
      "step": 3360
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00016892290667650314,
      "loss": 1.1925,
      "step": 3370
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00016883068978236813,
      "loss": 1.2737,
      "step": 3380
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.0001687384728882331,
      "loss": 1.0825,
      "step": 3390
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00016864625599409812,
      "loss": 1.2121,
      "step": 3400
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00016855403909996313,
      "loss": 1.167,
      "step": 3410
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00016846182220582812,
      "loss": 1.3436,
      "step": 3420
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.0001683696053116931,
      "loss": 1.1877,
      "step": 3430
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00016827738841755812,
      "loss": 1.2584,
      "step": 3440
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.0001681851715234231,
      "loss": 1.3245,
      "step": 3450
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00016809295462928808,
      "loss": 1.2784,
      "step": 3460
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00016800073773515307,
      "loss": 1.0824,
      "step": 3470
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00016790852084101808,
      "loss": 1.1694,
      "step": 3480
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.0001678163039468831,
      "loss": 1.1642,
      "step": 3490
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.00016772408705274808,
      "loss": 1.1797,
      "step": 3500
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.00016763187015861306,
      "loss": 1.2161,
      "step": 3510
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.00016753965326447807,
      "loss": 1.2212,
      "step": 3520
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.00016744743637034306,
      "loss": 1.0782,
      "step": 3530
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.00016735521947620804,
      "loss": 1.2315,
      "step": 3540
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.00016726300258207302,
      "loss": 1.1335,
      "step": 3550
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.00016717078568793804,
      "loss": 1.1268,
      "step": 3560
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.00016707856879380305,
      "loss": 1.3353,
      "step": 3570
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.00016698635189966803,
      "loss": 1.2586,
      "step": 3580
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.00016689413500553302,
      "loss": 1.3272,
      "step": 3590
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.00016680191811139803,
      "loss": 1.1169,
      "step": 3600
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.000166709701217263,
      "loss": 1.1928,
      "step": 3610
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.000166617484323128,
      "loss": 1.0965,
      "step": 3620
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.00016652526742899298,
      "loss": 1.2023,
      "step": 3630
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.000166433050534858,
      "loss": 1.239,
      "step": 3640
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.000166340833640723,
      "loss": 1.2667,
      "step": 3650
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.000166248616746588,
      "loss": 1.3098,
      "step": 3660
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.00016615639985245297,
      "loss": 1.1174,
      "step": 3670
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.00016606418295831798,
      "loss": 1.1965,
      "step": 3680
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.00016597196606418297,
      "loss": 1.1481,
      "step": 3690
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.00016587974917004795,
      "loss": 1.1209,
      "step": 3700
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.00016578753227591294,
      "loss": 1.1346,
      "step": 3710
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00016569531538177795,
      "loss": 1.2816,
      "step": 3720
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00016560309848764296,
      "loss": 1.1948,
      "step": 3730
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00016551088159350795,
      "loss": 1.1452,
      "step": 3740
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00016541866469937293,
      "loss": 1.1751,
      "step": 3750
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00016532644780523794,
      "loss": 1.212,
      "step": 3760
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.00016523423091110293,
      "loss": 1.1469,
      "step": 3770
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.0001651420140169679,
      "loss": 1.2593,
      "step": 3780
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.0001650497971228329,
      "loss": 1.1924,
      "step": 3790
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.0001649575802286979,
      "loss": 1.1856,
      "step": 3800
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.00016486536333456292,
      "loss": 1.2306,
      "step": 3810
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.0001647731464404279,
      "loss": 1.2943,
      "step": 3820
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.00016468092954629289,
      "loss": 1.1842,
      "step": 3830
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.00016458871265215787,
      "loss": 1.322,
      "step": 3840
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.00016449649575802288,
      "loss": 1.1888,
      "step": 3850
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.00016440427886388787,
      "loss": 1.1909,
      "step": 3860
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.00016431206196975285,
      "loss": 1.2228,
      "step": 3870
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00016421984507561786,
      "loss": 1.1169,
      "step": 3880
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00016412762818148287,
      "loss": 1.2129,
      "step": 3890
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00016403541128734786,
      "loss": 1.2108,
      "step": 3900
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00016394319439321284,
      "loss": 1.3192,
      "step": 3910
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00016385097749907783,
      "loss": 1.11,
      "step": 3920
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00016375876060494284,
      "loss": 1.1571,
      "step": 3930
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.00016366654371080782,
      "loss": 1.2533,
      "step": 3940
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.0001635743268166728,
      "loss": 1.0954,
      "step": 3950
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.00016348210992253782,
      "loss": 1.1373,
      "step": 3960
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.00016338989302840283,
      "loss": 1.1435,
      "step": 3970
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.00016329767613426782,
      "loss": 1.1704,
      "step": 3980
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.0001632054592401328,
      "loss": 1.0662,
      "step": 3990
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.00016311324234599778,
      "loss": 1.3121,
      "step": 4000
    },
    {
      "epoch": 0.74,
      "eval_accuracy": 0.49945225488406064,
      "eval_loss": 1.1661431789398193,
      "eval_runtime": 98.5644,
      "eval_samples_per_second": 111.135,
      "eval_steps_per_second": 13.9,
      "step": 4000
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.0001630210254518628,
      "loss": 1.2862,
      "step": 4010
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.00016292880855772778,
      "loss": 1.2184,
      "step": 4020
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.00016283659166359276,
      "loss": 1.1609,
      "step": 4030
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00016274437476945778,
      "loss": 1.1033,
      "step": 4040
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.0001626521578753228,
      "loss": 1.1446,
      "step": 4050
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00016255994098118777,
      "loss": 1.2345,
      "step": 4060
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00016246772408705276,
      "loss": 1.1732,
      "step": 4070
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00016237550719291774,
      "loss": 1.2738,
      "step": 4080
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00016228329029878275,
      "loss": 1.2372,
      "step": 4090
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.00016219107340464774,
      "loss": 1.142,
      "step": 4100
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.00016209885651051272,
      "loss": 1.277,
      "step": 4110
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.00016200663961637773,
      "loss": 1.1757,
      "step": 4120
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.00016191442272224274,
      "loss": 1.239,
      "step": 4130
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.00016182220582810773,
      "loss": 1.1598,
      "step": 4140
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0001617299889339727,
      "loss": 1.2175,
      "step": 4150
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0001616377720398377,
      "loss": 1.1578,
      "step": 4160
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0001615455551457027,
      "loss": 1.2052,
      "step": 4170
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0001614533382515677,
      "loss": 1.1283,
      "step": 4180
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.00016136112135743268,
      "loss": 1.233,
      "step": 4190
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0001612689044632977,
      "loss": 1.1148,
      "step": 4200
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.0001611766875691627,
      "loss": 1.1895,
      "step": 4210
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.00016108447067502768,
      "loss": 1.3549,
      "step": 4220
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.00016099225378089267,
      "loss": 1.2073,
      "step": 4230
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.00016090003688675765,
      "loss": 1.194,
      "step": 4240
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.00016080781999262267,
      "loss": 1.2183,
      "step": 4250
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00016071560309848765,
      "loss": 1.2088,
      "step": 4260
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00016062338620435263,
      "loss": 1.3126,
      "step": 4270
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00016053116931021765,
      "loss": 1.2154,
      "step": 4280
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00016043895241608266,
      "loss": 1.1869,
      "step": 4290
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00016034673552194764,
      "loss": 1.1124,
      "step": 4300
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00016025451862781263,
      "loss": 1.1789,
      "step": 4310
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0001601623017336776,
      "loss": 1.1254,
      "step": 4320
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.00016007008483954262,
      "loss": 1.3232,
      "step": 4330
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0001599778679454076,
      "loss": 1.1419,
      "step": 4340
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0001598856510512726,
      "loss": 1.2861,
      "step": 4350
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0001597934341571376,
      "loss": 1.1492,
      "step": 4360
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.0001597012172630026,
      "loss": 1.2304,
      "step": 4370
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.0001596090003688676,
      "loss": 1.2289,
      "step": 4380
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.00015951678347473258,
      "loss": 1.1479,
      "step": 4390
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.00015942456658059757,
      "loss": 1.222,
      "step": 4400
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.00015933234968646258,
      "loss": 1.105,
      "step": 4410
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00015924013279232756,
      "loss": 1.1094,
      "step": 4420
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00015914791589819255,
      "loss": 1.0859,
      "step": 4430
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00015905569900405756,
      "loss": 1.2148,
      "step": 4440
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00015896348210992254,
      "loss": 1.2015,
      "step": 4450
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00015887126521578755,
      "loss": 1.0869,
      "step": 4460
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00015877904832165254,
      "loss": 1.2852,
      "step": 4470
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.00015868683142751752,
      "loss": 1.14,
      "step": 4480
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.0001585946145333825,
      "loss": 1.2118,
      "step": 4490
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.00015850239763924752,
      "loss": 1.1661,
      "step": 4500
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.0001584101807451125,
      "loss": 1.07,
      "step": 4510
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.00015831796385097752,
      "loss": 1.1214,
      "step": 4520
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.0001582257469568425,
      "loss": 1.285,
      "step": 4530
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.0001581335300627075,
      "loss": 1.1742,
      "step": 4540
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.0001580413131685725,
      "loss": 1.1425,
      "step": 4550
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.00015794909627443748,
      "loss": 1.0878,
      "step": 4560
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.00015785687938030246,
      "loss": 1.2311,
      "step": 4570
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.00015776466248616748,
      "loss": 1.1238,
      "step": 4580
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.00015767244559203246,
      "loss": 1.0743,
      "step": 4590
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.00015758022869789747,
      "loss": 1.0789,
      "step": 4600
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.00015748801180376246,
      "loss": 1.1919,
      "step": 4610
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.00015739579490962747,
      "loss": 1.1697,
      "step": 4620
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.00015730357801549245,
      "loss": 1.2384,
      "step": 4630
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00015721136112135744,
      "loss": 1.1858,
      "step": 4640
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00015711914422722242,
      "loss": 1.1766,
      "step": 4650
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00015702692733308743,
      "loss": 1.0852,
      "step": 4660
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00015693471043895242,
      "loss": 1.1499,
      "step": 4670
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00015684249354481743,
      "loss": 1.2804,
      "step": 4680
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.0001567502766506824,
      "loss": 1.2195,
      "step": 4690
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.00015665805975654742,
      "loss": 1.1809,
      "step": 4700
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.0001565658428624124,
      "loss": 1.2306,
      "step": 4710
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.0001564736259682774,
      "loss": 1.1516,
      "step": 4720
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00015638140907414238, | |
| "loss": 1.1354, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.0001562891921800074, | |
| "loss": 1.1047, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00015619697528587237, | |
| "loss": 1.1401, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00015610475839173736, | |
| "loss": 1.1011, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00015601254149760237, | |
| "loss": 1.0627, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00015592032460346738, | |
| "loss": 1.2008, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00015582810770933237, | |
| "loss": 1.0876, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00015573589081519735, | |
| "loss": 1.2179, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00015564367392106233, | |
| "loss": 1.1274, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00015555145702692735, | |
| "loss": 1.2143, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00015545924013279233, | |
| "loss": 1.264, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00015536702323865731, | |
| "loss": 1.0355, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00015527480634452233, | |
| "loss": 1.1883, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00015518258945038734, | |
| "loss": 1.151, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00015509037255625232, | |
| "loss": 1.1937, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0001549981556621173, | |
| "loss": 1.1426, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0001549059387679823, | |
| "loss": 1.1356, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0001548137218738473, | |
| "loss": 1.0953, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.0001547215049797123, | |
| "loss": 1.2168, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00015462928808557727, | |
| "loss": 1.2641, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00015453707119144228, | |
| "loss": 0.9988, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.0001544448542973073, | |
| "loss": 1.2423, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00015435263740317228, | |
| "loss": 1.1201, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00015426042050903726, | |
| "loss": 1.1261, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00015416820361490225, | |
| "loss": 1.1374, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00015407598672076726, | |
| "loss": 0.9945, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00015398376982663224, | |
| "loss": 1.1362, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00015389155293249723, | |
| "loss": 1.1806, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_accuracy": 0.5240094942486763, | |
| "eval_loss": 1.1137409210205078, | |
| "eval_runtime": 98.12, | |
| "eval_samples_per_second": 111.639, | |
| "eval_steps_per_second": 13.963, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00015379933603836224, | |
| "loss": 1.1386, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00015370711914422725, | |
| "loss": 1.129, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00015361490225009223, | |
| "loss": 1.1191, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00015352268535595722, | |
| "loss": 1.2112, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.0001534304684618222, | |
| "loss": 1.0587, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.0001533382515676872, | |
| "loss": 1.2067, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.0001532460346735522, | |
| "loss": 1.1811, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00015315381777941718, | |
| "loss": 1.1983, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.0001530616008852822, | |
| "loss": 1.2142, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00015296938399114718, | |
| "loss": 1.1877, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.0001528771670970122, | |
| "loss": 1.1208, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00015278495020287718, | |
| "loss": 1.2037, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00015269273330874216, | |
| "loss": 1.0518, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00015260051641460714, | |
| "loss": 1.1385, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00015250829952047216, | |
| "loss": 1.2212, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00015241608262633714, | |
| "loss": 1.2147, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00015232386573220215, | |
| "loss": 1.0961, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00015223164883806714, | |
| "loss": 1.1507, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00015213943194393215, | |
| "loss": 1.2598, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00015204721504979713, | |
| "loss": 1.1551, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00015195499815566212, | |
| "loss": 1.3621, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.0001518627812615271, | |
| "loss": 1.1444, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.0001517705643673921, | |
| "loss": 1.1967, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001516783474732571, | |
| "loss": 1.1623, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001515861305791221, | |
| "loss": 1.1205, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001514939136849871, | |
| "loss": 1.0907, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001514016967908521, | |
| "loss": 1.0634, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001513094798967171, | |
| "loss": 1.1764, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00015121726300258207, | |
| "loss": 1.169, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00015112504610844706, | |
| "loss": 1.0883, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00015103282921431207, | |
| "loss": 1.2949, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00015094061232017705, | |
| "loss": 1.1286, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00015084839542604207, | |
| "loss": 1.08, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00015075617853190705, | |
| "loss": 1.146, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00015066396163777206, | |
| "loss": 1.2019, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00015057174474363705, | |
| "loss": 1.1424, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00015047952784950203, | |
| "loss": 1.114, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00015038731095536701, | |
| "loss": 1.1133, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00015029509406123203, | |
| "loss": 1.1234, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.000150202877167097, | |
| "loss": 1.2356, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00015011066027296202, | |
| "loss": 1.2222, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.000150018443378827, | |
| "loss": 1.1921, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00014992622648469202, | |
| "loss": 1.1302, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.000149834009590557, | |
| "loss": 1.1424, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.000149741792696422, | |
| "loss": 1.1571, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00014964957580228697, | |
| "loss": 1.0979, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00014955735890815198, | |
| "loss": 1.0564, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00014946514201401697, | |
| "loss": 1.007, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00014937292511988198, | |
| "loss": 1.2216, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00014928070822574696, | |
| "loss": 0.9655, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00014918849133161197, | |
| "loss": 0.9971, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00014909627443747696, | |
| "loss": 1.0476, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00014900405754334194, | |
| "loss": 1.0946, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00014891184064920693, | |
| "loss": 1.128, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00014881962375507194, | |
| "loss": 1.124, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00014872740686093692, | |
| "loss": 1.0277, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00014863518996680193, | |
| "loss": 1.2334, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00014854297307266692, | |
| "loss": 0.9921, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00014845075617853193, | |
| "loss": 1.0135, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00014835853928439692, | |
| "loss": 1.0203, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.0001482663223902619, | |
| "loss": 1.0778, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00014817410549612688, | |
| "loss": 1.0985, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0001480818886019919, | |
| "loss": 1.1, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00014798967170785688, | |
| "loss": 1.1413, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0001478974548137219, | |
| "loss": 1.0165, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00014780523791958688, | |
| "loss": 1.0329, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.0001477130210254519, | |
| "loss": 1.0341, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00014762080413131687, | |
| "loss": 0.9721, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00014752858723718186, | |
| "loss": 1.1558, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00014743637034304684, | |
| "loss": 1.0964, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00014734415344891183, | |
| "loss": 0.9984, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00014725193655477684, | |
| "loss": 1.1214, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00014715971966064185, | |
| "loss": 1.0954, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00014706750276650683, | |
| "loss": 1.0381, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00014697528587237182, | |
| "loss": 1.0516, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00014688306897823683, | |
| "loss": 1.1157, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.0001467908520841018, | |
| "loss": 0.9874, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0001466986351899668, | |
| "loss": 1.0634, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.00014660641829583178, | |
| "loss": 1.1058, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0001465142014016968, | |
| "loss": 1.0966, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0001464219845075618, | |
| "loss": 1.0457, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0001463297676134268, | |
| "loss": 1.1258, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00014623755071929177, | |
| "loss": 1.1258, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00014614533382515679, | |
| "loss": 1.2046, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00014605311693102177, | |
| "loss": 1.0688, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00014596090003688675, | |
| "loss": 0.9497, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00014586868314275174, | |
| "loss": 1.1829, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00014577646624861675, | |
| "loss": 1.1155, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00014568424935448176, | |
| "loss": 1.1273, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00014559203246034675, | |
| "loss": 1.011, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00014549981556621173, | |
| "loss": 1.081, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00014540759867207674, | |
| "loss": 1.041, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00014531538177794173, | |
| "loss": 1.1286, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0001452231648838067, | |
| "loss": 0.9914, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0001451309479896717, | |
| "loss": 1.0105, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0001450387310955367, | |
| "loss": 1.0517, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.00014494651420140172, | |
| "loss": 1.0245, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0001448542973072667, | |
| "loss": 1.0713, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0001447620804131317, | |
| "loss": 1.0638, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.0001446698635189967, | |
| "loss": 1.0839, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_accuracy": 0.5164323534781815, | |
| "eval_loss": 1.1340410709381104, | |
| "eval_runtime": 98.4525, | |
| "eval_samples_per_second": 111.262, | |
| "eval_steps_per_second": 13.915, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "step": 6000, | |
| "total_flos": 2.0396723395200123e+18, | |
| "train_loss": 1.29178360859553, | |
| "train_runtime": 1561.0386, | |
| "train_samples_per_second": 222.273, | |
| "train_steps_per_second": 13.893 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 21688, | |
| "num_train_epochs": 4, | |
| "save_steps": 1000, | |
| "total_flos": 2.0396723395200123e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
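
The log above is the state a `transformers` `Trainer` serializes alongside its checkpoints. Below is a minimal sketch, not part of the log itself, of one way to read it back in Python; it assumes the JSON is saved under the default filename `trainer_state.json`, which is an assumption about how this particular file was stored.

```python
# Minimal sketch: load the trainer state and separate its record types.
# Assumes the JSON above is saved as "trainer_state.json" (the filename
# transformers.Trainer writes inside each checkpoint directory).
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# log_history mixes three record shapes: per-logging-step training losses
# (key "loss"), periodic evaluations (key "eval_loss"), and one final
# train-summary record (key "train_loss"), so filter by key.
train_records = [r for r in state["log_history"] if "loss" in r]
eval_records = [r for r in state["log_history"] if "eval_loss" in r]

print(f"{len(train_records)} training-loss records, "
      f"last loss {train_records[-1]['loss']:.4f}")
for r in eval_records:
    print(f"step {r['step']}: eval_loss={r['eval_loss']:.4f}, "
          f"eval_accuracy={r['eval_accuracy']:.4f}")
```

Run against this log, the loop would surface the two evaluation records above: eval_loss 1.1137 (accuracy 0.5240) at step 5000, rising to 1.1340 (accuracy 0.5164) at step 6000, which is why step 5000 remained the best checkpoint when training stopped at step 6000 of the scheduled 21688.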