| { |
| "best_metric": 0.8585651674989221, |
| "best_model_checkpoint": "/DATA/DATA3/wjt/AIGV-qwen2.5/AIGV-main/output/quality_MLP/v2-20250503-111817/checkpoint-2000", |
| "epoch": 6.480881399870382, |
| "eval_steps": 2000, |
| "global_step": 10000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0006480881399870382, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.295336787564767e-07, |
| "loss": 0.5365753173828125, |
| "memory(GiB)": 32.2, |
| "step": 1, |
| "train_speed(iter/s)": 0.260463 |
| }, |
| { |
| "epoch": 0.0032404406999351912, |
| "grad_norm": 1.078125, |
| "learning_rate": 6.476683937823834e-07, |
| "loss": 0.49399423599243164, |
| "memory(GiB)": 32.2, |
| "step": 5, |
| "train_speed(iter/s)": 0.639071 |
| }, |
| { |
| "epoch": 0.0064808813998703824, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.2953367875647669e-06, |
| "loss": 0.5098413467407227, |
| "memory(GiB)": 34.73, |
| "step": 10, |
| "train_speed(iter/s)": 0.770125 |
| }, |
| { |
| "epoch": 0.009721322099805573, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.9430051813471504e-06, |
| "loss": 0.5465492248535156, |
| "memory(GiB)": 38.69, |
| "step": 15, |
| "train_speed(iter/s)": 0.741698 |
| }, |
| { |
| "epoch": 0.012961762799740765, |
| "grad_norm": 0.5703125, |
| "learning_rate": 2.5906735751295338e-06, |
| "loss": 0.5868881225585938, |
| "memory(GiB)": 38.69, |
| "step": 20, |
| "train_speed(iter/s)": 0.769326 |
| }, |
| { |
| "epoch": 0.016202203499675955, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.238341968911917e-06, |
| "loss": 0.4694648742675781, |
| "memory(GiB)": 38.69, |
| "step": 25, |
| "train_speed(iter/s)": 0.810966 |
| }, |
| { |
| "epoch": 0.019442644199611146, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.886010362694301e-06, |
| "loss": 0.5193641662597657, |
| "memory(GiB)": 38.69, |
| "step": 30, |
| "train_speed(iter/s)": 0.788689 |
| }, |
| { |
| "epoch": 0.02268308489954634, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.533678756476685e-06, |
| "loss": 0.6055156707763671, |
| "memory(GiB)": 43.05, |
| "step": 35, |
| "train_speed(iter/s)": 0.770943 |
| }, |
| { |
| "epoch": 0.02592352559948153, |
| "grad_norm": 0.55078125, |
| "learning_rate": 5.1813471502590676e-06, |
| "loss": 0.562900161743164, |
| "memory(GiB)": 43.05, |
| "step": 40, |
| "train_speed(iter/s)": 0.780659 |
| }, |
| { |
| "epoch": 0.02916396629941672, |
| "grad_norm": 1.078125, |
| "learning_rate": 5.829015544041451e-06, |
| "loss": 0.5973004817962646, |
| "memory(GiB)": 43.05, |
| "step": 45, |
| "train_speed(iter/s)": 0.781833 |
| }, |
| { |
| "epoch": 0.03240440699935191, |
| "grad_norm": 0.5390625, |
| "learning_rate": 6.476683937823834e-06, |
| "loss": 0.6351554870605469, |
| "memory(GiB)": 43.05, |
| "step": 50, |
| "train_speed(iter/s)": 0.784208 |
| }, |
| { |
| "epoch": 0.0356448476992871, |
| "grad_norm": 0.5390625, |
| "learning_rate": 7.124352331606218e-06, |
| "loss": 0.5043731689453125, |
| "memory(GiB)": 43.05, |
| "step": 55, |
| "train_speed(iter/s)": 0.801354 |
| }, |
| { |
| "epoch": 0.03888528839922229, |
| "grad_norm": 0.53515625, |
| "learning_rate": 7.772020725388602e-06, |
| "loss": 0.509942626953125, |
| "memory(GiB)": 43.05, |
| "step": 60, |
| "train_speed(iter/s)": 0.811061 |
| }, |
| { |
| "epoch": 0.04212572909915749, |
| "grad_norm": 1.046875, |
| "learning_rate": 8.419689119170985e-06, |
| "loss": 0.5730291366577148, |
| "memory(GiB)": 43.05, |
| "step": 65, |
| "train_speed(iter/s)": 0.796095 |
| }, |
| { |
| "epoch": 0.04536616979909268, |
| "grad_norm": 1.0703125, |
| "learning_rate": 9.06735751295337e-06, |
| "loss": 0.5984684944152832, |
| "memory(GiB)": 43.05, |
| "step": 70, |
| "train_speed(iter/s)": 0.782284 |
| }, |
| { |
| "epoch": 0.04860661049902787, |
| "grad_norm": 1.078125, |
| "learning_rate": 9.715025906735752e-06, |
| "loss": 0.5932106018066406, |
| "memory(GiB)": 43.05, |
| "step": 75, |
| "train_speed(iter/s)": 0.754338 |
| }, |
| { |
| "epoch": 0.05184705119896306, |
| "grad_norm": 0.5703125, |
| "learning_rate": 1.0362694300518135e-05, |
| "loss": 0.5908912658691406, |
| "memory(GiB)": 43.05, |
| "step": 80, |
| "train_speed(iter/s)": 0.753761 |
| }, |
| { |
| "epoch": 0.05508749189889825, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.101036269430052e-05, |
| "loss": 0.5384979248046875, |
| "memory(GiB)": 43.05, |
| "step": 85, |
| "train_speed(iter/s)": 0.759779 |
| }, |
| { |
| "epoch": 0.05832793259883344, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.1658031088082903e-05, |
| "loss": 0.5038749694824218, |
| "memory(GiB)": 43.05, |
| "step": 90, |
| "train_speed(iter/s)": 0.759167 |
| }, |
| { |
| "epoch": 0.06156837329876863, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.2305699481865286e-05, |
| "loss": 0.5468292713165284, |
| "memory(GiB)": 43.05, |
| "step": 95, |
| "train_speed(iter/s)": 0.759403 |
| }, |
| { |
| "epoch": 0.06480881399870382, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.2953367875647668e-05, |
| "loss": 0.48979339599609373, |
| "memory(GiB)": 43.05, |
| "step": 100, |
| "train_speed(iter/s)": 0.755654 |
| }, |
| { |
| "epoch": 0.06804925469863901, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.3601036269430053e-05, |
| "loss": 0.5300430297851563, |
| "memory(GiB)": 43.05, |
| "step": 105, |
| "train_speed(iter/s)": 0.763136 |
| }, |
| { |
| "epoch": 0.0712896953985742, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.4248704663212436e-05, |
| "loss": 0.5134796142578125, |
| "memory(GiB)": 43.05, |
| "step": 110, |
| "train_speed(iter/s)": 0.772151 |
| }, |
| { |
| "epoch": 0.07453013609850939, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.4896373056994819e-05, |
| "loss": 0.5596694946289062, |
| "memory(GiB)": 43.05, |
| "step": 115, |
| "train_speed(iter/s)": 0.768512 |
| }, |
| { |
| "epoch": 0.07777057679844458, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.5544041450777204e-05, |
| "loss": 0.6054977416992188, |
| "memory(GiB)": 43.05, |
| "step": 120, |
| "train_speed(iter/s)": 0.753057 |
| }, |
| { |
| "epoch": 0.08101101749837979, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.6191709844559585e-05, |
| "loss": 0.5818267822265625, |
| "memory(GiB)": 43.05, |
| "step": 125, |
| "train_speed(iter/s)": 0.752964 |
| }, |
| { |
| "epoch": 0.08425145819831498, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.683937823834197e-05, |
| "loss": 0.4538330078125, |
| "memory(GiB)": 43.05, |
| "step": 130, |
| "train_speed(iter/s)": 0.760911 |
| }, |
| { |
| "epoch": 0.08749189889825017, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.7487046632124354e-05, |
| "loss": 0.56064453125, |
| "memory(GiB)": 43.05, |
| "step": 135, |
| "train_speed(iter/s)": 0.770633 |
| }, |
| { |
| "epoch": 0.09073233959818536, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.813471502590674e-05, |
| "loss": 0.5388214111328125, |
| "memory(GiB)": 43.05, |
| "step": 140, |
| "train_speed(iter/s)": 0.765569 |
| }, |
| { |
| "epoch": 0.09397278029812055, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.878238341968912e-05, |
| "loss": 0.4977081298828125, |
| "memory(GiB)": 43.05, |
| "step": 145, |
| "train_speed(iter/s)": 0.767162 |
| }, |
| { |
| "epoch": 0.09721322099805574, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.9430051813471504e-05, |
| "loss": 0.502288818359375, |
| "memory(GiB)": 43.05, |
| "step": 150, |
| "train_speed(iter/s)": 0.767997 |
| }, |
| { |
| "epoch": 0.10045366169799093, |
| "grad_norm": 1.09375, |
| "learning_rate": 2.0077720207253886e-05, |
| "loss": 0.5877655029296875, |
| "memory(GiB)": 43.05, |
| "step": 155, |
| "train_speed(iter/s)": 0.764005 |
| }, |
| { |
| "epoch": 0.10369410239792612, |
| "grad_norm": 1.1171875, |
| "learning_rate": 2.072538860103627e-05, |
| "loss": 0.497576904296875, |
| "memory(GiB)": 43.05, |
| "step": 160, |
| "train_speed(iter/s)": 0.765148 |
| }, |
| { |
| "epoch": 0.10693454309786131, |
| "grad_norm": 1.1015625, |
| "learning_rate": 2.1373056994818655e-05, |
| "loss": 0.482635498046875, |
| "memory(GiB)": 43.05, |
| "step": 165, |
| "train_speed(iter/s)": 0.768335 |
| }, |
| { |
| "epoch": 0.1101749837977965, |
| "grad_norm": 1.1015625, |
| "learning_rate": 2.202072538860104e-05, |
| "loss": 0.600677490234375, |
| "memory(GiB)": 43.05, |
| "step": 170, |
| "train_speed(iter/s)": 0.768261 |
| }, |
| { |
| "epoch": 0.11341542449773169, |
| "grad_norm": 1.140625, |
| "learning_rate": 2.266839378238342e-05, |
| "loss": 0.50074462890625, |
| "memory(GiB)": 43.05, |
| "step": 175, |
| "train_speed(iter/s)": 0.774096 |
| }, |
| { |
| "epoch": 0.11665586519766688, |
| "grad_norm": 1.109375, |
| "learning_rate": 2.3316062176165805e-05, |
| "loss": 0.5025146484375, |
| "memory(GiB)": 43.05, |
| "step": 180, |
| "train_speed(iter/s)": 0.77619 |
| }, |
| { |
| "epoch": 0.11989630589760207, |
| "grad_norm": 1.1484375, |
| "learning_rate": 2.3963730569948187e-05, |
| "loss": 0.457147216796875, |
| "memory(GiB)": 43.05, |
| "step": 185, |
| "train_speed(iter/s)": 0.778384 |
| }, |
| { |
| "epoch": 0.12313674659753726, |
| "grad_norm": 1.1953125, |
| "learning_rate": 2.461139896373057e-05, |
| "loss": 0.518804931640625, |
| "memory(GiB)": 43.05, |
| "step": 190, |
| "train_speed(iter/s)": 0.772501 |
| }, |
| { |
| "epoch": 0.12637718729747247, |
| "grad_norm": 1.1796875, |
| "learning_rate": 2.5259067357512956e-05, |
| "loss": 0.52044677734375, |
| "memory(GiB)": 43.05, |
| "step": 195, |
| "train_speed(iter/s)": 0.768668 |
| }, |
| { |
| "epoch": 0.12961762799740764, |
| "grad_norm": 1.25, |
| "learning_rate": 2.5906735751295337e-05, |
| "loss": 0.51678466796875, |
| "memory(GiB)": 43.05, |
| "step": 200, |
| "train_speed(iter/s)": 0.769841 |
| }, |
| { |
| "epoch": 0.13285806869734285, |
| "grad_norm": 1.296875, |
| "learning_rate": 2.655440414507772e-05, |
| "loss": 0.57254638671875, |
| "memory(GiB)": 43.05, |
| "step": 205, |
| "train_speed(iter/s)": 0.769437 |
| }, |
| { |
| "epoch": 0.13609850939727802, |
| "grad_norm": 1.359375, |
| "learning_rate": 2.7202072538860106e-05, |
| "loss": 0.5950927734375, |
| "memory(GiB)": 43.05, |
| "step": 210, |
| "train_speed(iter/s)": 0.758835 |
| }, |
| { |
| "epoch": 0.13933895009721323, |
| "grad_norm": 1.40625, |
| "learning_rate": 2.7849740932642487e-05, |
| "loss": 0.4545166015625, |
| "memory(GiB)": 43.05, |
| "step": 215, |
| "train_speed(iter/s)": 0.758691 |
| }, |
| { |
| "epoch": 0.1425793907971484, |
| "grad_norm": 1.5390625, |
| "learning_rate": 2.8497409326424872e-05, |
| "loss": 0.4698974609375, |
| "memory(GiB)": 43.05, |
| "step": 220, |
| "train_speed(iter/s)": 0.763472 |
| }, |
| { |
| "epoch": 0.1458198314970836, |
| "grad_norm": 1.6796875, |
| "learning_rate": 2.9145077720207253e-05, |
| "loss": 0.527783203125, |
| "memory(GiB)": 43.05, |
| "step": 225, |
| "train_speed(iter/s)": 0.759259 |
| }, |
| { |
| "epoch": 0.14906027219701878, |
| "grad_norm": 1.640625, |
| "learning_rate": 2.9792746113989638e-05, |
| "loss": 0.3810546875, |
| "memory(GiB)": 43.05, |
| "step": 230, |
| "train_speed(iter/s)": 0.759974 |
| }, |
| { |
| "epoch": 0.152300712896954, |
| "grad_norm": 2.03125, |
| "learning_rate": 3.0440414507772026e-05, |
| "loss": 0.6087646484375, |
| "memory(GiB)": 43.05, |
| "step": 235, |
| "train_speed(iter/s)": 0.759738 |
| }, |
| { |
| "epoch": 0.15554115359688916, |
| "grad_norm": 1.9765625, |
| "learning_rate": 3.108808290155441e-05, |
| "loss": 0.539697265625, |
| "memory(GiB)": 43.05, |
| "step": 240, |
| "train_speed(iter/s)": 0.757412 |
| }, |
| { |
| "epoch": 0.15878159429682437, |
| "grad_norm": 2.546875, |
| "learning_rate": 3.173575129533679e-05, |
| "loss": 0.414892578125, |
| "memory(GiB)": 43.05, |
| "step": 245, |
| "train_speed(iter/s)": 0.757164 |
| }, |
| { |
| "epoch": 0.16202203499675957, |
| "grad_norm": 2.765625, |
| "learning_rate": 3.238341968911917e-05, |
| "loss": 0.466015625, |
| "memory(GiB)": 43.05, |
| "step": 250, |
| "train_speed(iter/s)": 0.760448 |
| }, |
| { |
| "epoch": 0.16526247569669475, |
| "grad_norm": 3.34375, |
| "learning_rate": 3.303108808290156e-05, |
| "loss": 0.4650390625, |
| "memory(GiB)": 43.05, |
| "step": 255, |
| "train_speed(iter/s)": 0.760227 |
| }, |
| { |
| "epoch": 0.16850291639662995, |
| "grad_norm": 3.53125, |
| "learning_rate": 3.367875647668394e-05, |
| "loss": 0.42080078125, |
| "memory(GiB)": 43.05, |
| "step": 260, |
| "train_speed(iter/s)": 0.758035 |
| }, |
| { |
| "epoch": 0.17174335709656513, |
| "grad_norm": 3.9375, |
| "learning_rate": 3.432642487046632e-05, |
| "loss": 0.360791015625, |
| "memory(GiB)": 43.05, |
| "step": 265, |
| "train_speed(iter/s)": 0.759979 |
| }, |
| { |
| "epoch": 0.17498379779650033, |
| "grad_norm": 4.75, |
| "learning_rate": 3.497409326424871e-05, |
| "loss": 0.40830078125, |
| "memory(GiB)": 43.05, |
| "step": 270, |
| "train_speed(iter/s)": 0.760981 |
| }, |
| { |
| "epoch": 0.1782242384964355, |
| "grad_norm": 5.46875, |
| "learning_rate": 3.562176165803109e-05, |
| "loss": 0.3232421875, |
| "memory(GiB)": 43.05, |
| "step": 275, |
| "train_speed(iter/s)": 0.76279 |
| }, |
| { |
| "epoch": 0.18146467919637072, |
| "grad_norm": 5.28125, |
| "learning_rate": 3.626943005181348e-05, |
| "loss": 0.35078125, |
| "memory(GiB)": 43.05, |
| "step": 280, |
| "train_speed(iter/s)": 0.760403 |
| }, |
| { |
| "epoch": 0.1847051198963059, |
| "grad_norm": 6.125, |
| "learning_rate": 3.691709844559585e-05, |
| "loss": 0.262109375, |
| "memory(GiB)": 43.05, |
| "step": 285, |
| "train_speed(iter/s)": 0.75989 |
| }, |
| { |
| "epoch": 0.1879455605962411, |
| "grad_norm": 7.40625, |
| "learning_rate": 3.756476683937824e-05, |
| "loss": 0.240234375, |
| "memory(GiB)": 43.05, |
| "step": 290, |
| "train_speed(iter/s)": 0.764385 |
| }, |
| { |
| "epoch": 0.19118600129617627, |
| "grad_norm": 8.1875, |
| "learning_rate": 3.821243523316063e-05, |
| "loss": 0.2201171875, |
| "memory(GiB)": 43.05, |
| "step": 295, |
| "train_speed(iter/s)": 0.761947 |
| }, |
| { |
| "epoch": 0.19442644199611148, |
| "grad_norm": 8.1875, |
| "learning_rate": 3.886010362694301e-05, |
| "loss": 0.13984375, |
| "memory(GiB)": 43.05, |
| "step": 300, |
| "train_speed(iter/s)": 0.76188 |
| }, |
| { |
| "epoch": 0.19766688269604665, |
| "grad_norm": 10.625, |
| "learning_rate": 3.950777202072539e-05, |
| "loss": 0.1181640625, |
| "memory(GiB)": 43.05, |
| "step": 305, |
| "train_speed(iter/s)": 0.766249 |
| }, |
| { |
| "epoch": 0.20090732339598186, |
| "grad_norm": 0.54296875, |
| "learning_rate": 4.015544041450777e-05, |
| "loss": 0.1419921875, |
| "memory(GiB)": 43.05, |
| "step": 310, |
| "train_speed(iter/s)": 0.762176 |
| }, |
| { |
| "epoch": 0.20414776409591703, |
| "grad_norm": 1.9140625, |
| "learning_rate": 4.080310880829016e-05, |
| "loss": 0.14765625, |
| "memory(GiB)": 43.05, |
| "step": 315, |
| "train_speed(iter/s)": 0.759722 |
| }, |
| { |
| "epoch": 0.20738820479585224, |
| "grad_norm": 2.34375, |
| "learning_rate": 4.145077720207254e-05, |
| "loss": 0.1498046875, |
| "memory(GiB)": 43.05, |
| "step": 320, |
| "train_speed(iter/s)": 0.759166 |
| }, |
| { |
| "epoch": 0.21062864549578741, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.209844559585492e-05, |
| "loss": 0.0775390625, |
| "memory(GiB)": 43.05, |
| "step": 325, |
| "train_speed(iter/s)": 0.76154 |
| }, |
| { |
| "epoch": 0.21386908619572262, |
| "grad_norm": 10.625, |
| "learning_rate": 4.274611398963731e-05, |
| "loss": 0.09013671875, |
| "memory(GiB)": 43.05, |
| "step": 330, |
| "train_speed(iter/s)": 0.763867 |
| }, |
| { |
| "epoch": 0.21710952689565782, |
| "grad_norm": 0.7734375, |
| "learning_rate": 4.339378238341969e-05, |
| "loss": 0.1416015625, |
| "memory(GiB)": 43.05, |
| "step": 335, |
| "train_speed(iter/s)": 0.761462 |
| }, |
| { |
| "epoch": 0.220349967595593, |
| "grad_norm": 6.28125, |
| "learning_rate": 4.404145077720208e-05, |
| "loss": 0.0611328125, |
| "memory(GiB)": 43.05, |
| "step": 340, |
| "train_speed(iter/s)": 0.763926 |
| }, |
| { |
| "epoch": 0.2235904082955282, |
| "grad_norm": 5.5625, |
| "learning_rate": 4.468911917098445e-05, |
| "loss": 0.080859375, |
| "memory(GiB)": 43.05, |
| "step": 345, |
| "train_speed(iter/s)": 0.763924 |
| }, |
| { |
| "epoch": 0.22683084899546338, |
| "grad_norm": 2.203125, |
| "learning_rate": 4.533678756476684e-05, |
| "loss": 0.096484375, |
| "memory(GiB)": 43.05, |
| "step": 350, |
| "train_speed(iter/s)": 0.764442 |
| }, |
| { |
| "epoch": 0.23007128969539858, |
| "grad_norm": 10.25, |
| "learning_rate": 4.598445595854923e-05, |
| "loss": 0.059375, |
| "memory(GiB)": 43.05, |
| "step": 355, |
| "train_speed(iter/s)": 0.765663 |
| }, |
| { |
| "epoch": 0.23331173039533376, |
| "grad_norm": 10.5, |
| "learning_rate": 4.663212435233161e-05, |
| "loss": 0.1283203125, |
| "memory(GiB)": 43.05, |
| "step": 360, |
| "train_speed(iter/s)": 0.762173 |
| }, |
| { |
| "epoch": 0.23655217109526896, |
| "grad_norm": 12.9375, |
| "learning_rate": 4.727979274611399e-05, |
| "loss": 0.1201171875, |
| "memory(GiB)": 43.05, |
| "step": 365, |
| "train_speed(iter/s)": 0.761207 |
| }, |
| { |
| "epoch": 0.23979261179520414, |
| "grad_norm": 4.5625, |
| "learning_rate": 4.792746113989637e-05, |
| "loss": 0.05654296875, |
| "memory(GiB)": 43.05, |
| "step": 370, |
| "train_speed(iter/s)": 0.761617 |
| }, |
| { |
| "epoch": 0.24303305249513935, |
| "grad_norm": 3.921875, |
| "learning_rate": 4.857512953367876e-05, |
| "loss": 0.05234375, |
| "memory(GiB)": 43.05, |
| "step": 375, |
| "train_speed(iter/s)": 0.765087 |
| }, |
| { |
| "epoch": 0.24627349319507452, |
| "grad_norm": 4.96875, |
| "learning_rate": 4.922279792746114e-05, |
| "loss": 0.0787109375, |
| "memory(GiB)": 43.05, |
| "step": 380, |
| "train_speed(iter/s)": 0.763822 |
| }, |
| { |
| "epoch": 0.24951393389500973, |
| "grad_norm": 6.6875, |
| "learning_rate": 4.9870466321243523e-05, |
| "loss": 0.1029296875, |
| "memory(GiB)": 43.05, |
| "step": 385, |
| "train_speed(iter/s)": 0.763392 |
| }, |
| { |
| "epoch": 0.25275437459494493, |
| "grad_norm": 10.875, |
| "learning_rate": 5.051813471502591e-05, |
| "loss": 0.075390625, |
| "memory(GiB)": 43.05, |
| "step": 390, |
| "train_speed(iter/s)": 0.764437 |
| }, |
| { |
| "epoch": 0.2559948152948801, |
| "grad_norm": 4.34375, |
| "learning_rate": 5.11658031088083e-05, |
| "loss": 0.0646484375, |
| "memory(GiB)": 43.05, |
| "step": 395, |
| "train_speed(iter/s)": 0.762921 |
| }, |
| { |
| "epoch": 0.2592352559948153, |
| "grad_norm": 9.5, |
| "learning_rate": 5.1813471502590674e-05, |
| "loss": 0.0505859375, |
| "memory(GiB)": 43.05, |
| "step": 400, |
| "train_speed(iter/s)": 0.763592 |
| }, |
| { |
| "epoch": 0.26247569669475046, |
| "grad_norm": 3.296875, |
| "learning_rate": 5.2461139896373055e-05, |
| "loss": 0.0591796875, |
| "memory(GiB)": 43.05, |
| "step": 405, |
| "train_speed(iter/s)": 0.761955 |
| }, |
| { |
| "epoch": 0.2657161373946857, |
| "grad_norm": 0.7734375, |
| "learning_rate": 5.310880829015544e-05, |
| "loss": 0.065234375, |
| "memory(GiB)": 43.05, |
| "step": 410, |
| "train_speed(iter/s)": 0.765098 |
| }, |
| { |
| "epoch": 0.26895657809462087, |
| "grad_norm": 12.9375, |
| "learning_rate": 5.375647668393783e-05, |
| "loss": 0.0865234375, |
| "memory(GiB)": 43.05, |
| "step": 415, |
| "train_speed(iter/s)": 0.76542 |
| }, |
| { |
| "epoch": 0.27219701879455604, |
| "grad_norm": 2.640625, |
| "learning_rate": 5.440414507772021e-05, |
| "loss": 0.0478515625, |
| "memory(GiB)": 43.05, |
| "step": 420, |
| "train_speed(iter/s)": 0.768599 |
| }, |
| { |
| "epoch": 0.2754374594944913, |
| "grad_norm": 12.5625, |
| "learning_rate": 5.505181347150259e-05, |
| "loss": 0.07587890625, |
| "memory(GiB)": 43.05, |
| "step": 425, |
| "train_speed(iter/s)": 0.769594 |
| }, |
| { |
| "epoch": 0.27867790019442645, |
| "grad_norm": 2.90625, |
| "learning_rate": 5.5699481865284975e-05, |
| "loss": 0.0716796875, |
| "memory(GiB)": 43.05, |
| "step": 430, |
| "train_speed(iter/s)": 0.77124 |
| }, |
| { |
| "epoch": 0.28191834089436163, |
| "grad_norm": 14.4375, |
| "learning_rate": 5.634715025906736e-05, |
| "loss": 0.06484375, |
| "memory(GiB)": 43.05, |
| "step": 435, |
| "train_speed(iter/s)": 0.773115 |
| }, |
| { |
| "epoch": 0.2851587815942968, |
| "grad_norm": 9.75, |
| "learning_rate": 5.6994818652849744e-05, |
| "loss": 0.0515625, |
| "memory(GiB)": 43.05, |
| "step": 440, |
| "train_speed(iter/s)": 0.77487 |
| }, |
| { |
| "epoch": 0.28839922229423204, |
| "grad_norm": 11.0, |
| "learning_rate": 5.764248704663213e-05, |
| "loss": 0.0740234375, |
| "memory(GiB)": 43.05, |
| "step": 445, |
| "train_speed(iter/s)": 0.775776 |
| }, |
| { |
| "epoch": 0.2916396629941672, |
| "grad_norm": 1.2734375, |
| "learning_rate": 5.8290155440414506e-05, |
| "loss": 0.08212890625, |
| "memory(GiB)": 43.05, |
| "step": 450, |
| "train_speed(iter/s)": 0.776063 |
| }, |
| { |
| "epoch": 0.2948801036941024, |
| "grad_norm": 8.875, |
| "learning_rate": 5.8937823834196894e-05, |
| "loss": 0.06875, |
| "memory(GiB)": 43.05, |
| "step": 455, |
| "train_speed(iter/s)": 0.77501 |
| }, |
| { |
| "epoch": 0.29812054439403757, |
| "grad_norm": 1.46875, |
| "learning_rate": 5.9585492227979276e-05, |
| "loss": 0.05693359375, |
| "memory(GiB)": 43.05, |
| "step": 460, |
| "train_speed(iter/s)": 0.776518 |
| }, |
| { |
| "epoch": 0.3013609850939728, |
| "grad_norm": 1.8359375, |
| "learning_rate": 6.0233160621761664e-05, |
| "loss": 0.0486328125, |
| "memory(GiB)": 43.05, |
| "step": 465, |
| "train_speed(iter/s)": 0.777886 |
| }, |
| { |
| "epoch": 0.304601425793908, |
| "grad_norm": 15.1875, |
| "learning_rate": 6.088082901554405e-05, |
| "loss": 0.0626953125, |
| "memory(GiB)": 43.05, |
| "step": 470, |
| "train_speed(iter/s)": 0.778123 |
| }, |
| { |
| "epoch": 0.30784186649384315, |
| "grad_norm": 9.9375, |
| "learning_rate": 6.152849740932643e-05, |
| "loss": 0.0470703125, |
| "memory(GiB)": 43.05, |
| "step": 475, |
| "train_speed(iter/s)": 0.7802 |
| }, |
| { |
| "epoch": 0.31108230719377833, |
| "grad_norm": 12.0, |
| "learning_rate": 6.217616580310881e-05, |
| "loss": 0.05234375, |
| "memory(GiB)": 43.05, |
| "step": 480, |
| "train_speed(iter/s)": 0.781066 |
| }, |
| { |
| "epoch": 0.31432274789371356, |
| "grad_norm": 9.625, |
| "learning_rate": 6.28238341968912e-05, |
| "loss": 0.05703125, |
| "memory(GiB)": 43.05, |
| "step": 485, |
| "train_speed(iter/s)": 0.777163 |
| }, |
| { |
| "epoch": 0.31756318859364874, |
| "grad_norm": 1.4453125, |
| "learning_rate": 6.347150259067358e-05, |
| "loss": 0.0689453125, |
| "memory(GiB)": 43.05, |
| "step": 490, |
| "train_speed(iter/s)": 0.776769 |
| }, |
| { |
| "epoch": 0.3208036292935839, |
| "grad_norm": 12.4375, |
| "learning_rate": 6.411917098445595e-05, |
| "loss": 0.11328125, |
| "memory(GiB)": 43.05, |
| "step": 495, |
| "train_speed(iter/s)": 0.776231 |
| }, |
| { |
| "epoch": 0.32404406999351915, |
| "grad_norm": 1.1796875, |
| "learning_rate": 6.476683937823834e-05, |
| "loss": 0.0337890625, |
| "memory(GiB)": 43.05, |
| "step": 500, |
| "train_speed(iter/s)": 0.777694 |
| }, |
| { |
| "epoch": 0.3272845106934543, |
| "grad_norm": 8.6875, |
| "learning_rate": 6.541450777202073e-05, |
| "loss": 0.06787109375, |
| "memory(GiB)": 43.05, |
| "step": 505, |
| "train_speed(iter/s)": 0.779091 |
| }, |
| { |
| "epoch": 0.3305249513933895, |
| "grad_norm": 1.1953125, |
| "learning_rate": 6.606217616580311e-05, |
| "loss": 0.06767578125, |
| "memory(GiB)": 43.05, |
| "step": 510, |
| "train_speed(iter/s)": 0.77883 |
| }, |
| { |
| "epoch": 0.3337653920933247, |
| "grad_norm": 1.9140625, |
| "learning_rate": 6.67098445595855e-05, |
| "loss": 0.0328125, |
| "memory(GiB)": 43.05, |
| "step": 515, |
| "train_speed(iter/s)": 0.777959 |
| }, |
| { |
| "epoch": 0.3370058327932599, |
| "grad_norm": 10.5, |
| "learning_rate": 6.735751295336788e-05, |
| "loss": 0.04677734375, |
| "memory(GiB)": 43.05, |
| "step": 520, |
| "train_speed(iter/s)": 0.777442 |
| }, |
| { |
| "epoch": 0.3402462734931951, |
| "grad_norm": 9.6875, |
| "learning_rate": 6.800518134715027e-05, |
| "loss": 0.0787109375, |
| "memory(GiB)": 43.05, |
| "step": 525, |
| "train_speed(iter/s)": 0.777622 |
| }, |
| { |
| "epoch": 0.34348671419313026, |
| "grad_norm": 10.0625, |
| "learning_rate": 6.865284974093264e-05, |
| "loss": 0.06318359375, |
| "memory(GiB)": 43.05, |
| "step": 530, |
| "train_speed(iter/s)": 0.7802 |
| }, |
| { |
| "epoch": 0.34672715489306544, |
| "grad_norm": 11.125, |
| "learning_rate": 6.930051813471503e-05, |
| "loss": 0.0388671875, |
| "memory(GiB)": 43.05, |
| "step": 535, |
| "train_speed(iter/s)": 0.780842 |
| }, |
| { |
| "epoch": 0.34996759559300067, |
| "grad_norm": 0.66015625, |
| "learning_rate": 6.994818652849742e-05, |
| "loss": 0.0392578125, |
| "memory(GiB)": 43.05, |
| "step": 540, |
| "train_speed(iter/s)": 0.779714 |
| }, |
| { |
| "epoch": 0.35320803629293585, |
| "grad_norm": 7.375, |
| "learning_rate": 7.059585492227979e-05, |
| "loss": 0.0400390625, |
| "memory(GiB)": 43.05, |
| "step": 545, |
| "train_speed(iter/s)": 0.779558 |
| }, |
| { |
| "epoch": 0.356448476992871, |
| "grad_norm": 1.671875, |
| "learning_rate": 7.124352331606218e-05, |
| "loss": 0.0501953125, |
| "memory(GiB)": 43.05, |
| "step": 550, |
| "train_speed(iter/s)": 0.779993 |
| }, |
| { |
| "epoch": 0.3596889176928062, |
| "grad_norm": 8.875, |
| "learning_rate": 7.189119170984457e-05, |
| "loss": 0.0609375, |
| "memory(GiB)": 43.05, |
| "step": 555, |
| "train_speed(iter/s)": 0.782429 |
| }, |
| { |
| "epoch": 0.36292935839274143, |
| "grad_norm": 16.125, |
| "learning_rate": 7.253886010362695e-05, |
| "loss": 0.082421875, |
| "memory(GiB)": 43.05, |
| "step": 560, |
| "train_speed(iter/s)": 0.782177 |
| }, |
| { |
| "epoch": 0.3661697990926766, |
| "grad_norm": 3.78125, |
| "learning_rate": 7.318652849740933e-05, |
| "loss": 0.066015625, |
| "memory(GiB)": 43.05, |
| "step": 565, |
| "train_speed(iter/s)": 0.780785 |
| }, |
| { |
| "epoch": 0.3694102397926118, |
| "grad_norm": 1.890625, |
| "learning_rate": 7.38341968911917e-05, |
| "loss": 0.0556640625, |
| "memory(GiB)": 43.05, |
| "step": 570, |
| "train_speed(iter/s)": 0.778912 |
| }, |
| { |
| "epoch": 0.37265068049254696, |
| "grad_norm": 11.875, |
| "learning_rate": 7.448186528497409e-05, |
| "loss": 0.0791015625, |
| "memory(GiB)": 43.05, |
| "step": 575, |
| "train_speed(iter/s)": 0.77911 |
| }, |
| { |
| "epoch": 0.3758911211924822, |
| "grad_norm": 2.5, |
| "learning_rate": 7.512953367875648e-05, |
| "loss": 0.0390625, |
| "memory(GiB)": 43.05, |
| "step": 580, |
| "train_speed(iter/s)": 0.779861 |
| }, |
| { |
| "epoch": 0.37913156189241737, |
| "grad_norm": 14.9375, |
| "learning_rate": 7.577720207253887e-05, |
| "loss": 0.037890625, |
| "memory(GiB)": 43.05, |
| "step": 585, |
| "train_speed(iter/s)": 0.779469 |
| }, |
| { |
| "epoch": 0.38237200259235254, |
| "grad_norm": 0.94921875, |
| "learning_rate": 7.642487046632126e-05, |
| "loss": 0.05390625, |
| "memory(GiB)": 43.05, |
| "step": 590, |
| "train_speed(iter/s)": 0.781608 |
| }, |
| { |
| "epoch": 0.3856124432922878, |
| "grad_norm": 1.265625, |
| "learning_rate": 7.707253886010363e-05, |
| "loss": 0.0486328125, |
| "memory(GiB)": 43.05, |
| "step": 595, |
| "train_speed(iter/s)": 0.782382 |
| }, |
| { |
| "epoch": 0.38885288399222295, |
| "grad_norm": 8.4375, |
| "learning_rate": 7.772020725388602e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 600, |
| "train_speed(iter/s)": 0.781728 |
| }, |
| { |
| "epoch": 0.39209332469215813, |
| "grad_norm": 10.1875, |
| "learning_rate": 7.836787564766839e-05, |
| "loss": 0.0505859375, |
| "memory(GiB)": 43.05, |
| "step": 605, |
| "train_speed(iter/s)": 0.780685 |
| }, |
| { |
| "epoch": 0.3953337653920933, |
| "grad_norm": 8.3125, |
| "learning_rate": 7.901554404145078e-05, |
| "loss": 0.03671875, |
| "memory(GiB)": 43.05, |
| "step": 610, |
| "train_speed(iter/s)": 0.78192 |
| }, |
| { |
| "epoch": 0.39857420609202854, |
| "grad_norm": 9.4375, |
| "learning_rate": 7.966321243523317e-05, |
| "loss": 0.0421875, |
| "memory(GiB)": 43.05, |
| "step": 615, |
| "train_speed(iter/s)": 0.781089 |
| }, |
| { |
| "epoch": 0.4018146467919637, |
| "grad_norm": 3.78125, |
| "learning_rate": 8.031088082901554e-05, |
| "loss": 0.04609375, |
| "memory(GiB)": 43.05, |
| "step": 620, |
| "train_speed(iter/s)": 0.779088 |
| }, |
| { |
| "epoch": 0.4050550874918989, |
| "grad_norm": 5.125, |
| "learning_rate": 8.095854922279793e-05, |
| "loss": 0.03359375, |
| "memory(GiB)": 43.05, |
| "step": 625, |
| "train_speed(iter/s)": 0.778978 |
| }, |
| { |
| "epoch": 0.40829552819183407, |
| "grad_norm": 2.890625, |
| "learning_rate": 8.160621761658032e-05, |
| "loss": 0.05361328125, |
| "memory(GiB)": 43.05, |
| "step": 630, |
| "train_speed(iter/s)": 0.779658 |
| }, |
| { |
| "epoch": 0.4115359688917693, |
| "grad_norm": 1.2734375, |
| "learning_rate": 8.22538860103627e-05, |
| "loss": 0.0625, |
| "memory(GiB)": 43.05, |
| "step": 635, |
| "train_speed(iter/s)": 0.780894 |
| }, |
| { |
| "epoch": 0.4147764095917045, |
| "grad_norm": 1.859375, |
| "learning_rate": 8.290155440414508e-05, |
| "loss": 0.0771484375, |
| "memory(GiB)": 43.05, |
| "step": 640, |
| "train_speed(iter/s)": 0.778867 |
| }, |
| { |
| "epoch": 0.41801685029163965, |
| "grad_norm": 1.21875, |
| "learning_rate": 8.354922279792747e-05, |
| "loss": 0.065625, |
| "memory(GiB)": 43.05, |
| "step": 645, |
| "train_speed(iter/s)": 0.77719 |
| }, |
| { |
| "epoch": 0.42125729099157483, |
| "grad_norm": 1.5546875, |
| "learning_rate": 8.419689119170984e-05, |
| "loss": 0.06796875, |
| "memory(GiB)": 43.05, |
| "step": 650, |
| "train_speed(iter/s)": 0.778792 |
| }, |
| { |
| "epoch": 0.42449773169151006, |
| "grad_norm": 1.171875, |
| "learning_rate": 8.484455958549223e-05, |
| "loss": 0.04267578125, |
| "memory(GiB)": 43.05, |
| "step": 655, |
| "train_speed(iter/s)": 0.777084 |
| }, |
| { |
| "epoch": 0.42773817239144524, |
| "grad_norm": 0.81640625, |
| "learning_rate": 8.549222797927462e-05, |
| "loss": 0.03828125, |
| "memory(GiB)": 43.05, |
| "step": 660, |
| "train_speed(iter/s)": 0.778245 |
| }, |
| { |
| "epoch": 0.4309786130913804, |
| "grad_norm": 9.8125, |
| "learning_rate": 8.6139896373057e-05, |
| "loss": 0.06953125, |
| "memory(GiB)": 43.05, |
| "step": 665, |
| "train_speed(iter/s)": 0.778039 |
| }, |
| { |
| "epoch": 0.43421905379131565, |
| "grad_norm": 4.09375, |
| "learning_rate": 8.678756476683938e-05, |
| "loss": 0.06484375, |
| "memory(GiB)": 43.05, |
| "step": 670, |
| "train_speed(iter/s)": 0.777077 |
| }, |
| { |
| "epoch": 0.4374594944912508, |
| "grad_norm": 12.6875, |
| "learning_rate": 8.743523316062177e-05, |
| "loss": 0.052734375, |
| "memory(GiB)": 43.05, |
| "step": 675, |
| "train_speed(iter/s)": 0.777329 |
| }, |
| { |
| "epoch": 0.440699935191186, |
| "grad_norm": 6.9375, |
| "learning_rate": 8.808290155440416e-05, |
| "loss": 0.045703125, |
| "memory(GiB)": 43.05, |
| "step": 680, |
| "train_speed(iter/s)": 0.7785 |
| }, |
| { |
| "epoch": 0.4439403758911212, |
| "grad_norm": 2.328125, |
| "learning_rate": 8.873056994818653e-05, |
| "loss": 0.05625, |
| "memory(GiB)": 43.05, |
| "step": 685, |
| "train_speed(iter/s)": 0.776505 |
| }, |
| { |
| "epoch": 0.4471808165910564, |
| "grad_norm": 16.625, |
| "learning_rate": 8.93782383419689e-05, |
| "loss": 0.0560546875, |
| "memory(GiB)": 43.05, |
| "step": 690, |
| "train_speed(iter/s)": 0.776332 |
| }, |
| { |
| "epoch": 0.4504212572909916, |
| "grad_norm": 13.4375, |
| "learning_rate": 9.00259067357513e-05, |
| "loss": 0.0673828125, |
| "memory(GiB)": 43.05, |
| "step": 695, |
| "train_speed(iter/s)": 0.777024 |
| }, |
| { |
| "epoch": 0.45366169799092676, |
| "grad_norm": 9.1875, |
| "learning_rate": 9.067357512953368e-05, |
| "loss": 0.0634765625, |
| "memory(GiB)": 43.05, |
| "step": 700, |
| "train_speed(iter/s)": 0.77715 |
| }, |
| { |
| "epoch": 0.45690213869086194, |
| "grad_norm": 1.09375, |
| "learning_rate": 9.132124352331607e-05, |
| "loss": 0.03935546875, |
| "memory(GiB)": 43.05, |
| "step": 705, |
| "train_speed(iter/s)": 0.778282 |
| }, |
| { |
| "epoch": 0.46014257939079717, |
| "grad_norm": 9.875, |
| "learning_rate": 9.196891191709846e-05, |
| "loss": 0.07890625, |
| "memory(GiB)": 43.05, |
| "step": 710, |
| "train_speed(iter/s)": 0.778004 |
| }, |
| { |
| "epoch": 0.46338302009073234, |
| "grad_norm": 7.15625, |
| "learning_rate": 9.261658031088083e-05, |
| "loss": 0.0337890625, |
| "memory(GiB)": 43.05, |
| "step": 715, |
| "train_speed(iter/s)": 0.778247 |
| }, |
| { |
| "epoch": 0.4666234607906675, |
| "grad_norm": 13.9375, |
| "learning_rate": 9.326424870466322e-05, |
| "loss": 0.0638671875, |
| "memory(GiB)": 43.05, |
| "step": 720, |
| "train_speed(iter/s)": 0.779281 |
| }, |
| { |
| "epoch": 0.4698639014906027, |
| "grad_norm": 9.8125, |
| "learning_rate": 9.39119170984456e-05, |
| "loss": 0.036328125, |
| "memory(GiB)": 43.05, |
| "step": 725, |
| "train_speed(iter/s)": 0.779538 |
| }, |
| { |
| "epoch": 0.47310434219053793, |
| "grad_norm": 4.34375, |
| "learning_rate": 9.455958549222798e-05, |
| "loss": 0.0443359375, |
| "memory(GiB)": 43.05, |
| "step": 730, |
| "train_speed(iter/s)": 0.777998 |
| }, |
| { |
| "epoch": 0.4763447828904731, |
| "grad_norm": 3.96875, |
| "learning_rate": 9.520725388601037e-05, |
| "loss": 0.04453125, |
| "memory(GiB)": 43.05, |
| "step": 735, |
| "train_speed(iter/s)": 0.775346 |
| }, |
| { |
| "epoch": 0.4795852235904083, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.585492227979275e-05, |
| "loss": 0.0478515625, |
| "memory(GiB)": 43.05, |
| "step": 740, |
| "train_speed(iter/s)": 0.775944 |
| }, |
| { |
| "epoch": 0.48282566429034346, |
| "grad_norm": 0.79296875, |
| "learning_rate": 9.650259067357513e-05, |
| "loss": 0.0458984375, |
| "memory(GiB)": 43.05, |
| "step": 745, |
| "train_speed(iter/s)": 0.776402 |
| }, |
| { |
| "epoch": 0.4860661049902787, |
| "grad_norm": 13.375, |
| "learning_rate": 9.715025906735752e-05, |
| "loss": 0.0447265625, |
| "memory(GiB)": 43.05, |
| "step": 750, |
| "train_speed(iter/s)": 0.77542 |
| }, |
| { |
| "epoch": 0.48930654569021387, |
| "grad_norm": 8.375, |
| "learning_rate": 9.779792746113991e-05, |
| "loss": 0.0296875, |
| "memory(GiB)": 43.05, |
| "step": 755, |
| "train_speed(iter/s)": 0.777383 |
| }, |
| { |
| "epoch": 0.49254698639014904, |
| "grad_norm": 10.375, |
| "learning_rate": 9.844559585492228e-05, |
| "loss": 0.065625, |
| "memory(GiB)": 43.05, |
| "step": 760, |
| "train_speed(iter/s)": 0.77834 |
| }, |
| { |
| "epoch": 0.4957874270900843, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.909326424870466e-05, |
| "loss": 0.06845703125, |
| "memory(GiB)": 43.05, |
| "step": 765, |
| "train_speed(iter/s)": 0.778175 |
| }, |
| { |
| "epoch": 0.49902786779001945, |
| "grad_norm": 0.875, |
| "learning_rate": 9.974093264248705e-05, |
| "loss": 0.070703125, |
| "memory(GiB)": 43.05, |
| "step": 770, |
| "train_speed(iter/s)": 0.776861 |
| }, |
| { |
| "epoch": 0.5022683084899546, |
| "grad_norm": 5.71875, |
| "learning_rate": 9.999998966446853e-05, |
| "loss": 0.0751953125, |
| "memory(GiB)": 43.05, |
| "step": 775, |
| "train_speed(iter/s)": 0.77442 |
| }, |
| { |
| "epoch": 0.5055087491898899, |
| "grad_norm": 13.5, |
| "learning_rate": 9.999992650290278e-05, |
| "loss": 0.07265625, |
| "memory(GiB)": 43.05, |
| "step": 780, |
| "train_speed(iter/s)": 0.774088 |
| }, |
| { |
| "epoch": 0.508749189889825, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.999980592180564e-05, |
| "loss": 0.0541015625, |
| "memory(GiB)": 43.05, |
| "step": 785, |
| "train_speed(iter/s)": 0.775409 |
| }, |
| { |
| "epoch": 0.5119896305897602, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.999962792131561e-05, |
| "loss": 0.076171875, |
| "memory(GiB)": 43.05, |
| "step": 790, |
| "train_speed(iter/s)": 0.775991 |
| }, |
| { |
| "epoch": 0.5152300712896954, |
| "grad_norm": 12.375, |
| "learning_rate": 9.999939250163708e-05, |
| "loss": 0.038671875, |
| "memory(GiB)": 43.05, |
| "step": 795, |
| "train_speed(iter/s)": 0.776217 |
| }, |
| { |
| "epoch": 0.5184705119896306, |
| "grad_norm": 10.5, |
| "learning_rate": 9.999909966304044e-05, |
| "loss": 0.0849609375, |
| "memory(GiB)": 43.05, |
| "step": 800, |
| "train_speed(iter/s)": 0.774889 |
| }, |
| { |
| "epoch": 0.5217109526895658, |
| "grad_norm": 12.1875, |
| "learning_rate": 9.999874940586194e-05, |
| "loss": 0.0599609375, |
| "memory(GiB)": 43.05, |
| "step": 805, |
| "train_speed(iter/s)": 0.775316 |
| }, |
| { |
| "epoch": 0.5249513933895009, |
| "grad_norm": 19.0, |
| "learning_rate": 9.999834173050383e-05, |
| "loss": 0.0408203125, |
| "memory(GiB)": 43.05, |
| "step": 810, |
| "train_speed(iter/s)": 0.774057 |
| }, |
| { |
| "epoch": 0.5281918340894362, |
| "grad_norm": 11.25, |
| "learning_rate": 9.99978766374343e-05, |
| "loss": 0.0421875, |
| "memory(GiB)": 43.05, |
| "step": 815, |
| "train_speed(iter/s)": 0.774313 |
| }, |
| { |
| "epoch": 0.5314322747893714, |
| "grad_norm": 18.0, |
| "learning_rate": 9.999735412718742e-05, |
| "loss": 0.0634765625, |
| "memory(GiB)": 43.05, |
| "step": 820, |
| "train_speed(iter/s)": 0.773696 |
| }, |
| { |
| "epoch": 0.5346727154893065, |
| "grad_norm": 9.6875, |
| "learning_rate": 9.999677420036327e-05, |
| "loss": 0.075390625, |
| "memory(GiB)": 43.05, |
| "step": 825, |
| "train_speed(iter/s)": 0.774409 |
| }, |
| { |
| "epoch": 0.5379131561892417, |
| "grad_norm": 11.1875, |
| "learning_rate": 9.999613685762782e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 830, |
| "train_speed(iter/s)": 0.775219 |
| }, |
| { |
| "epoch": 0.541153596889177, |
| "grad_norm": 10.625, |
| "learning_rate": 9.999544209971299e-05, |
| "loss": 0.0587890625, |
| "memory(GiB)": 43.05, |
| "step": 835, |
| "train_speed(iter/s)": 0.774369 |
| }, |
| { |
| "epoch": 0.5443940375891121, |
| "grad_norm": 14.25, |
| "learning_rate": 9.999468992741665e-05, |
| "loss": 0.05078125, |
| "memory(GiB)": 43.05, |
| "step": 840, |
| "train_speed(iter/s)": 0.771281 |
| }, |
| { |
| "epoch": 0.5476344782890473, |
| "grad_norm": 2.109375, |
| "learning_rate": 9.999388034160256e-05, |
| "loss": 0.03955078125, |
| "memory(GiB)": 43.05, |
| "step": 845, |
| "train_speed(iter/s)": 0.772797 |
| }, |
| { |
| "epoch": 0.5508749189889826, |
| "grad_norm": 1.6015625, |
| "learning_rate": 9.999301334320046e-05, |
| "loss": 0.043359375, |
| "memory(GiB)": 43.05, |
| "step": 850, |
| "train_speed(iter/s)": 0.77259 |
| }, |
| { |
| "epoch": 0.5541153596889177, |
| "grad_norm": 10.75, |
| "learning_rate": 9.999208893320602e-05, |
| "loss": 0.0517578125, |
| "memory(GiB)": 43.05, |
| "step": 855, |
| "train_speed(iter/s)": 0.772764 |
| }, |
| { |
| "epoch": 0.5573558003888529, |
| "grad_norm": 3.375, |
| "learning_rate": 9.999110711268078e-05, |
| "loss": 0.026953125, |
| "memory(GiB)": 43.05, |
| "step": 860, |
| "train_speed(iter/s)": 0.772924 |
| }, |
| { |
| "epoch": 0.560596241088788, |
| "grad_norm": 5.875, |
| "learning_rate": 9.99900678827523e-05, |
| "loss": 0.0490234375, |
| "memory(GiB)": 43.05, |
| "step": 865, |
| "train_speed(iter/s)": 0.771177 |
| }, |
| { |
| "epoch": 0.5638366817887233, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.998897124461401e-05, |
| "loss": 0.0421875, |
| "memory(GiB)": 43.05, |
| "step": 870, |
| "train_speed(iter/s)": 0.771316 |
| }, |
| { |
| "epoch": 0.5670771224886585, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.998781719952526e-05, |
| "loss": 0.0564453125, |
| "memory(GiB)": 43.05, |
| "step": 875, |
| "train_speed(iter/s)": 0.772151 |
| }, |
| { |
| "epoch": 0.5703175631885936, |
| "grad_norm": 12.8125, |
| "learning_rate": 9.998660574881138e-05, |
| "loss": 0.09287109375, |
| "memory(GiB)": 43.05, |
| "step": 880, |
| "train_speed(iter/s)": 0.772395 |
| }, |
| { |
| "epoch": 0.5735580038885288, |
| "grad_norm": 5.8125, |
| "learning_rate": 9.998533689386357e-05, |
| "loss": 0.08984375, |
| "memory(GiB)": 43.05, |
| "step": 885, |
| "train_speed(iter/s)": 0.772617 |
| }, |
| { |
| "epoch": 0.5767984445884641, |
| "grad_norm": 16.875, |
| "learning_rate": 9.998401063613897e-05, |
| "loss": 0.040625, |
| "memory(GiB)": 43.05, |
| "step": 890, |
| "train_speed(iter/s)": 0.77251 |
| }, |
| { |
| "epoch": 0.5800388852883992, |
| "grad_norm": 14.3125, |
| "learning_rate": 9.998262697716065e-05, |
| "loss": 0.089453125, |
| "memory(GiB)": 43.05, |
| "step": 895, |
| "train_speed(iter/s)": 0.771789 |
| }, |
| { |
| "epoch": 0.5832793259883344, |
| "grad_norm": 2.046875, |
| "learning_rate": 9.998118591851762e-05, |
| "loss": 0.053125, |
| "memory(GiB)": 43.05, |
| "step": 900, |
| "train_speed(iter/s)": 0.772184 |
| }, |
| { |
| "epoch": 0.5865197666882696, |
| "grad_norm": 7.28125, |
| "learning_rate": 9.997968746186472e-05, |
| "loss": 0.0642578125, |
| "memory(GiB)": 43.05, |
| "step": 905, |
| "train_speed(iter/s)": 0.773333 |
| }, |
| { |
| "epoch": 0.5897602073882048, |
| "grad_norm": 18.0, |
| "learning_rate": 9.997813160892283e-05, |
| "loss": 0.0443359375, |
| "memory(GiB)": 43.05, |
| "step": 910, |
| "train_speed(iter/s)": 0.772565 |
| }, |
| { |
| "epoch": 0.59300064808814, |
| "grad_norm": 3.5, |
| "learning_rate": 9.997651836147864e-05, |
| "loss": 0.0650390625, |
| "memory(GiB)": 43.05, |
| "step": 915, |
| "train_speed(iter/s)": 0.772992 |
| }, |
| { |
| "epoch": 0.5962410887880751, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.99748477213848e-05, |
| "loss": 0.04140625, |
| "memory(GiB)": 43.05, |
| "step": 920, |
| "train_speed(iter/s)": 0.774383 |
| }, |
| { |
| "epoch": 0.5994815294880104, |
| "grad_norm": 1.421875, |
| "learning_rate": 9.997311969055987e-05, |
| "loss": 0.0591796875, |
| "memory(GiB)": 43.05, |
| "step": 925, |
| "train_speed(iter/s)": 0.77552 |
| }, |
| { |
| "epoch": 0.6027219701879456, |
| "grad_norm": 11.625, |
| "learning_rate": 9.99713342709883e-05, |
| "loss": 0.0369140625, |
| "memory(GiB)": 43.05, |
| "step": 930, |
| "train_speed(iter/s)": 0.775589 |
| }, |
| { |
| "epoch": 0.6059624108878807, |
| "grad_norm": 1.8046875, |
| "learning_rate": 9.996949146472045e-05, |
| "loss": 0.0556640625, |
| "memory(GiB)": 43.05, |
| "step": 935, |
| "train_speed(iter/s)": 0.774582 |
| }, |
| { |
| "epoch": 0.609202851587816, |
| "grad_norm": 2.03125, |
| "learning_rate": 9.996759127387258e-05, |
| "loss": 0.0376953125, |
| "memory(GiB)": 43.05, |
| "step": 940, |
| "train_speed(iter/s)": 0.774099 |
| }, |
| { |
| "epoch": 0.6124432922877512, |
| "grad_norm": 10.5, |
| "learning_rate": 9.996563370062685e-05, |
| "loss": 0.0390625, |
| "memory(GiB)": 43.05, |
| "step": 945, |
| "train_speed(iter/s)": 0.773654 |
| }, |
| { |
| "epoch": 0.6156837329876863, |
| "grad_norm": 2.046875, |
| "learning_rate": 9.996361874723137e-05, |
| "loss": 0.04248046875, |
| "memory(GiB)": 43.05, |
| "step": 950, |
| "train_speed(iter/s)": 0.774127 |
| }, |
| { |
| "epoch": 0.6189241736876215, |
| "grad_norm": 2.484375, |
| "learning_rate": 9.996154641600004e-05, |
| "loss": 0.058203125, |
| "memory(GiB)": 43.05, |
| "step": 955, |
| "train_speed(iter/s)": 0.773992 |
| }, |
| { |
| "epoch": 0.6221646143875567, |
| "grad_norm": 10.1875, |
| "learning_rate": 9.995941670931272e-05, |
| "loss": 0.0546875, |
| "memory(GiB)": 43.05, |
| "step": 960, |
| "train_speed(iter/s)": 0.774066 |
| }, |
| { |
| "epoch": 0.6254050550874919, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.995722962961517e-05, |
| "loss": 0.05546875, |
| "memory(GiB)": 43.05, |
| "step": 965, |
| "train_speed(iter/s)": 0.774572 |
| }, |
| { |
| "epoch": 0.6286454957874271, |
| "grad_norm": 8.75, |
| "learning_rate": 9.9954985179419e-05, |
| "loss": 0.0599609375, |
| "memory(GiB)": 43.05, |
| "step": 970, |
| "train_speed(iter/s)": 0.774691 |
| }, |
| { |
| "epoch": 0.6318859364873622, |
| "grad_norm": 11.125, |
| "learning_rate": 9.995268336130173e-05, |
| "loss": 0.0427734375, |
| "memory(GiB)": 43.05, |
| "step": 975, |
| "train_speed(iter/s)": 0.774947 |
| }, |
| { |
| "epoch": 0.6351263771872975, |
| "grad_norm": 2.0, |
| "learning_rate": 9.995032417790673e-05, |
| "loss": 0.0779296875, |
| "memory(GiB)": 43.05, |
| "step": 980, |
| "train_speed(iter/s)": 0.774809 |
| }, |
| { |
| "epoch": 0.6383668178872327, |
| "grad_norm": 5.96875, |
| "learning_rate": 9.994790763194329e-05, |
| "loss": 0.0666015625, |
| "memory(GiB)": 43.05, |
| "step": 985, |
| "train_speed(iter/s)": 0.774857 |
| }, |
| { |
| "epoch": 0.6416072585871678, |
| "grad_norm": 3.703125, |
| "learning_rate": 9.994543372618654e-05, |
| "loss": 0.0865234375, |
| "memory(GiB)": 43.05, |
| "step": 990, |
| "train_speed(iter/s)": 0.77391 |
| }, |
| { |
| "epoch": 0.6448476992871031, |
| "grad_norm": 11.375, |
| "learning_rate": 9.994290246347751e-05, |
| "loss": 0.04453125, |
| "memory(GiB)": 43.05, |
| "step": 995, |
| "train_speed(iter/s)": 0.774097 |
| }, |
| { |
| "epoch": 0.6480881399870383, |
| "grad_norm": 8.8125, |
| "learning_rate": 9.994031384672306e-05, |
| "loss": 0.052734375, |
| "memory(GiB)": 43.05, |
| "step": 1000, |
| "train_speed(iter/s)": 0.773134 |
| }, |
| { |
| "epoch": 0.6513285806869734, |
| "grad_norm": 0.72265625, |
| "learning_rate": 9.993766787889596e-05, |
| "loss": 0.04765625, |
| "memory(GiB)": 43.05, |
| "step": 1005, |
| "train_speed(iter/s)": 0.773592 |
| }, |
| { |
| "epoch": 0.6545690213869086, |
| "grad_norm": 1.8046875, |
| "learning_rate": 9.99349645630348e-05, |
| "loss": 0.04921875, |
| "memory(GiB)": 43.05, |
| "step": 1010, |
| "train_speed(iter/s)": 0.77459 |
| }, |
| { |
| "epoch": 0.6578094620868438, |
| "grad_norm": 11.75, |
| "learning_rate": 9.993220390224405e-05, |
| "loss": 0.040625, |
| "memory(GiB)": 43.05, |
| "step": 1015, |
| "train_speed(iter/s)": 0.774071 |
| }, |
| { |
| "epoch": 0.661049902786779, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.992938589969405e-05, |
| "loss": 0.0345703125, |
| "memory(GiB)": 43.05, |
| "step": 1020, |
| "train_speed(iter/s)": 0.774983 |
| }, |
| { |
| "epoch": 0.6642903434867142, |
| "grad_norm": 0.7421875, |
| "learning_rate": 9.992651055862094e-05, |
| "loss": 0.04921875, |
| "memory(GiB)": 43.05, |
| "step": 1025, |
| "train_speed(iter/s)": 0.775407 |
| }, |
| { |
| "epoch": 0.6675307841866494, |
| "grad_norm": 10.625, |
| "learning_rate": 9.992357788232677e-05, |
| "loss": 0.07265625, |
| "memory(GiB)": 43.05, |
| "step": 1030, |
| "train_speed(iter/s)": 0.775489 |
| }, |
| { |
| "epoch": 0.6707712248865846, |
| "grad_norm": 11.0625, |
| "learning_rate": 9.992058787417941e-05, |
| "loss": 0.0412109375, |
| "memory(GiB)": 43.05, |
| "step": 1035, |
| "train_speed(iter/s)": 0.77588 |
| }, |
| { |
| "epoch": 0.6740116655865198, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.991754053761253e-05, |
| "loss": 0.04052734375, |
| "memory(GiB)": 43.05, |
| "step": 1040, |
| "train_speed(iter/s)": 0.775803 |
| }, |
| { |
| "epoch": 0.6772521062864549, |
| "grad_norm": 1.4609375, |
| "learning_rate": 9.991443587612567e-05, |
| "loss": 0.0552734375, |
| "memory(GiB)": 43.05, |
| "step": 1045, |
| "train_speed(iter/s)": 0.775196 |
| }, |
| { |
| "epoch": 0.6804925469863902, |
| "grad_norm": 7.34375, |
| "learning_rate": 9.991127389328423e-05, |
| "loss": 0.0451171875, |
| "memory(GiB)": 43.05, |
| "step": 1050, |
| "train_speed(iter/s)": 0.776071 |
| }, |
| { |
| "epoch": 0.6837329876863253, |
| "grad_norm": 7.625, |
| "learning_rate": 9.990805459271936e-05, |
| "loss": 0.048828125, |
| "memory(GiB)": 43.05, |
| "step": 1055, |
| "train_speed(iter/s)": 0.776 |
| }, |
| { |
| "epoch": 0.6869734283862605, |
| "grad_norm": 0.94921875, |
| "learning_rate": 9.990477797812814e-05, |
| "loss": 0.0818359375, |
| "memory(GiB)": 43.05, |
| "step": 1060, |
| "train_speed(iter/s)": 0.776054 |
| }, |
| { |
| "epoch": 0.6902138690861958, |
| "grad_norm": 1.75, |
| "learning_rate": 9.990144405327336e-05, |
| "loss": 0.0416015625, |
| "memory(GiB)": 43.05, |
| "step": 1065, |
| "train_speed(iter/s)": 0.775956 |
| }, |
| { |
| "epoch": 0.6934543097861309, |
| "grad_norm": 16.25, |
| "learning_rate": 9.98980528219837e-05, |
| "loss": 0.0697265625, |
| "memory(GiB)": 43.05, |
| "step": 1070, |
| "train_speed(iter/s)": 0.776794 |
| }, |
| { |
| "epoch": 0.6966947504860661, |
| "grad_norm": 1.0625, |
| "learning_rate": 9.989460428815362e-05, |
| "loss": 0.0607421875, |
| "memory(GiB)": 43.05, |
| "step": 1075, |
| "train_speed(iter/s)": 0.777 |
| }, |
| { |
| "epoch": 0.6999351911860013, |
| "grad_norm": 13.0625, |
| "learning_rate": 9.989109845574336e-05, |
| "loss": 0.0482421875, |
| "memory(GiB)": 43.05, |
| "step": 1080, |
| "train_speed(iter/s)": 0.776921 |
| }, |
| { |
| "epoch": 0.7031756318859365, |
| "grad_norm": 12.3125, |
| "learning_rate": 9.988753532877904e-05, |
| "loss": 0.059375, |
| "memory(GiB)": 43.05, |
| "step": 1085, |
| "train_speed(iter/s)": 0.776292 |
| }, |
| { |
| "epoch": 0.7064160725858717, |
| "grad_norm": 1.6484375, |
| "learning_rate": 9.98839149113525e-05, |
| "loss": 0.03642578125, |
| "memory(GiB)": 43.05, |
| "step": 1090, |
| "train_speed(iter/s)": 0.776828 |
| }, |
| { |
| "epoch": 0.7096565132858069, |
| "grad_norm": 10.3125, |
| "learning_rate": 9.988023720762138e-05, |
| "loss": 0.05390625, |
| "memory(GiB)": 43.05, |
| "step": 1095, |
| "train_speed(iter/s)": 0.776747 |
| }, |
| { |
| "epoch": 0.712896953985742, |
| "grad_norm": 12.875, |
| "learning_rate": 9.987650222180917e-05, |
| "loss": 0.046875, |
| "memory(GiB)": 43.05, |
| "step": 1100, |
| "train_speed(iter/s)": 0.774409 |
| }, |
| { |
| "epoch": 0.7161373946856773, |
| "grad_norm": 5.53125, |
| "learning_rate": 9.987270995820508e-05, |
| "loss": 0.0396484375, |
| "memory(GiB)": 43.05, |
| "step": 1105, |
| "train_speed(iter/s)": 0.774632 |
| }, |
| { |
| "epoch": 0.7193778353856124, |
| "grad_norm": 3.8125, |
| "learning_rate": 9.986886042116413e-05, |
| "loss": 0.04638671875, |
| "memory(GiB)": 43.05, |
| "step": 1110, |
| "train_speed(iter/s)": 0.774254 |
| }, |
| { |
| "epoch": 0.7226182760855476, |
| "grad_norm": 1.5546875, |
| "learning_rate": 9.986495361510705e-05, |
| "loss": 0.04453125, |
| "memory(GiB)": 43.05, |
| "step": 1115, |
| "train_speed(iter/s)": 0.774629 |
| }, |
| { |
| "epoch": 0.7258587167854829, |
| "grad_norm": 11.125, |
| "learning_rate": 9.986098954452043e-05, |
| "loss": 0.053515625, |
| "memory(GiB)": 43.05, |
| "step": 1120, |
| "train_speed(iter/s)": 0.775487 |
| }, |
| { |
| "epoch": 0.729099157485418, |
| "grad_norm": 12.8125, |
| "learning_rate": 9.985696821395659e-05, |
| "loss": 0.0421875, |
| "memory(GiB)": 43.05, |
| "step": 1125, |
| "train_speed(iter/s)": 0.776194 |
| }, |
| { |
| "epoch": 0.7323395981853532, |
| "grad_norm": 0.9375, |
| "learning_rate": 9.985288962803354e-05, |
| "loss": 0.0541015625, |
| "memory(GiB)": 43.05, |
| "step": 1130, |
| "train_speed(iter/s)": 0.776858 |
| }, |
| { |
| "epoch": 0.7355800388852884, |
| "grad_norm": 15.625, |
| "learning_rate": 9.984875379143515e-05, |
| "loss": 0.0466796875, |
| "memory(GiB)": 43.05, |
| "step": 1135, |
| "train_speed(iter/s)": 0.776744 |
| }, |
| { |
| "epoch": 0.7388204795852236, |
| "grad_norm": 12.375, |
| "learning_rate": 9.984456070891094e-05, |
| "loss": 0.0470703125, |
| "memory(GiB)": 43.05, |
| "step": 1140, |
| "train_speed(iter/s)": 0.777105 |
| }, |
| { |
| "epoch": 0.7420609202851588, |
| "grad_norm": 13.75, |
| "learning_rate": 9.984031038527628e-05, |
| "loss": 0.03984375, |
| "memory(GiB)": 43.05, |
| "step": 1145, |
| "train_speed(iter/s)": 0.775913 |
| }, |
| { |
| "epoch": 0.7453013609850939, |
| "grad_norm": 10.8125, |
| "learning_rate": 9.983600282541213e-05, |
| "loss": 0.0427734375, |
| "memory(GiB)": 43.05, |
| "step": 1150, |
| "train_speed(iter/s)": 0.776782 |
| }, |
| { |
| "epoch": 0.7485418016850292, |
| "grad_norm": 0.546875, |
| "learning_rate": 9.98316380342653e-05, |
| "loss": 0.0521484375, |
| "memory(GiB)": 43.05, |
| "step": 1155, |
| "train_speed(iter/s)": 0.777432 |
| }, |
| { |
| "epoch": 0.7517822423849644, |
| "grad_norm": 4.25, |
| "learning_rate": 9.98272160168483e-05, |
| "loss": 0.0470703125, |
| "memory(GiB)": 43.05, |
| "step": 1160, |
| "train_speed(iter/s)": 0.77706 |
| }, |
| { |
| "epoch": 0.7550226830848995, |
| "grad_norm": 13.375, |
| "learning_rate": 9.982273677823928e-05, |
| "loss": 0.0568359375, |
| "memory(GiB)": 43.05, |
| "step": 1165, |
| "train_speed(iter/s)": 0.777442 |
| }, |
| { |
| "epoch": 0.7582631237848347, |
| "grad_norm": 9.8125, |
| "learning_rate": 9.981820032358222e-05, |
| "loss": 0.055859375, |
| "memory(GiB)": 43.05, |
| "step": 1170, |
| "train_speed(iter/s)": 0.777855 |
| }, |
| { |
| "epoch": 0.76150356448477, |
| "grad_norm": 10.125, |
| "learning_rate": 9.981360665808675e-05, |
| "loss": 0.0314453125, |
| "memory(GiB)": 43.05, |
| "step": 1175, |
| "train_speed(iter/s)": 0.778025 |
| }, |
| { |
| "epoch": 0.7647440051847051, |
| "grad_norm": 2.953125, |
| "learning_rate": 9.980895578702815e-05, |
| "loss": 0.06181640625, |
| "memory(GiB)": 43.05, |
| "step": 1180, |
| "train_speed(iter/s)": 0.778134 |
| }, |
| { |
| "epoch": 0.7679844458846403, |
| "grad_norm": 7.0, |
| "learning_rate": 9.980424771574749e-05, |
| "loss": 0.0611328125, |
| "memory(GiB)": 43.05, |
| "step": 1185, |
| "train_speed(iter/s)": 0.778486 |
| }, |
| { |
| "epoch": 0.7712248865845756, |
| "grad_norm": 11.6875, |
| "learning_rate": 9.979948244965147e-05, |
| "loss": 0.048046875, |
| "memory(GiB)": 43.05, |
| "step": 1190, |
| "train_speed(iter/s)": 0.777604 |
| }, |
| { |
| "epoch": 0.7744653272845107, |
| "grad_norm": 2.265625, |
| "learning_rate": 9.979465999421247e-05, |
| "loss": 0.04833984375, |
| "memory(GiB)": 43.05, |
| "step": 1195, |
| "train_speed(iter/s)": 0.776874 |
| }, |
| { |
| "epoch": 0.7777057679844459, |
| "grad_norm": 7.34375, |
| "learning_rate": 9.978978035496858e-05, |
| "loss": 0.05361328125, |
| "memory(GiB)": 43.05, |
| "step": 1200, |
| "train_speed(iter/s)": 0.776012 |
| }, |
| { |
| "epoch": 0.780946208684381, |
| "grad_norm": 0.81640625, |
| "learning_rate": 9.978484353752354e-05, |
| "loss": 0.0462890625, |
| "memory(GiB)": 43.05, |
| "step": 1205, |
| "train_speed(iter/s)": 0.77616 |
| }, |
| { |
| "epoch": 0.7841866493843163, |
| "grad_norm": 1.0703125, |
| "learning_rate": 9.977984954754674e-05, |
| "loss": 0.04921875, |
| "memory(GiB)": 43.05, |
| "step": 1210, |
| "train_speed(iter/s)": 0.776525 |
| }, |
| { |
| "epoch": 0.7874270900842515, |
| "grad_norm": 2.515625, |
| "learning_rate": 9.977479839077326e-05, |
| "loss": 0.036328125, |
| "memory(GiB)": 43.05, |
| "step": 1215, |
| "train_speed(iter/s)": 0.777313 |
| }, |
| { |
| "epoch": 0.7906675307841866, |
| "grad_norm": 16.125, |
| "learning_rate": 9.976969007300378e-05, |
| "loss": 0.0369140625, |
| "memory(GiB)": 43.05, |
| "step": 1220, |
| "train_speed(iter/s)": 0.776634 |
| }, |
| { |
| "epoch": 0.7939079714841218, |
| "grad_norm": 12.75, |
| "learning_rate": 9.976452460010468e-05, |
| "loss": 0.0384765625, |
| "memory(GiB)": 43.05, |
| "step": 1225, |
| "train_speed(iter/s)": 0.776792 |
| }, |
| { |
| "epoch": 0.7971484121840571, |
| "grad_norm": 3.0, |
| "learning_rate": 9.975930197800794e-05, |
| "loss": 0.046875, |
| "memory(GiB)": 43.05, |
| "step": 1230, |
| "train_speed(iter/s)": 0.777111 |
| }, |
| { |
| "epoch": 0.8003888528839922, |
| "grad_norm": 10.25, |
| "learning_rate": 9.975402221271117e-05, |
| "loss": 0.0427734375, |
| "memory(GiB)": 43.05, |
| "step": 1235, |
| "train_speed(iter/s)": 0.777639 |
| }, |
| { |
| "epoch": 0.8036292935839274, |
| "grad_norm": 0.875, |
| "learning_rate": 9.974868531027761e-05, |
| "loss": 0.0509765625, |
| "memory(GiB)": 43.05, |
| "step": 1240, |
| "train_speed(iter/s)": 0.777899 |
| }, |
| { |
| "epoch": 0.8068697342838627, |
| "grad_norm": 15.9375, |
| "learning_rate": 9.974329127683614e-05, |
| "loss": 0.07109375, |
| "memory(GiB)": 43.05, |
| "step": 1245, |
| "train_speed(iter/s)": 0.777904 |
| }, |
| { |
| "epoch": 0.8101101749837978, |
| "grad_norm": 12.9375, |
| "learning_rate": 9.973784011858123e-05, |
| "loss": 0.060546875, |
| "memory(GiB)": 43.05, |
| "step": 1250, |
| "train_speed(iter/s)": 0.777258 |
| }, |
| { |
| "epoch": 0.813350615683733, |
| "grad_norm": 10.0625, |
| "learning_rate": 9.97323318417729e-05, |
| "loss": 0.0505859375, |
| "memory(GiB)": 43.05, |
| "step": 1255, |
| "train_speed(iter/s)": 0.776907 |
| }, |
| { |
| "epoch": 0.8165910563836681, |
| "grad_norm": 2.03125, |
| "learning_rate": 9.972676645273688e-05, |
| "loss": 0.037890625, |
| "memory(GiB)": 43.05, |
| "step": 1260, |
| "train_speed(iter/s)": 0.776807 |
| }, |
| { |
| "epoch": 0.8198314970836034, |
| "grad_norm": 12.9375, |
| "learning_rate": 9.972114395786436e-05, |
| "loss": 0.048828125, |
| "memory(GiB)": 43.05, |
| "step": 1265, |
| "train_speed(iter/s)": 0.77681 |
| }, |
| { |
| "epoch": 0.8230719377835386, |
| "grad_norm": 12.4375, |
| "learning_rate": 9.971546436361221e-05, |
| "loss": 0.05, |
| "memory(GiB)": 43.05, |
| "step": 1270, |
| "train_speed(iter/s)": 0.775909 |
| }, |
| { |
| "epoch": 0.8263123784834737, |
| "grad_norm": 4.15625, |
| "learning_rate": 9.970972767650281e-05, |
| "loss": 0.04521484375, |
| "memory(GiB)": 43.05, |
| "step": 1275, |
| "train_speed(iter/s)": 0.775947 |
| }, |
| { |
| "epoch": 0.829552819183409, |
| "grad_norm": 4.875, |
| "learning_rate": 9.970393390312414e-05, |
| "loss": 0.06591796875, |
| "memory(GiB)": 43.05, |
| "step": 1280, |
| "train_speed(iter/s)": 0.77626 |
| }, |
| { |
| "epoch": 0.8327932598833442, |
| "grad_norm": 7.125, |
| "learning_rate": 9.969808305012971e-05, |
| "loss": 0.0515625, |
| "memory(GiB)": 43.05, |
| "step": 1285, |
| "train_speed(iter/s)": 0.776127 |
| }, |
| { |
| "epoch": 0.8360337005832793, |
| "grad_norm": 10.8125, |
| "learning_rate": 9.96921751242386e-05, |
| "loss": 0.0662109375, |
| "memory(GiB)": 43.05, |
| "step": 1290, |
| "train_speed(iter/s)": 0.775879 |
| }, |
| { |
| "epoch": 0.8392741412832145, |
| "grad_norm": 1.4140625, |
| "learning_rate": 9.968621013223544e-05, |
| "loss": 0.0544921875, |
| "memory(GiB)": 43.05, |
| "step": 1295, |
| "train_speed(iter/s)": 0.775725 |
| }, |
| { |
| "epoch": 0.8425145819831497, |
| "grad_norm": 6.21875, |
| "learning_rate": 9.968018808097039e-05, |
| "loss": 0.043359375, |
| "memory(GiB)": 43.05, |
| "step": 1300, |
| "train_speed(iter/s)": 0.775627 |
| }, |
| { |
| "epoch": 0.8457550226830849, |
| "grad_norm": 1.609375, |
| "learning_rate": 9.967410897735909e-05, |
| "loss": 0.058203125, |
| "memory(GiB)": 43.05, |
| "step": 1305, |
| "train_speed(iter/s)": 0.775025 |
| }, |
| { |
| "epoch": 0.8489954633830201, |
| "grad_norm": 4.53125, |
| "learning_rate": 9.966797282838274e-05, |
| "loss": 0.0521484375, |
| "memory(GiB)": 43.05, |
| "step": 1310, |
| "train_speed(iter/s)": 0.775352 |
| }, |
| { |
| "epoch": 0.8522359040829552, |
| "grad_norm": 17.75, |
| "learning_rate": 9.966177964108809e-05, |
| "loss": 0.0470703125, |
| "memory(GiB)": 43.05, |
| "step": 1315, |
| "train_speed(iter/s)": 0.77452 |
| }, |
| { |
| "epoch": 0.8554763447828905, |
| "grad_norm": 6.0625, |
| "learning_rate": 9.96555294225873e-05, |
| "loss": 0.034375, |
| "memory(GiB)": 43.05, |
| "step": 1320, |
| "train_speed(iter/s)": 0.775285 |
| }, |
| { |
| "epoch": 0.8587167854828257, |
| "grad_norm": 11.375, |
| "learning_rate": 9.964922218005812e-05, |
| "loss": 0.045703125, |
| "memory(GiB)": 43.05, |
| "step": 1325, |
| "train_speed(iter/s)": 0.775617 |
| }, |
| { |
| "epoch": 0.8619572261827608, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.964285792074368e-05, |
| "loss": 0.0630859375, |
| "memory(GiB)": 43.05, |
| "step": 1330, |
| "train_speed(iter/s)": 0.775421 |
| }, |
| { |
| "epoch": 0.8651976668826961, |
| "grad_norm": 12.9375, |
| "learning_rate": 9.96364366519527e-05, |
| "loss": 0.0404296875, |
| "memory(GiB)": 43.05, |
| "step": 1335, |
| "train_speed(iter/s)": 0.775758 |
| }, |
| { |
| "epoch": 0.8684381075826313, |
| "grad_norm": 3.46875, |
| "learning_rate": 9.962995838105929e-05, |
| "loss": 0.0572265625, |
| "memory(GiB)": 43.05, |
| "step": 1340, |
| "train_speed(iter/s)": 0.774714 |
| }, |
| { |
| "epoch": 0.8716785482825664, |
| "grad_norm": 3.90625, |
| "learning_rate": 9.962342311550305e-05, |
| "loss": 0.042578125, |
| "memory(GiB)": 43.05, |
| "step": 1345, |
| "train_speed(iter/s)": 0.773981 |
| }, |
| { |
| "epoch": 0.8749189889825016, |
| "grad_norm": 3.671875, |
| "learning_rate": 9.961683086278903e-05, |
| "loss": 0.0626953125, |
| "memory(GiB)": 43.05, |
| "step": 1350, |
| "train_speed(iter/s)": 0.773107 |
| }, |
| { |
| "epoch": 0.8781594296824368, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.961018163048773e-05, |
| "loss": 0.0419921875, |
| "memory(GiB)": 43.05, |
| "step": 1355, |
| "train_speed(iter/s)": 0.772617 |
| }, |
| { |
| "epoch": 0.881399870382372, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.960347542623504e-05, |
| "loss": 0.0521484375, |
| "memory(GiB)": 43.05, |
| "step": 1360, |
| "train_speed(iter/s)": 0.773595 |
| }, |
| { |
| "epoch": 0.8846403110823072, |
| "grad_norm": 9.6875, |
| "learning_rate": 9.959671225773237e-05, |
| "loss": 0.0345703125, |
| "memory(GiB)": 43.05, |
| "step": 1365, |
| "train_speed(iter/s)": 0.774588 |
| }, |
| { |
| "epoch": 0.8878807517822424, |
| "grad_norm": 3.515625, |
| "learning_rate": 9.958989213274646e-05, |
| "loss": 0.05546875, |
| "memory(GiB)": 43.05, |
| "step": 1370, |
| "train_speed(iter/s)": 0.77517 |
| }, |
| { |
| "epoch": 0.8911211924821776, |
| "grad_norm": 1.1328125, |
| "learning_rate": 9.958301505910948e-05, |
| "loss": 0.0599609375, |
| "memory(GiB)": 43.05, |
| "step": 1375, |
| "train_speed(iter/s)": 0.774153 |
| }, |
| { |
| "epoch": 0.8943616331821128, |
| "grad_norm": 11.125, |
| "learning_rate": 9.957608104471903e-05, |
| "loss": 0.0654296875, |
| "memory(GiB)": 43.05, |
| "step": 1380, |
| "train_speed(iter/s)": 0.77468 |
| }, |
| { |
| "epoch": 0.8976020738820479, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.956909009753807e-05, |
| "loss": 0.0630859375, |
| "memory(GiB)": 43.05, |
| "step": 1385, |
| "train_speed(iter/s)": 0.77371 |
| }, |
| { |
| "epoch": 0.9008425145819832, |
| "grad_norm": 4.9375, |
| "learning_rate": 9.956204222559495e-05, |
| "loss": 0.045703125, |
| "memory(GiB)": 43.05, |
| "step": 1390, |
| "train_speed(iter/s)": 0.773416 |
| }, |
| { |
| "epoch": 0.9040829552819183, |
| "grad_norm": 9.8125, |
| "learning_rate": 9.955493743698339e-05, |
| "loss": 0.047265625, |
| "memory(GiB)": 43.05, |
| "step": 1395, |
| "train_speed(iter/s)": 0.774051 |
| }, |
| { |
| "epoch": 0.9073233959818535, |
| "grad_norm": 7.96875, |
| "learning_rate": 9.954777573986247e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 1400, |
| "train_speed(iter/s)": 0.773786 |
| }, |
| { |
| "epoch": 0.9105638366817888, |
| "grad_norm": 2.65625, |
| "learning_rate": 9.954055714245665e-05, |
| "loss": 0.0419921875, |
| "memory(GiB)": 43.05, |
| "step": 1405, |
| "train_speed(iter/s)": 0.774347 |
| }, |
| { |
| "epoch": 0.9138042773817239, |
| "grad_norm": 9.3125, |
| "learning_rate": 9.953328165305568e-05, |
| "loss": 0.0462890625, |
| "memory(GiB)": 43.05, |
| "step": 1410, |
| "train_speed(iter/s)": 0.774895 |
| }, |
| { |
| "epoch": 0.9170447180816591, |
| "grad_norm": 13.5625, |
| "learning_rate": 9.95259492800147e-05, |
| "loss": 0.0556640625, |
| "memory(GiB)": 43.05, |
| "step": 1415, |
| "train_speed(iter/s)": 0.774985 |
| }, |
| { |
| "epoch": 0.9202851587815943, |
| "grad_norm": 15.6875, |
| "learning_rate": 9.951856003175414e-05, |
| "loss": 0.0466796875, |
| "memory(GiB)": 43.05, |
| "step": 1420, |
| "train_speed(iter/s)": 0.77452 |
| }, |
| { |
| "epoch": 0.9235255994815295, |
| "grad_norm": 3.265625, |
| "learning_rate": 9.951111391675976e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 1425, |
| "train_speed(iter/s)": 0.774202 |
| }, |
| { |
| "epoch": 0.9267660401814647, |
| "grad_norm": 2.8125, |
| "learning_rate": 9.950361094358263e-05, |
| "loss": 0.0607421875, |
| "memory(GiB)": 43.05, |
| "step": 1430, |
| "train_speed(iter/s)": 0.774296 |
| }, |
| { |
| "epoch": 0.9300064808813999, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.949605112083909e-05, |
| "loss": 0.03828125, |
| "memory(GiB)": 43.05, |
| "step": 1435, |
| "train_speed(iter/s)": 0.775216 |
| }, |
| { |
| "epoch": 0.933246921581335, |
| "grad_norm": 9.1875, |
| "learning_rate": 9.948843445721079e-05, |
| "loss": 0.04453125, |
| "memory(GiB)": 43.05, |
| "step": 1440, |
| "train_speed(iter/s)": 0.774922 |
| }, |
| { |
| "epoch": 0.9364873622812703, |
| "grad_norm": 3.765625, |
| "learning_rate": 9.948076096144463e-05, |
| "loss": 0.0392578125, |
| "memory(GiB)": 43.05, |
| "step": 1445, |
| "train_speed(iter/s)": 0.774879 |
| }, |
| { |
| "epoch": 0.9397278029812054, |
| "grad_norm": 12.4375, |
| "learning_rate": 9.947303064235283e-05, |
| "loss": 0.041015625, |
| "memory(GiB)": 43.05, |
| "step": 1450, |
| "train_speed(iter/s)": 0.775133 |
| }, |
| { |
| "epoch": 0.9429682436811406, |
| "grad_norm": 12.875, |
| "learning_rate": 9.946524350881282e-05, |
| "loss": 0.046484375, |
| "memory(GiB)": 43.05, |
| "step": 1455, |
| "train_speed(iter/s)": 0.774842 |
| }, |
| { |
| "epoch": 0.9462086843810759, |
| "grad_norm": 6.65625, |
| "learning_rate": 9.945739956976725e-05, |
| "loss": 0.0421875, |
| "memory(GiB)": 43.05, |
| "step": 1460, |
| "train_speed(iter/s)": 0.774929 |
| }, |
| { |
| "epoch": 0.949449125081011, |
| "grad_norm": 0.4375, |
| "learning_rate": 9.944949883422408e-05, |
| "loss": 0.0283203125, |
| "memory(GiB)": 43.05, |
| "step": 1465, |
| "train_speed(iter/s)": 0.775231 |
| }, |
| { |
| "epoch": 0.9526895657809462, |
| "grad_norm": 4.1875, |
| "learning_rate": 9.944154131125642e-05, |
| "loss": 0.050390625, |
| "memory(GiB)": 43.05, |
| "step": 1470, |
| "train_speed(iter/s)": 0.775942 |
| }, |
| { |
| "epoch": 0.9559300064808814, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.943352701000266e-05, |
| "loss": 0.0587890625, |
| "memory(GiB)": 43.05, |
| "step": 1475, |
| "train_speed(iter/s)": 0.776016 |
| }, |
| { |
| "epoch": 0.9591704471808166, |
| "grad_norm": 1.1015625, |
| "learning_rate": 9.942545593966636e-05, |
| "loss": 0.0517578125, |
| "memory(GiB)": 43.05, |
| "step": 1480, |
| "train_speed(iter/s)": 0.776078 |
| }, |
| { |
| "epoch": 0.9624108878807518, |
| "grad_norm": 14.25, |
| "learning_rate": 9.941732810951626e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 1485, |
| "train_speed(iter/s)": 0.775252 |
| }, |
| { |
| "epoch": 0.9656513285806869, |
| "grad_norm": 2.8125, |
| "learning_rate": 9.940914352888628e-05, |
| "loss": 0.073828125, |
| "memory(GiB)": 43.05, |
| "step": 1490, |
| "train_speed(iter/s)": 0.774903 |
| }, |
| { |
| "epoch": 0.9688917692806222, |
| "grad_norm": 13.5, |
| "learning_rate": 9.940090220717556e-05, |
| "loss": 0.0498046875, |
| "memory(GiB)": 43.05, |
| "step": 1495, |
| "train_speed(iter/s)": 0.775228 |
| }, |
| { |
| "epoch": 0.9721322099805574, |
| "grad_norm": 1.5625, |
| "learning_rate": 9.939260415384837e-05, |
| "loss": 0.040234375, |
| "memory(GiB)": 43.05, |
| "step": 1500, |
| "train_speed(iter/s)": 0.773932 |
| }, |
| { |
| "epoch": 0.9753726506804925, |
| "grad_norm": 1.25, |
| "learning_rate": 9.93842493784341e-05, |
| "loss": 0.0744140625, |
| "memory(GiB)": 43.05, |
| "step": 1505, |
| "train_speed(iter/s)": 0.77458 |
| }, |
| { |
| "epoch": 0.9786130913804277, |
| "grad_norm": 14.4375, |
| "learning_rate": 9.937583789052735e-05, |
| "loss": 0.04296875, |
| "memory(GiB)": 43.05, |
| "step": 1510, |
| "train_speed(iter/s)": 0.774348 |
| }, |
| { |
| "epoch": 0.981853532080363, |
| "grad_norm": 10.6875, |
| "learning_rate": 9.936736969978778e-05, |
| "loss": 0.0515625, |
| "memory(GiB)": 43.05, |
| "step": 1515, |
| "train_speed(iter/s)": 0.774064 |
| }, |
| { |
| "epoch": 0.9850939727802981, |
| "grad_norm": 9.0, |
| "learning_rate": 9.93588448159402e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 1520, |
| "train_speed(iter/s)": 0.774565 |
| }, |
| { |
| "epoch": 0.9883344134802333, |
| "grad_norm": 6.71875, |
| "learning_rate": 9.935026324877455e-05, |
| "loss": 0.030078125, |
| "memory(GiB)": 43.05, |
| "step": 1525, |
| "train_speed(iter/s)": 0.774861 |
| }, |
| { |
| "epoch": 0.9915748541801686, |
| "grad_norm": 1.265625, |
| "learning_rate": 9.93416250081458e-05, |
| "loss": 0.055859375, |
| "memory(GiB)": 43.05, |
| "step": 1530, |
| "train_speed(iter/s)": 0.774348 |
| }, |
| { |
| "epoch": 0.9948152948801037, |
| "grad_norm": 3.96875, |
| "learning_rate": 9.933293010397403e-05, |
| "loss": 0.03984375, |
| "memory(GiB)": 43.05, |
| "step": 1535, |
| "train_speed(iter/s)": 0.773239 |
| }, |
| { |
| "epoch": 0.9980557355800389, |
| "grad_norm": 8.875, |
| "learning_rate": 9.932417854624444e-05, |
| "loss": 0.04375, |
| "memory(GiB)": 43.05, |
| "step": 1540, |
| "train_speed(iter/s)": 0.77374 |
| }, |
| { |
| "epoch": 1.0012961762799741, |
| "grad_norm": 2.375, |
| "learning_rate": 9.931537034500723e-05, |
| "loss": 0.0490234375, |
| "memory(GiB)": 43.05, |
| "step": 1545, |
| "train_speed(iter/s)": 0.774118 |
| }, |
| { |
| "epoch": 1.0045366169799093, |
| "grad_norm": 3.984375, |
| "learning_rate": 9.930650551037769e-05, |
| "loss": 0.0484375, |
| "memory(GiB)": 43.05, |
| "step": 1550, |
| "train_speed(iter/s)": 0.77415 |
| }, |
| { |
| "epoch": 1.0077770576798444, |
| "grad_norm": 9.875, |
| "learning_rate": 9.929758405253608e-05, |
| "loss": 0.0498046875, |
| "memory(GiB)": 43.05, |
| "step": 1555, |
| "train_speed(iter/s)": 0.774997 |
| }, |
| { |
| "epoch": 1.0110174983797797, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.928860598172778e-05, |
| "loss": 0.03359375, |
| "memory(GiB)": 43.05, |
| "step": 1560, |
| "train_speed(iter/s)": 0.775007 |
| }, |
| { |
| "epoch": 1.0142579390797148, |
| "grad_norm": 9.8125, |
| "learning_rate": 9.927957130826313e-05, |
| "loss": 0.025, |
| "memory(GiB)": 43.05, |
| "step": 1565, |
| "train_speed(iter/s)": 0.775272 |
| }, |
| { |
| "epoch": 1.01749837977965, |
| "grad_norm": 3.625, |
| "learning_rate": 9.927048004251747e-05, |
| "loss": 0.048828125, |
| "memory(GiB)": 43.05, |
| "step": 1570, |
| "train_speed(iter/s)": 0.774385 |
| }, |
| { |
| "epoch": 1.0207388204795853, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.926133219493115e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 1575, |
| "train_speed(iter/s)": 0.774424 |
| }, |
| { |
| "epoch": 1.0239792611795204, |
| "grad_norm": 0.78125, |
| "learning_rate": 9.925212777600946e-05, |
| "loss": 0.03671875, |
| "memory(GiB)": 43.05, |
| "step": 1580, |
| "train_speed(iter/s)": 0.774282 |
| }, |
| { |
| "epoch": 1.0272197018794555, |
| "grad_norm": 13.375, |
| "learning_rate": 9.92428667963227e-05, |
| "loss": 0.0404296875, |
| "memory(GiB)": 43.05, |
| "step": 1585, |
| "train_speed(iter/s)": 0.773988 |
| }, |
| { |
| "epoch": 1.030460142579391, |
| "grad_norm": 0.76171875, |
| "learning_rate": 9.923354926650614e-05, |
| "loss": 0.040234375, |
| "memory(GiB)": 43.05, |
| "step": 1590, |
| "train_speed(iter/s)": 0.773749 |
| }, |
| { |
| "epoch": 1.033700583279326, |
| "grad_norm": 18.375, |
| "learning_rate": 9.922417519725992e-05, |
| "loss": 0.091015625, |
| "memory(GiB)": 43.05, |
| "step": 1595, |
| "train_speed(iter/s)": 0.773458 |
| }, |
| { |
| "epoch": 1.0369410239792611, |
| "grad_norm": 10.75, |
| "learning_rate": 9.921474459934917e-05, |
| "loss": 0.0474609375, |
| "memory(GiB)": 43.05, |
| "step": 1600, |
| "train_speed(iter/s)": 0.772849 |
| }, |
| { |
| "epoch": 1.0401814646791965, |
| "grad_norm": 12.375, |
| "learning_rate": 9.920525748360389e-05, |
| "loss": 0.0490234375, |
| "memory(GiB)": 43.05, |
| "step": 1605, |
| "train_speed(iter/s)": 0.772784 |
| }, |
| { |
| "epoch": 1.0434219053791316, |
| "grad_norm": 9.8125, |
| "learning_rate": 9.919571386091904e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 1610, |
| "train_speed(iter/s)": 0.772857 |
| }, |
| { |
| "epoch": 1.0466623460790667, |
| "grad_norm": 7.875, |
| "learning_rate": 9.918611374225442e-05, |
| "loss": 0.0560546875, |
| "memory(GiB)": 43.05, |
| "step": 1615, |
| "train_speed(iter/s)": 0.772536 |
| }, |
| { |
| "epoch": 1.0499027867790018, |
| "grad_norm": 3.859375, |
| "learning_rate": 9.917645713863475e-05, |
| "loss": 0.0421875, |
| "memory(GiB)": 43.05, |
| "step": 1620, |
| "train_speed(iter/s)": 0.771715 |
| }, |
| { |
| "epoch": 1.0531432274789372, |
| "grad_norm": 10.1875, |
| "learning_rate": 9.916674406114959e-05, |
| "loss": 0.0455078125, |
| "memory(GiB)": 43.05, |
| "step": 1625, |
| "train_speed(iter/s)": 0.771869 |
| }, |
| { |
| "epoch": 1.0563836681788723, |
| "grad_norm": 10.5625, |
| "learning_rate": 9.915697452095337e-05, |
| "loss": 0.037109375, |
| "memory(GiB)": 43.05, |
| "step": 1630, |
| "train_speed(iter/s)": 0.772117 |
| }, |
| { |
| "epoch": 1.0596241088788074, |
| "grad_norm": 16.5, |
| "learning_rate": 9.914714852926535e-05, |
| "loss": 0.0384765625, |
| "memory(GiB)": 43.05, |
| "step": 1635, |
| "train_speed(iter/s)": 0.771676 |
| }, |
| { |
| "epoch": 1.0628645495787428, |
| "grad_norm": 3.859375, |
| "learning_rate": 9.913726609736961e-05, |
| "loss": 0.039453125, |
| "memory(GiB)": 43.05, |
| "step": 1640, |
| "train_speed(iter/s)": 0.771235 |
| }, |
| { |
| "epoch": 1.0661049902786779, |
| "grad_norm": 3.296875, |
| "learning_rate": 9.912732723661511e-05, |
| "loss": 0.0455078125, |
| "memory(GiB)": 43.05, |
| "step": 1645, |
| "train_speed(iter/s)": 0.770824 |
| }, |
| { |
| "epoch": 1.069345430978613, |
| "grad_norm": 2.046875, |
| "learning_rate": 9.911733195841549e-05, |
| "loss": 0.03515625, |
| "memory(GiB)": 43.05, |
| "step": 1650, |
| "train_speed(iter/s)": 0.769905 |
| }, |
| { |
| "epoch": 1.0725858716785484, |
| "grad_norm": 11.625, |
| "learning_rate": 9.91072802742493e-05, |
| "loss": 0.0427734375, |
| "memory(GiB)": 43.05, |
| "step": 1655, |
| "train_speed(iter/s)": 0.769874 |
| }, |
| { |
| "epoch": 1.0758263123784835, |
| "grad_norm": 12.8125, |
| "learning_rate": 9.90971721956598e-05, |
| "loss": 0.0451171875, |
| "memory(GiB)": 43.05, |
| "step": 1660, |
| "train_speed(iter/s)": 0.770173 |
| }, |
| { |
| "epoch": 1.0790667530784186, |
| "grad_norm": 2.71875, |
| "learning_rate": 9.908700773425503e-05, |
| "loss": 0.04921875, |
| "memory(GiB)": 43.05, |
| "step": 1665, |
| "train_speed(iter/s)": 0.77044 |
| }, |
| { |
| "epoch": 1.082307193778354, |
| "grad_norm": 0.93359375, |
| "learning_rate": 9.907678690170779e-05, |
| "loss": 0.041015625, |
| "memory(GiB)": 43.05, |
| "step": 1670, |
| "train_speed(iter/s)": 0.770527 |
| }, |
| { |
| "epoch": 1.085547634478289, |
| "grad_norm": 13.8125, |
| "learning_rate": 9.906650970975558e-05, |
| "loss": 0.054296875, |
| "memory(GiB)": 43.05, |
| "step": 1675, |
| "train_speed(iter/s)": 0.770283 |
| }, |
| { |
| "epoch": 1.0887880751782242, |
| "grad_norm": 3.59375, |
| "learning_rate": 9.905617617020068e-05, |
| "loss": 0.058984375, |
| "memory(GiB)": 43.05, |
| "step": 1680, |
| "train_speed(iter/s)": 0.770746 |
| }, |
| { |
| "epoch": 1.0920285158781595, |
| "grad_norm": 10.3125, |
| "learning_rate": 9.904578629491003e-05, |
| "loss": 0.0447265625, |
| "memory(GiB)": 43.05, |
| "step": 1685, |
| "train_speed(iter/s)": 0.771044 |
| }, |
| { |
| "epoch": 1.0952689565780946, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.903534009581528e-05, |
| "loss": 0.062109375, |
| "memory(GiB)": 43.05, |
| "step": 1690, |
| "train_speed(iter/s)": 0.771311 |
| }, |
| { |
| "epoch": 1.0985093972780298, |
| "grad_norm": 1.7421875, |
| "learning_rate": 9.902483758491277e-05, |
| "loss": 0.053125, |
| "memory(GiB)": 43.05, |
| "step": 1695, |
| "train_speed(iter/s)": 0.770537 |
| }, |
| { |
| "epoch": 1.101749837977965, |
| "grad_norm": 2.484375, |
| "learning_rate": 9.90142787742635e-05, |
| "loss": 0.0431640625, |
| "memory(GiB)": 43.05, |
| "step": 1700, |
| "train_speed(iter/s)": 0.771309 |
| }, |
| { |
| "epoch": 1.1049902786779002, |
| "grad_norm": 3.109375, |
| "learning_rate": 9.900366367599314e-05, |
| "loss": 0.0470703125, |
| "memory(GiB)": 43.05, |
| "step": 1705, |
| "train_speed(iter/s)": 0.771763 |
| }, |
| { |
| "epoch": 1.1082307193778353, |
| "grad_norm": 11.75, |
| "learning_rate": 9.899299230229197e-05, |
| "loss": 0.059765625, |
| "memory(GiB)": 43.05, |
| "step": 1710, |
| "train_speed(iter/s)": 0.771543 |
| }, |
| { |
| "epoch": 1.1114711600777705, |
| "grad_norm": 11.5625, |
| "learning_rate": 9.898226466541493e-05, |
| "loss": 0.036328125, |
| "memory(GiB)": 43.05, |
| "step": 1715, |
| "train_speed(iter/s)": 0.771128 |
| }, |
| { |
| "epoch": 1.1147116007777058, |
| "grad_norm": 12.375, |
| "learning_rate": 9.897148077768155e-05, |
| "loss": 0.068359375, |
| "memory(GiB)": 43.05, |
| "step": 1720, |
| "train_speed(iter/s)": 0.771249 |
| }, |
| { |
| "epoch": 1.117952041477641, |
| "grad_norm": 1.125, |
| "learning_rate": 9.896064065147595e-05, |
| "loss": 0.0314453125, |
| "memory(GiB)": 43.05, |
| "step": 1725, |
| "train_speed(iter/s)": 0.771506 |
| }, |
| { |
| "epoch": 1.121192482177576, |
| "grad_norm": 3.21875, |
| "learning_rate": 9.894974429924686e-05, |
| "loss": 0.0455078125, |
| "memory(GiB)": 43.05, |
| "step": 1730, |
| "train_speed(iter/s)": 0.771759 |
| }, |
| { |
| "epoch": 1.1244329228775114, |
| "grad_norm": 9.1875, |
| "learning_rate": 9.893879173350757e-05, |
| "loss": 0.052734375, |
| "memory(GiB)": 43.05, |
| "step": 1735, |
| "train_speed(iter/s)": 0.771993 |
| }, |
| { |
| "epoch": 1.1276733635774465, |
| "grad_norm": 2.421875, |
| "learning_rate": 9.892778296683591e-05, |
| "loss": 0.0341796875, |
| "memory(GiB)": 43.05, |
| "step": 1740, |
| "train_speed(iter/s)": 0.772457 |
| }, |
| { |
| "epoch": 1.1309138042773816, |
| "grad_norm": 14.3125, |
| "learning_rate": 9.891671801187428e-05, |
| "loss": 0.058984375, |
| "memory(GiB)": 43.05, |
| "step": 1745, |
| "train_speed(iter/s)": 0.772356 |
| }, |
| { |
| "epoch": 1.134154244977317, |
| "grad_norm": 12.9375, |
| "learning_rate": 9.890559688132956e-05, |
| "loss": 0.039453125, |
| "memory(GiB)": 43.05, |
| "step": 1750, |
| "train_speed(iter/s)": 0.773111 |
| }, |
| { |
| "epoch": 1.137394685677252, |
| "grad_norm": 14.25, |
| "learning_rate": 9.88944195879732e-05, |
| "loss": 0.0640625, |
| "memory(GiB)": 43.05, |
| "step": 1755, |
| "train_speed(iter/s)": 0.772017 |
| }, |
| { |
| "epoch": 1.1406351263771872, |
| "grad_norm": 13.25, |
| "learning_rate": 9.888318614464113e-05, |
| "loss": 0.045703125, |
| "memory(GiB)": 43.05, |
| "step": 1760, |
| "train_speed(iter/s)": 0.771971 |
| }, |
| { |
| "epoch": 1.1438755670771226, |
| "grad_norm": 8.5625, |
| "learning_rate": 9.88718965642337e-05, |
| "loss": 0.0365234375, |
| "memory(GiB)": 43.05, |
| "step": 1765, |
| "train_speed(iter/s)": 0.771885 |
| }, |
| { |
| "epoch": 1.1471160077770577, |
| "grad_norm": 9.125, |
| "learning_rate": 9.886055085971583e-05, |
| "loss": 0.040234375, |
| "memory(GiB)": 43.05, |
| "step": 1770, |
| "train_speed(iter/s)": 0.772214 |
| }, |
| { |
| "epoch": 1.1503564484769928, |
| "grad_norm": 11.75, |
| "learning_rate": 9.88491490441168e-05, |
| "loss": 0.0470703125, |
| "memory(GiB)": 43.05, |
| "step": 1775, |
| "train_speed(iter/s)": 0.772198 |
| }, |
| { |
| "epoch": 1.1535968891769282, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.883769113053039e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 1780, |
| "train_speed(iter/s)": 0.771899 |
| }, |
| { |
| "epoch": 1.1568373298768633, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.882617713211477e-05, |
| "loss": 0.0267578125, |
| "memory(GiB)": 43.05, |
| "step": 1785, |
| "train_speed(iter/s)": 0.771741 |
| }, |
| { |
| "epoch": 1.1600777705767984, |
| "grad_norm": 13.25, |
| "learning_rate": 9.881460706209254e-05, |
| "loss": 0.041015625, |
| "memory(GiB)": 43.05, |
| "step": 1790, |
| "train_speed(iter/s)": 0.771772 |
| }, |
| { |
| "epoch": 1.1633182112767337, |
| "grad_norm": 1.265625, |
| "learning_rate": 9.880298093375064e-05, |
| "loss": 0.035546875, |
| "memory(GiB)": 43.05, |
| "step": 1795, |
| "train_speed(iter/s)": 0.771523 |
| }, |
| { |
| "epoch": 1.1665586519766689, |
| "grad_norm": 9.5, |
| "learning_rate": 9.879129876044048e-05, |
| "loss": 0.035546875, |
| "memory(GiB)": 43.05, |
| "step": 1800, |
| "train_speed(iter/s)": 0.771301 |
| }, |
| { |
| "epoch": 1.169799092676604, |
| "grad_norm": 2.0, |
| "learning_rate": 9.877956055557776e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 1805, |
| "train_speed(iter/s)": 0.771532 |
| }, |
| { |
| "epoch": 1.173039533376539, |
| "grad_norm": 8.0625, |
| "learning_rate": 9.876776633264254e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 1810, |
| "train_speed(iter/s)": 0.771949 |
| }, |
| { |
| "epoch": 1.1762799740764744, |
| "grad_norm": 1.734375, |
| "learning_rate": 9.87559161051792e-05, |
| "loss": 0.0552734375, |
| "memory(GiB)": 43.05, |
| "step": 1815, |
| "train_speed(iter/s)": 0.772095 |
| }, |
| { |
| "epoch": 1.1795204147764096, |
| "grad_norm": 9.5625, |
| "learning_rate": 9.874400988679646e-05, |
| "loss": 0.040234375, |
| "memory(GiB)": 43.05, |
| "step": 1820, |
| "train_speed(iter/s)": 0.772592 |
| }, |
| { |
| "epoch": 1.1827608554763447, |
| "grad_norm": 6.96875, |
| "learning_rate": 9.873204769116736e-05, |
| "loss": 0.0671875, |
| "memory(GiB)": 43.05, |
| "step": 1825, |
| "train_speed(iter/s)": 0.772865 |
| }, |
| { |
| "epoch": 1.18600129617628, |
| "grad_norm": 2.359375, |
| "learning_rate": 9.872002953202914e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 1830, |
| "train_speed(iter/s)": 0.771885 |
| }, |
| { |
| "epoch": 1.1892417368762151, |
| "grad_norm": 18.125, |
| "learning_rate": 9.87079554231834e-05, |
| "loss": 0.05791015625, |
| "memory(GiB)": 43.05, |
| "step": 1835, |
| "train_speed(iter/s)": 0.772038 |
| }, |
| { |
| "epoch": 1.1924821775761503, |
| "grad_norm": 10.5, |
| "learning_rate": 9.869582537849593e-05, |
| "loss": 0.05693359375, |
| "memory(GiB)": 43.05, |
| "step": 1840, |
| "train_speed(iter/s)": 0.771967 |
| }, |
| { |
| "epoch": 1.1957226182760856, |
| "grad_norm": 11.375, |
| "learning_rate": 9.86836394118968e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 1845, |
| "train_speed(iter/s)": 0.772659 |
| }, |
| { |
| "epoch": 1.1989630589760207, |
| "grad_norm": 3.359375, |
| "learning_rate": 9.867139753738028e-05, |
| "loss": 0.022265625, |
| "memory(GiB)": 43.05, |
| "step": 1850, |
| "train_speed(iter/s)": 0.771954 |
| }, |
| { |
| "epoch": 1.2022034996759559, |
| "grad_norm": 1.15625, |
| "learning_rate": 9.865909976900484e-05, |
| "loss": 0.05078125, |
| "memory(GiB)": 43.05, |
| "step": 1855, |
| "train_speed(iter/s)": 0.772176 |
| }, |
| { |
| "epoch": 1.2054439403758912, |
| "grad_norm": 16.5, |
| "learning_rate": 9.864674612089313e-05, |
| "loss": 0.0376953125, |
| "memory(GiB)": 43.05, |
| "step": 1860, |
| "train_speed(iter/s)": 0.77217 |
| }, |
| { |
| "epoch": 1.2086843810758263, |
| "grad_norm": 10.5625, |
| "learning_rate": 9.8634336607232e-05, |
| "loss": 0.045703125, |
| "memory(GiB)": 43.05, |
| "step": 1865, |
| "train_speed(iter/s)": 0.772255 |
| }, |
| { |
| "epoch": 1.2119248217757614, |
| "grad_norm": 7.34375, |
| "learning_rate": 9.862187124227245e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 1870, |
| "train_speed(iter/s)": 0.772989 |
| }, |
| { |
| "epoch": 1.2151652624756968, |
| "grad_norm": 3.390625, |
| "learning_rate": 9.860935004032957e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 1875, |
| "train_speed(iter/s)": 0.773017 |
| }, |
| { |
| "epoch": 1.218405703175632, |
| "grad_norm": 9.5, |
| "learning_rate": 9.859677301578265e-05, |
| "loss": 0.04921875, |
| "memory(GiB)": 43.05, |
| "step": 1880, |
| "train_speed(iter/s)": 0.77328 |
| }, |
| { |
| "epoch": 1.221646143875567, |
| "grad_norm": 5.71875, |
| "learning_rate": 9.858414018307503e-05, |
| "loss": 0.0431640625, |
| "memory(GiB)": 43.05, |
| "step": 1885, |
| "train_speed(iter/s)": 0.773031 |
| }, |
| { |
| "epoch": 1.2248865845755024, |
| "grad_norm": 12.1875, |
| "learning_rate": 9.857145155671417e-05, |
| "loss": 0.0412109375, |
| "memory(GiB)": 43.05, |
| "step": 1890, |
| "train_speed(iter/s)": 0.773256 |
| }, |
| { |
| "epoch": 1.2281270252754375, |
| "grad_norm": 1.265625, |
| "learning_rate": 9.855870715127158e-05, |
| "loss": 0.04462890625, |
| "memory(GiB)": 43.05, |
| "step": 1895, |
| "train_speed(iter/s)": 0.773677 |
| }, |
| { |
| "epoch": 1.2313674659753726, |
| "grad_norm": 3.25, |
| "learning_rate": 9.854590698138283e-05, |
| "loss": 0.0408203125, |
| "memory(GiB)": 43.05, |
| "step": 1900, |
| "train_speed(iter/s)": 0.772772 |
| }, |
| { |
| "epoch": 1.2346079066753077, |
| "grad_norm": 1.7578125, |
| "learning_rate": 9.853305106174756e-05, |
| "loss": 0.049609375, |
| "memory(GiB)": 43.05, |
| "step": 1905, |
| "train_speed(iter/s)": 0.772561 |
| }, |
| { |
| "epoch": 1.237848347375243, |
| "grad_norm": 1.609375, |
| "learning_rate": 9.852013940712938e-05, |
| "loss": 0.032421875, |
| "memory(GiB)": 43.05, |
| "step": 1910, |
| "train_speed(iter/s)": 0.772825 |
| }, |
| { |
| "epoch": 1.2410887880751782, |
| "grad_norm": 1.9765625, |
| "learning_rate": 9.850717203235598e-05, |
| "loss": 0.030078125, |
| "memory(GiB)": 43.05, |
| "step": 1915, |
| "train_speed(iter/s)": 0.773516 |
| }, |
| { |
| "epoch": 1.2443292287751135, |
| "grad_norm": 11.375, |
| "learning_rate": 9.849414895231895e-05, |
| "loss": 0.0642578125, |
| "memory(GiB)": 43.05, |
| "step": 1920, |
| "train_speed(iter/s)": 0.773144 |
| }, |
| { |
| "epoch": 1.2475696694750487, |
| "grad_norm": 2.65625, |
| "learning_rate": 9.848107018197393e-05, |
| "loss": 0.081640625, |
| "memory(GiB)": 43.05, |
| "step": 1925, |
| "train_speed(iter/s)": 0.773611 |
| }, |
| { |
| "epoch": 1.2508101101749838, |
| "grad_norm": 5.15625, |
| "learning_rate": 9.846793573634043e-05, |
| "loss": 0.0564453125, |
| "memory(GiB)": 43.05, |
| "step": 1930, |
| "train_speed(iter/s)": 0.773066 |
| }, |
| { |
| "epoch": 1.254050550874919, |
| "grad_norm": 0.69921875, |
| "learning_rate": 9.845474563050199e-05, |
| "loss": 0.0720703125, |
| "memory(GiB)": 43.05, |
| "step": 1935, |
| "train_speed(iter/s)": 0.773323 |
| }, |
| { |
| "epoch": 1.2572909915748542, |
| "grad_norm": 9.3125, |
| "learning_rate": 9.8441499879606e-05, |
| "loss": 0.057421875, |
| "memory(GiB)": 43.05, |
| "step": 1940, |
| "train_speed(iter/s)": 0.773681 |
| }, |
| { |
| "epoch": 1.2605314322747894, |
| "grad_norm": 2.84375, |
| "learning_rate": 9.842819849886382e-05, |
| "loss": 0.0447265625, |
| "memory(GiB)": 43.05, |
| "step": 1945, |
| "train_speed(iter/s)": 0.772924 |
| }, |
| { |
| "epoch": 1.2637718729747245, |
| "grad_norm": 6.5625, |
| "learning_rate": 9.841484150355061e-05, |
| "loss": 0.0296875, |
| "memory(GiB)": 43.05, |
| "step": 1950, |
| "train_speed(iter/s)": 0.772429 |
| }, |
| { |
| "epoch": 1.2670123136746598, |
| "grad_norm": 17.5, |
| "learning_rate": 9.840142890900546e-05, |
| "loss": 0.03671875, |
| "memory(GiB)": 43.05, |
| "step": 1955, |
| "train_speed(iter/s)": 0.771885 |
| }, |
| { |
| "epoch": 1.270252754374595, |
| "grad_norm": 13.4375, |
| "learning_rate": 9.838796073063127e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 1960, |
| "train_speed(iter/s)": 0.771377 |
| }, |
| { |
| "epoch": 1.27349319507453, |
| "grad_norm": 1.3828125, |
| "learning_rate": 9.837443698389482e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 1965, |
| "train_speed(iter/s)": 0.77133 |
| }, |
| { |
| "epoch": 1.2767336357744652, |
| "grad_norm": 7.40625, |
| "learning_rate": 9.836085768432665e-05, |
| "loss": 0.057421875, |
| "memory(GiB)": 43.05, |
| "step": 1970, |
| "train_speed(iter/s)": 0.771328 |
| }, |
| { |
| "epoch": 1.2799740764744005, |
| "grad_norm": 4.5, |
| "learning_rate": 9.834722284752116e-05, |
| "loss": 0.0560546875, |
| "memory(GiB)": 43.05, |
| "step": 1975, |
| "train_speed(iter/s)": 0.771233 |
| }, |
| { |
| "epoch": 1.2832145171743357, |
| "grad_norm": 11.625, |
| "learning_rate": 9.833353248913647e-05, |
| "loss": 0.034765625, |
| "memory(GiB)": 43.05, |
| "step": 1980, |
| "train_speed(iter/s)": 0.771228 |
| }, |
| { |
| "epoch": 1.286454957874271, |
| "grad_norm": 2.921875, |
| "learning_rate": 9.831978662489447e-05, |
| "loss": 0.0404296875, |
| "memory(GiB)": 43.05, |
| "step": 1985, |
| "train_speed(iter/s)": 0.771556 |
| }, |
| { |
| "epoch": 1.2896953985742061, |
| "grad_norm": 5.34375, |
| "learning_rate": 9.830598527058082e-05, |
| "loss": 0.058203125, |
| "memory(GiB)": 43.05, |
| "step": 1990, |
| "train_speed(iter/s)": 0.770866 |
| }, |
| { |
| "epoch": 1.2929358392741412, |
| "grad_norm": 5.8125, |
| "learning_rate": 9.82921284420449e-05, |
| "loss": 0.04091796875, |
| "memory(GiB)": 43.05, |
| "step": 1995, |
| "train_speed(iter/s)": 0.770663 |
| }, |
| { |
| "epoch": 1.2961762799740764, |
| "grad_norm": 11.0625, |
| "learning_rate": 9.827821615519976e-05, |
| "loss": 0.0369140625, |
| "memory(GiB)": 43.05, |
| "step": 2000, |
| "train_speed(iter/s)": 0.771025 |
| }, |
| { |
| "epoch": 1.2994167206740117, |
| "grad_norm": 8.625, |
| "learning_rate": 9.826424842602218e-05, |
| "loss": 0.051171875, |
| "memory(GiB)": 43.05, |
| "step": 2005, |
| "train_speed(iter/s)": 0.611913 |
| }, |
| { |
| "epoch": 1.3026571613739468, |
| "grad_norm": 12.875, |
| "learning_rate": 9.825022527055258e-05, |
| "loss": 0.058203125, |
| "memory(GiB)": 43.05, |
| "step": 2010, |
| "train_speed(iter/s)": 0.611729 |
| }, |
| { |
| "epoch": 1.3058976020738822, |
| "grad_norm": 0.89453125, |
| "learning_rate": 9.823614670489507e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 2015, |
| "train_speed(iter/s)": 0.612226 |
| }, |
| { |
| "epoch": 1.3091380427738173, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.822201274521734e-05, |
| "loss": 0.0423828125, |
| "memory(GiB)": 43.05, |
| "step": 2020, |
| "train_speed(iter/s)": 0.612871 |
| }, |
| { |
| "epoch": 1.3123784834737524, |
| "grad_norm": 16.125, |
| "learning_rate": 9.820782340775072e-05, |
| "loss": 0.0529296875, |
| "memory(GiB)": 43.05, |
| "step": 2025, |
| "train_speed(iter/s)": 0.612987 |
| }, |
| { |
| "epoch": 1.3156189241736875, |
| "grad_norm": 1.96875, |
| "learning_rate": 9.819357870879016e-05, |
| "loss": 0.0486328125, |
| "memory(GiB)": 43.05, |
| "step": 2030, |
| "train_speed(iter/s)": 0.613366 |
| }, |
| { |
| "epoch": 1.3188593648736229, |
| "grad_norm": 2.140625, |
| "learning_rate": 9.817927866469414e-05, |
| "loss": 0.0599609375, |
| "memory(GiB)": 43.05, |
| "step": 2035, |
| "train_speed(iter/s)": 0.613579 |
| }, |
| { |
| "epoch": 1.322099805573558, |
| "grad_norm": 1.015625, |
| "learning_rate": 9.816492329188474e-05, |
| "loss": 0.06015625, |
| "memory(GiB)": 43.05, |
| "step": 2040, |
| "train_speed(iter/s)": 0.613311 |
| }, |
| { |
| "epoch": 1.3253402462734931, |
| "grad_norm": 1.40625, |
| "learning_rate": 9.815051260684753e-05, |
| "loss": 0.05029296875, |
| "memory(GiB)": 43.05, |
| "step": 2045, |
| "train_speed(iter/s)": 0.613476 |
| }, |
| { |
| "epoch": 1.3285806869734285, |
| "grad_norm": 14.625, |
| "learning_rate": 9.813604662613168e-05, |
| "loss": 0.03515625, |
| "memory(GiB)": 43.05, |
| "step": 2050, |
| "train_speed(iter/s)": 0.613536 |
| }, |
| { |
| "epoch": 1.3318211276733636, |
| "grad_norm": 13.8125, |
| "learning_rate": 9.812152536634975e-05, |
| "loss": 0.0609375, |
| "memory(GiB)": 43.05, |
| "step": 2055, |
| "train_speed(iter/s)": 0.613809 |
| }, |
| { |
| "epoch": 1.3350615683732987, |
| "grad_norm": 11.3125, |
| "learning_rate": 9.810694884417788e-05, |
| "loss": 0.02646484375, |
| "memory(GiB)": 43.05, |
| "step": 2060, |
| "train_speed(iter/s)": 0.614351 |
| }, |
| { |
| "epoch": 1.3383020090732338, |
| "grad_norm": 1.71875, |
| "learning_rate": 9.809231707635565e-05, |
| "loss": 0.0388671875, |
| "memory(GiB)": 43.05, |
| "step": 2065, |
| "train_speed(iter/s)": 0.614905 |
| }, |
| { |
| "epoch": 1.3415424497731692, |
| "grad_norm": 14.875, |
| "learning_rate": 9.807763007968602e-05, |
| "loss": 0.0361328125, |
| "memory(GiB)": 43.05, |
| "step": 2070, |
| "train_speed(iter/s)": 0.615482 |
| }, |
| { |
| "epoch": 1.3447828904731043, |
| "grad_norm": 1.046875, |
| "learning_rate": 9.806288787103548e-05, |
| "loss": 0.0431640625, |
| "memory(GiB)": 43.05, |
| "step": 2075, |
| "train_speed(iter/s)": 0.61574 |
| }, |
| { |
| "epoch": 1.3480233311730396, |
| "grad_norm": 12.0, |
| "learning_rate": 9.804809046733383e-05, |
| "loss": 0.0330078125, |
| "memory(GiB)": 43.05, |
| "step": 2080, |
| "train_speed(iter/s)": 0.61596 |
| }, |
| { |
| "epoch": 1.3512637718729748, |
| "grad_norm": 1.6328125, |
| "learning_rate": 9.80332378855743e-05, |
| "loss": 0.0400390625, |
| "memory(GiB)": 43.05, |
| "step": 2085, |
| "train_speed(iter/s)": 0.61648 |
| }, |
| { |
| "epoch": 1.3545042125729099, |
| "grad_norm": 14.9375, |
| "learning_rate": 9.80183301428135e-05, |
| "loss": 0.053125, |
| "memory(GiB)": 43.05, |
| "step": 2090, |
| "train_speed(iter/s)": 0.616824 |
| }, |
| { |
| "epoch": 1.357744653272845, |
| "grad_norm": 3.453125, |
| "learning_rate": 9.800336725617135e-05, |
| "loss": 0.039453125, |
| "memory(GiB)": 43.05, |
| "step": 2095, |
| "train_speed(iter/s)": 0.617314 |
| }, |
| { |
| "epoch": 1.3609850939727803, |
| "grad_norm": 1.8046875, |
| "learning_rate": 9.798834924283112e-05, |
| "loss": 0.0375, |
| "memory(GiB)": 43.05, |
| "step": 2100, |
| "train_speed(iter/s)": 0.617762 |
| }, |
| { |
| "epoch": 1.3642255346727155, |
| "grad_norm": 0.73046875, |
| "learning_rate": 9.797327612003938e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 2105, |
| "train_speed(iter/s)": 0.617954 |
| }, |
| { |
| "epoch": 1.3674659753726508, |
| "grad_norm": 2.671875, |
| "learning_rate": 9.7958147905106e-05, |
| "loss": 0.037890625, |
| "memory(GiB)": 43.05, |
| "step": 2110, |
| "train_speed(iter/s)": 0.618398 |
| }, |
| { |
| "epoch": 1.370706416072586, |
| "grad_norm": 5.15625, |
| "learning_rate": 9.794296461540407e-05, |
| "loss": 0.0455078125, |
| "memory(GiB)": 43.05, |
| "step": 2115, |
| "train_speed(iter/s)": 0.618813 |
| }, |
| { |
| "epoch": 1.373946856772521, |
| "grad_norm": 13.25, |
| "learning_rate": 9.792772626837001e-05, |
| "loss": 0.034375, |
| "memory(GiB)": 43.05, |
| "step": 2120, |
| "train_speed(iter/s)": 0.618524 |
| }, |
| { |
| "epoch": 1.3771872974724562, |
| "grad_norm": 10.625, |
| "learning_rate": 9.791243288150338e-05, |
| "loss": 0.0265625, |
| "memory(GiB)": 43.05, |
| "step": 2125, |
| "train_speed(iter/s)": 0.619045 |
| }, |
| { |
| "epoch": 1.3804277381723915, |
| "grad_norm": 14.1875, |
| "learning_rate": 9.789708447236702e-05, |
| "loss": 0.0435546875, |
| "memory(GiB)": 43.05, |
| "step": 2130, |
| "train_speed(iter/s)": 0.619342 |
| }, |
| { |
| "epoch": 1.3836681788723266, |
| "grad_norm": 12.9375, |
| "learning_rate": 9.788168105858691e-05, |
| "loss": 0.0517578125, |
| "memory(GiB)": 43.05, |
| "step": 2135, |
| "train_speed(iter/s)": 0.61966 |
| }, |
| { |
| "epoch": 1.3869086195722617, |
| "grad_norm": 0.86328125, |
| "learning_rate": 9.786622265785221e-05, |
| "loss": 0.0447265625, |
| "memory(GiB)": 43.05, |
| "step": 2140, |
| "train_speed(iter/s)": 0.619981 |
| }, |
| { |
| "epoch": 1.390149060272197, |
| "grad_norm": 1.8671875, |
| "learning_rate": 9.785070928791525e-05, |
| "loss": 0.049609375, |
| "memory(GiB)": 43.05, |
| "step": 2145, |
| "train_speed(iter/s)": 0.620392 |
| }, |
| { |
| "epoch": 1.3933895009721322, |
| "grad_norm": 12.5625, |
| "learning_rate": 9.783514096659141e-05, |
| "loss": 0.0359375, |
| "memory(GiB)": 43.05, |
| "step": 2150, |
| "train_speed(iter/s)": 0.620642 |
| }, |
| { |
| "epoch": 1.3966299416720673, |
| "grad_norm": 2.109375, |
| "learning_rate": 9.78195177117593e-05, |
| "loss": 0.035546875, |
| "memory(GiB)": 43.05, |
| "step": 2155, |
| "train_speed(iter/s)": 0.621305 |
| }, |
| { |
| "epoch": 1.3998703823720027, |
| "grad_norm": 12.0, |
| "learning_rate": 9.78038395413605e-05, |
| "loss": 0.04921875, |
| "memory(GiB)": 43.05, |
| "step": 2160, |
| "train_speed(iter/s)": 0.621819 |
| }, |
| { |
| "epoch": 1.4031108230719378, |
| "grad_norm": 10.625, |
| "learning_rate": 9.778810647339971e-05, |
| "loss": 0.05625, |
| "memory(GiB)": 43.05, |
| "step": 2165, |
| "train_speed(iter/s)": 0.62221 |
| }, |
| { |
| "epoch": 1.406351263771873, |
| "grad_norm": 1.28125, |
| "learning_rate": 9.777231852594467e-05, |
| "loss": 0.015234375, |
| "memory(GiB)": 43.05, |
| "step": 2170, |
| "train_speed(iter/s)": 0.622883 |
| }, |
| { |
| "epoch": 1.4095917044718083, |
| "grad_norm": 3.15625, |
| "learning_rate": 9.775647571712614e-05, |
| "loss": 0.0431640625, |
| "memory(GiB)": 43.05, |
| "step": 2175, |
| "train_speed(iter/s)": 0.62319 |
| }, |
| { |
| "epoch": 1.4128321451717434, |
| "grad_norm": 0.8125, |
| "learning_rate": 9.774057806513788e-05, |
| "loss": 0.044140625, |
| "memory(GiB)": 43.05, |
| "step": 2180, |
| "train_speed(iter/s)": 0.623107 |
| }, |
| { |
| "epoch": 1.4160725858716785, |
| "grad_norm": 15.75, |
| "learning_rate": 9.772462558823662e-05, |
| "loss": 0.0615234375, |
| "memory(GiB)": 43.05, |
| "step": 2185, |
| "train_speed(iter/s)": 0.623169 |
| }, |
| { |
| "epoch": 1.4193130265716136, |
| "grad_norm": 8.25, |
| "learning_rate": 9.770861830474208e-05, |
| "loss": 0.0404296875, |
| "memory(GiB)": 43.05, |
| "step": 2190, |
| "train_speed(iter/s)": 0.623578 |
| }, |
| { |
| "epoch": 1.422553467271549, |
| "grad_norm": 3.75, |
| "learning_rate": 9.769255623303687e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 2195, |
| "train_speed(iter/s)": 0.62381 |
| }, |
| { |
| "epoch": 1.425793907971484, |
| "grad_norm": 1.53125, |
| "learning_rate": 9.767643939156658e-05, |
| "loss": 0.0638671875, |
| "memory(GiB)": 43.05, |
| "step": 2200, |
| "train_speed(iter/s)": 0.623993 |
| }, |
| { |
| "epoch": 1.4290343486714194, |
| "grad_norm": 14.4375, |
| "learning_rate": 9.766026779883966e-05, |
| "loss": 0.0453125, |
| "memory(GiB)": 43.05, |
| "step": 2205, |
| "train_speed(iter/s)": 0.624369 |
| }, |
| { |
| "epoch": 1.4322747893713546, |
| "grad_norm": 2.109375, |
| "learning_rate": 9.764404147342742e-05, |
| "loss": 0.0376953125, |
| "memory(GiB)": 43.05, |
| "step": 2210, |
| "train_speed(iter/s)": 0.624681 |
| }, |
| { |
| "epoch": 1.4355152300712897, |
| "grad_norm": 0.89453125, |
| "learning_rate": 9.76277604339641e-05, |
| "loss": 0.03203125, |
| "memory(GiB)": 43.05, |
| "step": 2215, |
| "train_speed(iter/s)": 0.624586 |
| }, |
| { |
| "epoch": 1.4387556707712248, |
| "grad_norm": 1.65625, |
| "learning_rate": 9.761142469914666e-05, |
| "loss": 0.027734375, |
| "memory(GiB)": 43.05, |
| "step": 2220, |
| "train_speed(iter/s)": 0.624822 |
| }, |
| { |
| "epoch": 1.4419961114711601, |
| "grad_norm": 13.125, |
| "learning_rate": 9.759503428773498e-05, |
| "loss": 0.0884765625, |
| "memory(GiB)": 43.05, |
| "step": 2225, |
| "train_speed(iter/s)": 0.625064 |
| }, |
| { |
| "epoch": 1.4452365521710953, |
| "grad_norm": 9.5625, |
| "learning_rate": 9.757858921855166e-05, |
| "loss": 0.060546875, |
| "memory(GiB)": 43.05, |
| "step": 2230, |
| "train_speed(iter/s)": 0.624889 |
| }, |
| { |
| "epoch": 1.4484769928710304, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.756208951048207e-05, |
| "loss": 0.0423828125, |
| "memory(GiB)": 43.05, |
| "step": 2235, |
| "train_speed(iter/s)": 0.624644 |
| }, |
| { |
| "epoch": 1.4517174335709657, |
| "grad_norm": 4.78125, |
| "learning_rate": 9.75455351824744e-05, |
| "loss": 0.0736328125, |
| "memory(GiB)": 43.05, |
| "step": 2240, |
| "train_speed(iter/s)": 0.624896 |
| }, |
| { |
| "epoch": 1.4549578742709008, |
| "grad_norm": 13.0625, |
| "learning_rate": 9.752892625353946e-05, |
| "loss": 0.0400390625, |
| "memory(GiB)": 43.05, |
| "step": 2245, |
| "train_speed(iter/s)": 0.625284 |
| }, |
| { |
| "epoch": 1.458198314970836, |
| "grad_norm": 5.5625, |
| "learning_rate": 9.751226274275085e-05, |
| "loss": 0.033984375, |
| "memory(GiB)": 43.05, |
| "step": 2250, |
| "train_speed(iter/s)": 0.625567 |
| }, |
| { |
| "epoch": 1.4614387556707713, |
| "grad_norm": 1.9140625, |
| "learning_rate": 9.749554466924482e-05, |
| "loss": 0.034375, |
| "memory(GiB)": 43.05, |
| "step": 2255, |
| "train_speed(iter/s)": 0.625805 |
| }, |
| { |
| "epoch": 1.4646791963707064, |
| "grad_norm": 11.6875, |
| "learning_rate": 9.747877205222027e-05, |
| "loss": 0.0376953125, |
| "memory(GiB)": 43.05, |
| "step": 2260, |
| "train_speed(iter/s)": 0.626097 |
| }, |
| { |
| "epoch": 1.4679196370706415, |
| "grad_norm": 14.5625, |
| "learning_rate": 9.746194491093871e-05, |
| "loss": 0.0513671875, |
| "memory(GiB)": 43.05, |
| "step": 2265, |
| "train_speed(iter/s)": 0.626494 |
| }, |
| { |
| "epoch": 1.471160077770577, |
| "grad_norm": 13.4375, |
| "learning_rate": 9.744506326472435e-05, |
| "loss": 0.0513671875, |
| "memory(GiB)": 43.05, |
| "step": 2270, |
| "train_speed(iter/s)": 0.626361 |
| }, |
| { |
| "epoch": 1.474400518470512, |
| "grad_norm": 14.6875, |
| "learning_rate": 9.742812713296394e-05, |
| "loss": 0.0318359375, |
| "memory(GiB)": 43.05, |
| "step": 2275, |
| "train_speed(iter/s)": 0.62684 |
| }, |
| { |
| "epoch": 1.4776409591704471, |
| "grad_norm": 13.625, |
| "learning_rate": 9.741113653510677e-05, |
| "loss": 0.037890625, |
| "memory(GiB)": 43.05, |
| "step": 2280, |
| "train_speed(iter/s)": 0.626967 |
| }, |
| { |
| "epoch": 1.4808813998703823, |
| "grad_norm": 13.875, |
| "learning_rate": 9.739409149066472e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 2285, |
| "train_speed(iter/s)": 0.626882 |
| }, |
| { |
| "epoch": 1.4841218405703176, |
| "grad_norm": 0.95703125, |
| "learning_rate": 9.73769920192122e-05, |
| "loss": 0.0490234375, |
| "memory(GiB)": 43.05, |
| "step": 2290, |
| "train_speed(iter/s)": 0.626984 |
| }, |
| { |
| "epoch": 1.4873622812702527, |
| "grad_norm": 0.84765625, |
| "learning_rate": 9.73598381403861e-05, |
| "loss": 0.0859375, |
| "memory(GiB)": 43.05, |
| "step": 2295, |
| "train_speed(iter/s)": 0.627301 |
| }, |
| { |
| "epoch": 1.490602721970188, |
| "grad_norm": 1.25, |
| "learning_rate": 9.734262987388583e-05, |
| "loss": 0.05703125, |
| "memory(GiB)": 43.05, |
| "step": 2300, |
| "train_speed(iter/s)": 0.627792 |
| }, |
| { |
| "epoch": 1.4938431626701232, |
| "grad_norm": 0.6171875, |
| "learning_rate": 9.732536723947321e-05, |
| "loss": 0.0439453125, |
| "memory(GiB)": 43.05, |
| "step": 2305, |
| "train_speed(iter/s)": 0.627848 |
| }, |
| { |
| "epoch": 1.4970836033700583, |
| "grad_norm": 14.25, |
| "learning_rate": 9.73080502569725e-05, |
| "loss": 0.0638671875, |
| "memory(GiB)": 43.05, |
| "step": 2310, |
| "train_speed(iter/s)": 0.627722 |
| }, |
| { |
| "epoch": 1.5003240440699934, |
| "grad_norm": 0.90625, |
| "learning_rate": 9.729067894627042e-05, |
| "loss": 0.05546875, |
| "memory(GiB)": 43.05, |
| "step": 2315, |
| "train_speed(iter/s)": 0.627674 |
| }, |
| { |
| "epoch": 1.5035644847699285, |
| "grad_norm": 8.3125, |
| "learning_rate": 9.727325332731604e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 2320, |
| "train_speed(iter/s)": 0.627639 |
| }, |
| { |
| "epoch": 1.5068049254698639, |
| "grad_norm": 1.078125, |
| "learning_rate": 9.72557734201208e-05, |
| "loss": 0.0431640625, |
| "memory(GiB)": 43.05, |
| "step": 2325, |
| "train_speed(iter/s)": 0.62791 |
| }, |
| { |
| "epoch": 1.5100453661697992, |
| "grad_norm": 11.5625, |
| "learning_rate": 9.72382392447585e-05, |
| "loss": 0.04521484375, |
| "memory(GiB)": 43.05, |
| "step": 2330, |
| "train_speed(iter/s)": 0.628461 |
| }, |
| { |
| "epoch": 1.5132858068697344, |
| "grad_norm": 3.140625, |
| "learning_rate": 9.722065082136525e-05, |
| "loss": 0.05859375, |
| "memory(GiB)": 43.05, |
| "step": 2335, |
| "train_speed(iter/s)": 0.628836 |
| }, |
| { |
| "epoch": 1.5165262475696695, |
| "grad_norm": 2.4375, |
| "learning_rate": 9.720300817013945e-05, |
| "loss": 0.0302734375, |
| "memory(GiB)": 43.05, |
| "step": 2340, |
| "train_speed(iter/s)": 0.628933 |
| }, |
| { |
| "epoch": 1.5197666882696046, |
| "grad_norm": 0.76953125, |
| "learning_rate": 9.71853113113418e-05, |
| "loss": 0.046484375, |
| "memory(GiB)": 43.05, |
| "step": 2345, |
| "train_speed(iter/s)": 0.629481 |
| }, |
| { |
| "epoch": 1.5230071289695397, |
| "grad_norm": 5.15625, |
| "learning_rate": 9.716756026529523e-05, |
| "loss": 0.0541015625, |
| "memory(GiB)": 43.05, |
| "step": 2350, |
| "train_speed(iter/s)": 0.629513 |
| }, |
| { |
| "epoch": 1.526247569669475, |
| "grad_norm": 1.625, |
| "learning_rate": 9.71497550523849e-05, |
| "loss": 0.0458984375, |
| "memory(GiB)": 43.05, |
| "step": 2355, |
| "train_speed(iter/s)": 0.629561 |
| }, |
| { |
| "epoch": 1.5294880103694104, |
| "grad_norm": 3.59375, |
| "learning_rate": 9.713189569305818e-05, |
| "loss": 0.027734375, |
| "memory(GiB)": 43.05, |
| "step": 2360, |
| "train_speed(iter/s)": 0.630132 |
| }, |
| { |
| "epoch": 1.5327284510693455, |
| "grad_norm": 0.63671875, |
| "learning_rate": 9.711398220782464e-05, |
| "loss": 0.03359375, |
| "memory(GiB)": 43.05, |
| "step": 2365, |
| "train_speed(iter/s)": 0.630615 |
| }, |
| { |
| "epoch": 1.5359688917692806, |
| "grad_norm": 7.375, |
| "learning_rate": 9.709601461725597e-05, |
| "loss": 0.0453125, |
| "memory(GiB)": 43.05, |
| "step": 2370, |
| "train_speed(iter/s)": 0.631142 |
| }, |
| { |
| "epoch": 1.5392093324692158, |
| "grad_norm": 17.375, |
| "learning_rate": 9.7077992941986e-05, |
| "loss": 0.050390625, |
| "memory(GiB)": 43.05, |
| "step": 2375, |
| "train_speed(iter/s)": 0.631273 |
| }, |
| { |
| "epoch": 1.5424497731691509, |
| "grad_norm": 10.8125, |
| "learning_rate": 9.705991720271072e-05, |
| "loss": 0.042578125, |
| "memory(GiB)": 43.05, |
| "step": 2380, |
| "train_speed(iter/s)": 0.631619 |
| }, |
| { |
| "epoch": 1.5456902138690862, |
| "grad_norm": 2.828125, |
| "learning_rate": 9.704178742018816e-05, |
| "loss": 0.05703125, |
| "memory(GiB)": 43.05, |
| "step": 2385, |
| "train_speed(iter/s)": 0.631783 |
| }, |
| { |
| "epoch": 1.5489306545690213, |
| "grad_norm": 7.90625, |
| "learning_rate": 9.70236036152384e-05, |
| "loss": 0.052734375, |
| "memory(GiB)": 43.05, |
| "step": 2390, |
| "train_speed(iter/s)": 0.632404 |
| }, |
| { |
| "epoch": 1.5521710952689567, |
| "grad_norm": 2.3125, |
| "learning_rate": 9.70053658087436e-05, |
| "loss": 0.0517578125, |
| "memory(GiB)": 43.05, |
| "step": 2395, |
| "train_speed(iter/s)": 0.632371 |
| }, |
| { |
| "epoch": 1.5554115359688918, |
| "grad_norm": 11.9375, |
| "learning_rate": 9.69870740216479e-05, |
| "loss": 0.05, |
| "memory(GiB)": 43.05, |
| "step": 2400, |
| "train_speed(iter/s)": 0.63274 |
| }, |
| { |
| "epoch": 1.558651976668827, |
| "grad_norm": 12.6875, |
| "learning_rate": 9.696872827495747e-05, |
| "loss": 0.02734375, |
| "memory(GiB)": 43.05, |
| "step": 2405, |
| "train_speed(iter/s)": 0.633094 |
| }, |
| { |
| "epoch": 1.561892417368762, |
| "grad_norm": 6.1875, |
| "learning_rate": 9.695032858974042e-05, |
| "loss": 0.0451171875, |
| "memory(GiB)": 43.05, |
| "step": 2410, |
| "train_speed(iter/s)": 0.633711 |
| }, |
| { |
| "epoch": 1.5651328580686974, |
| "grad_norm": 10.125, |
| "learning_rate": 9.693187498712679e-05, |
| "loss": 0.0564453125, |
| "memory(GiB)": 43.05, |
| "step": 2415, |
| "train_speed(iter/s)": 0.634154 |
| }, |
| { |
| "epoch": 1.5683732987686325, |
| "grad_norm": 2.6875, |
| "learning_rate": 9.691336748830857e-05, |
| "loss": 0.03984375, |
| "memory(GiB)": 43.05, |
| "step": 2420, |
| "train_speed(iter/s)": 0.634504 |
| }, |
| { |
| "epoch": 1.5716137394685679, |
| "grad_norm": 2.53125, |
| "learning_rate": 9.689480611453963e-05, |
| "loss": 0.030078125, |
| "memory(GiB)": 43.05, |
| "step": 2425, |
| "train_speed(iter/s)": 0.634641 |
| }, |
| { |
| "epoch": 1.574854180168503, |
| "grad_norm": 0.828125, |
| "learning_rate": 9.687619088713571e-05, |
| "loss": 0.046875, |
| "memory(GiB)": 43.05, |
| "step": 2430, |
| "train_speed(iter/s)": 0.634905 |
| }, |
| { |
| "epoch": 1.578094620868438, |
| "grad_norm": 1.65625, |
| "learning_rate": 9.685752182747439e-05, |
| "loss": 0.0814453125, |
| "memory(GiB)": 43.05, |
| "step": 2435, |
| "train_speed(iter/s)": 0.635094 |
| }, |
| { |
| "epoch": 1.5813350615683732, |
| "grad_norm": 1.21875, |
| "learning_rate": 9.683879895699506e-05, |
| "loss": 0.0466796875, |
| "memory(GiB)": 43.05, |
| "step": 2440, |
| "train_speed(iter/s)": 0.635554 |
| }, |
| { |
| "epoch": 1.5845755022683083, |
| "grad_norm": 1.953125, |
| "learning_rate": 9.682002229719894e-05, |
| "loss": 0.04384765625, |
| "memory(GiB)": 43.05, |
| "step": 2445, |
| "train_speed(iter/s)": 0.635603 |
| }, |
| { |
| "epoch": 1.5878159429682437, |
| "grad_norm": 2.71875, |
| "learning_rate": 9.6801191869649e-05, |
| "loss": 0.0552734375, |
| "memory(GiB)": 43.05, |
| "step": 2450, |
| "train_speed(iter/s)": 0.636109 |
| }, |
| { |
| "epoch": 1.591056383668179, |
| "grad_norm": 2.921875, |
| "learning_rate": 9.678230769596996e-05, |
| "loss": 0.0421875, |
| "memory(GiB)": 43.05, |
| "step": 2455, |
| "train_speed(iter/s)": 0.636399 |
| }, |
| { |
| "epoch": 1.5942968243681142, |
| "grad_norm": 15.9375, |
| "learning_rate": 9.676336979784826e-05, |
| "loss": 0.045703125, |
| "memory(GiB)": 43.05, |
| "step": 2460, |
| "train_speed(iter/s)": 0.636494 |
| }, |
| { |
| "epoch": 1.5975372650680493, |
| "grad_norm": 12.8125, |
| "learning_rate": 9.674437819703202e-05, |
| "loss": 0.0341796875, |
| "memory(GiB)": 43.05, |
| "step": 2465, |
| "train_speed(iter/s)": 0.636654 |
| }, |
| { |
| "epoch": 1.6007777057679844, |
| "grad_norm": 16.875, |
| "learning_rate": 9.672533291533105e-05, |
| "loss": 0.04296875, |
| "memory(GiB)": 43.05, |
| "step": 2470, |
| "train_speed(iter/s)": 0.636867 |
| }, |
| { |
| "epoch": 1.6040181464679195, |
| "grad_norm": 3.703125, |
| "learning_rate": 9.670623397461684e-05, |
| "loss": 0.032421875, |
| "memory(GiB)": 43.05, |
| "step": 2475, |
| "train_speed(iter/s)": 0.636582 |
| }, |
| { |
| "epoch": 1.6072585871678549, |
| "grad_norm": 2.890625, |
| "learning_rate": 9.668708139682243e-05, |
| "loss": 0.0544921875, |
| "memory(GiB)": 43.05, |
| "step": 2480, |
| "train_speed(iter/s)": 0.636894 |
| }, |
| { |
| "epoch": 1.61049902786779, |
| "grad_norm": 12.1875, |
| "learning_rate": 9.666787520394251e-05, |
| "loss": 0.04296875, |
| "memory(GiB)": 43.05, |
| "step": 2485, |
| "train_speed(iter/s)": 0.637045 |
| }, |
| { |
| "epoch": 1.6137394685677253, |
| "grad_norm": 2.09375, |
| "learning_rate": 9.664861541803332e-05, |
| "loss": 0.03515625, |
| "memory(GiB)": 43.05, |
| "step": 2490, |
| "train_speed(iter/s)": 0.637455 |
| }, |
| { |
| "epoch": 1.6169799092676604, |
| "grad_norm": 10.0, |
| "learning_rate": 9.662930206121263e-05, |
| "loss": 0.030078125, |
| "memory(GiB)": 43.05, |
| "step": 2495, |
| "train_speed(iter/s)": 0.637851 |
| }, |
| { |
| "epoch": 1.6202203499675956, |
| "grad_norm": 4.71875, |
| "learning_rate": 9.660993515565979e-05, |
| "loss": 0.0486328125, |
| "memory(GiB)": 43.05, |
| "step": 2500, |
| "train_speed(iter/s)": 0.637773 |
| }, |
| { |
| "epoch": 1.6234607906675307, |
| "grad_norm": 15.3125, |
| "learning_rate": 9.659051472361559e-05, |
| "loss": 0.053515625, |
| "memory(GiB)": 43.05, |
| "step": 2505, |
| "train_speed(iter/s)": 0.638016 |
| }, |
| { |
| "epoch": 1.626701231367466, |
| "grad_norm": 2.71875, |
| "learning_rate": 9.657104078738228e-05, |
| "loss": 0.0615234375, |
| "memory(GiB)": 43.05, |
| "step": 2510, |
| "train_speed(iter/s)": 0.638155 |
| }, |
| { |
| "epoch": 1.6299416720674011, |
| "grad_norm": 3.03125, |
| "learning_rate": 9.655151336932362e-05, |
| "loss": 0.0408203125, |
| "memory(GiB)": 43.05, |
| "step": 2515, |
| "train_speed(iter/s)": 0.638142 |
| }, |
| { |
| "epoch": 1.6331821127673365, |
| "grad_norm": 16.125, |
| "learning_rate": 9.653193249186472e-05, |
| "loss": 0.05537109375, |
| "memory(GiB)": 43.05, |
| "step": 2520, |
| "train_speed(iter/s)": 0.638251 |
| }, |
| { |
| "epoch": 1.6364225534672716, |
| "grad_norm": 16.5, |
| "learning_rate": 9.651229817749212e-05, |
| "loss": 0.04453125, |
| "memory(GiB)": 43.05, |
| "step": 2525, |
| "train_speed(iter/s)": 0.638404 |
| }, |
| { |
| "epoch": 1.6396629941672067, |
| "grad_norm": 0.8984375, |
| "learning_rate": 9.64926104487537e-05, |
| "loss": 0.053125, |
| "memory(GiB)": 43.05, |
| "step": 2530, |
| "train_speed(iter/s)": 0.638731 |
| }, |
| { |
| "epoch": 1.6429034348671419, |
| "grad_norm": 12.5625, |
| "learning_rate": 9.647286932825872e-05, |
| "loss": 0.0494140625, |
| "memory(GiB)": 43.05, |
| "step": 2535, |
| "train_speed(iter/s)": 0.639032 |
| }, |
| { |
| "epoch": 1.646143875567077, |
| "grad_norm": 9.0, |
| "learning_rate": 9.64530748386777e-05, |
| "loss": 0.0568359375, |
| "memory(GiB)": 43.05, |
| "step": 2540, |
| "train_speed(iter/s)": 0.639515 |
| }, |
| { |
| "epoch": 1.6493843162670123, |
| "grad_norm": 9.75, |
| "learning_rate": 9.643322700274251e-05, |
| "loss": 0.0552734375, |
| "memory(GiB)": 43.05, |
| "step": 2545, |
| "train_speed(iter/s)": 0.639772 |
| }, |
| { |
| "epoch": 1.6526247569669477, |
| "grad_norm": 1.3203125, |
| "learning_rate": 9.641332584324625e-05, |
| "loss": 0.0642578125, |
| "memory(GiB)": 43.05, |
| "step": 2550, |
| "train_speed(iter/s)": 0.640097 |
| }, |
| { |
| "epoch": 1.6558651976668828, |
| "grad_norm": 0.55078125, |
| "learning_rate": 9.639337138304323e-05, |
| "loss": 0.04296875, |
| "memory(GiB)": 43.05, |
| "step": 2555, |
| "train_speed(iter/s)": 0.640597 |
| }, |
| { |
| "epoch": 1.659105638366818, |
| "grad_norm": 0.671875, |
| "learning_rate": 9.637336364504903e-05, |
| "loss": 0.037890625, |
| "memory(GiB)": 43.05, |
| "step": 2560, |
| "train_speed(iter/s)": 0.640832 |
| }, |
| { |
| "epoch": 1.662346079066753, |
| "grad_norm": 1.25, |
| "learning_rate": 9.635330265224038e-05, |
| "loss": 0.026953125, |
| "memory(GiB)": 43.05, |
| "step": 2565, |
| "train_speed(iter/s)": 0.641157 |
| }, |
| { |
| "epoch": 1.6655865197666881, |
| "grad_norm": 0.51953125, |
| "learning_rate": 9.633318842765515e-05, |
| "loss": 0.040625, |
| "memory(GiB)": 43.05, |
| "step": 2570, |
| "train_speed(iter/s)": 0.641182 |
| }, |
| { |
| "epoch": 1.6688269604666235, |
| "grad_norm": 2.265625, |
| "learning_rate": 9.631302099439237e-05, |
| "loss": 0.0537109375, |
| "memory(GiB)": 43.05, |
| "step": 2575, |
| "train_speed(iter/s)": 0.641614 |
| }, |
| { |
| "epoch": 1.6720674011665586, |
| "grad_norm": 1.5390625, |
| "learning_rate": 9.629280037561217e-05, |
| "loss": 0.041015625, |
| "memory(GiB)": 43.05, |
| "step": 2580, |
| "train_speed(iter/s)": 0.642187 |
| }, |
| { |
| "epoch": 1.675307841866494, |
| "grad_norm": 1.359375, |
| "learning_rate": 9.627252659453573e-05, |
| "loss": 0.0337890625, |
| "memory(GiB)": 43.05, |
| "step": 2585, |
| "train_speed(iter/s)": 0.64238 |
| }, |
| { |
| "epoch": 1.678548282566429, |
| "grad_norm": 12.0625, |
| "learning_rate": 9.625219967444537e-05, |
| "loss": 0.0537109375, |
| "memory(GiB)": 43.05, |
| "step": 2590, |
| "train_speed(iter/s)": 0.642289 |
| }, |
| { |
| "epoch": 1.6817887232663642, |
| "grad_norm": 3.25, |
| "learning_rate": 9.623181963868428e-05, |
| "loss": 0.043359375, |
| "memory(GiB)": 43.05, |
| "step": 2595, |
| "train_speed(iter/s)": 0.642606 |
| }, |
| { |
| "epoch": 1.6850291639662993, |
| "grad_norm": 0.98828125, |
| "learning_rate": 9.62113865106568e-05, |
| "loss": 0.0638671875, |
| "memory(GiB)": 43.05, |
| "step": 2600, |
| "train_speed(iter/s)": 0.643011 |
| }, |
| { |
| "epoch": 1.6882696046662347, |
| "grad_norm": 8.375, |
| "learning_rate": 9.619090031382815e-05, |
| "loss": 0.0548828125, |
| "memory(GiB)": 43.05, |
| "step": 2605, |
| "train_speed(iter/s)": 0.643071 |
| }, |
| { |
| "epoch": 1.6915100453661698, |
| "grad_norm": 3.25, |
| "learning_rate": 9.617036107172454e-05, |
| "loss": 0.0361328125, |
| "memory(GiB)": 43.05, |
| "step": 2610, |
| "train_speed(iter/s)": 0.643615 |
| }, |
| { |
| "epoch": 1.6947504860661051, |
| "grad_norm": 5.1875, |
| "learning_rate": 9.614976880793306e-05, |
| "loss": 0.0384765625, |
| "memory(GiB)": 43.05, |
| "step": 2615, |
| "train_speed(iter/s)": 0.64371 |
| }, |
| { |
| "epoch": 1.6979909267660402, |
| "grad_norm": 14.4375, |
| "learning_rate": 9.612912354610171e-05, |
| "loss": 0.037109375, |
| "memory(GiB)": 43.05, |
| "step": 2620, |
| "train_speed(iter/s)": 0.643613 |
| }, |
| { |
| "epoch": 1.7012313674659754, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.610842530993935e-05, |
| "loss": 0.0671875, |
| "memory(GiB)": 43.05, |
| "step": 2625, |
| "train_speed(iter/s)": 0.644023 |
| }, |
| { |
| "epoch": 1.7044718081659105, |
| "grad_norm": 1.9296875, |
| "learning_rate": 9.608767412321568e-05, |
| "loss": 0.0455078125, |
| "memory(GiB)": 43.05, |
| "step": 2630, |
| "train_speed(iter/s)": 0.644424 |
| }, |
| { |
| "epoch": 1.7077122488658456, |
| "grad_norm": 8.1875, |
| "learning_rate": 9.606687000976123e-05, |
| "loss": 0.045703125, |
| "memory(GiB)": 43.05, |
| "step": 2635, |
| "train_speed(iter/s)": 0.644588 |
| }, |
| { |
| "epoch": 1.710952689565781, |
| "grad_norm": 3.5625, |
| "learning_rate": 9.604601299346722e-05, |
| "loss": 0.0435546875, |
| "memory(GiB)": 43.05, |
| "step": 2640, |
| "train_speed(iter/s)": 0.64475 |
| }, |
| { |
| "epoch": 1.7141931302657163, |
| "grad_norm": 3.3125, |
| "learning_rate": 9.602510309828574e-05, |
| "loss": 0.06328125, |
| "memory(GiB)": 43.05, |
| "step": 2645, |
| "train_speed(iter/s)": 0.645059 |
| }, |
| { |
| "epoch": 1.7174335709656514, |
| "grad_norm": 13.5, |
| "learning_rate": 9.600414034822954e-05, |
| "loss": 0.03359375, |
| "memory(GiB)": 43.05, |
| "step": 2650, |
| "train_speed(iter/s)": 0.645293 |
| }, |
| { |
| "epoch": 1.7206740116655865, |
| "grad_norm": 3.265625, |
| "learning_rate": 9.598312476737206e-05, |
| "loss": 0.0517578125, |
| "memory(GiB)": 43.05, |
| "step": 2655, |
| "train_speed(iter/s)": 0.645706 |
| }, |
| { |
| "epoch": 1.7239144523655217, |
| "grad_norm": 16.5, |
| "learning_rate": 9.596205637984746e-05, |
| "loss": 0.030859375, |
| "memory(GiB)": 43.05, |
| "step": 2660, |
| "train_speed(iter/s)": 0.645929 |
| }, |
| { |
| "epoch": 1.7271548930654568, |
| "grad_norm": 11.875, |
| "learning_rate": 9.59409352098505e-05, |
| "loss": 0.0455078125, |
| "memory(GiB)": 43.05, |
| "step": 2665, |
| "train_speed(iter/s)": 0.646384 |
| }, |
| { |
| "epoch": 1.7303953337653921, |
| "grad_norm": 13.5625, |
| "learning_rate": 9.591976128163658e-05, |
| "loss": 0.0359375, |
| "memory(GiB)": 43.05, |
| "step": 2670, |
| "train_speed(iter/s)": 0.646461 |
| }, |
| { |
| "epoch": 1.7336357744653272, |
| "grad_norm": 12.4375, |
| "learning_rate": 9.589853461952166e-05, |
| "loss": 0.0390625, |
| "memory(GiB)": 43.05, |
| "step": 2675, |
| "train_speed(iter/s)": 0.646645 |
| }, |
| { |
| "epoch": 1.7368762151652626, |
| "grad_norm": 1.03125, |
| "learning_rate": 9.58772552478823e-05, |
| "loss": 0.051953125, |
| "memory(GiB)": 43.05, |
| "step": 2680, |
| "train_speed(iter/s)": 0.646713 |
| }, |
| { |
| "epoch": 1.7401166558651977, |
| "grad_norm": 10.125, |
| "learning_rate": 9.585592319115553e-05, |
| "loss": 0.04609375, |
| "memory(GiB)": 43.05, |
| "step": 2685, |
| "train_speed(iter/s)": 0.646561 |
| }, |
| { |
| "epoch": 1.7433570965651328, |
| "grad_norm": 4.1875, |
| "learning_rate": 9.583453847383895e-05, |
| "loss": 0.0271484375, |
| "memory(GiB)": 43.05, |
| "step": 2690, |
| "train_speed(iter/s)": 0.647019 |
| }, |
| { |
| "epoch": 1.746597537265068, |
| "grad_norm": 2.21875, |
| "learning_rate": 9.58131011204906e-05, |
| "loss": 0.046484375, |
| "memory(GiB)": 43.05, |
| "step": 2695, |
| "train_speed(iter/s)": 0.647176 |
| }, |
| { |
| "epoch": 1.7498379779650033, |
| "grad_norm": 2.84375, |
| "learning_rate": 9.579161115572898e-05, |
| "loss": 0.0716796875, |
| "memory(GiB)": 43.05, |
| "step": 2700, |
| "train_speed(iter/s)": 0.647045 |
| }, |
| { |
| "epoch": 1.7530784186649384, |
| "grad_norm": 13.375, |
| "learning_rate": 9.577006860423297e-05, |
| "loss": 0.0478515625, |
| "memory(GiB)": 43.05, |
| "step": 2705, |
| "train_speed(iter/s)": 0.647214 |
| }, |
| { |
| "epoch": 1.7563188593648738, |
| "grad_norm": 12.25, |
| "learning_rate": 9.57484734907419e-05, |
| "loss": 0.046484375, |
| "memory(GiB)": 43.05, |
| "step": 2710, |
| "train_speed(iter/s)": 0.647505 |
| }, |
| { |
| "epoch": 1.7595593000648089, |
| "grad_norm": 9.6875, |
| "learning_rate": 9.572682584005541e-05, |
| "loss": 0.0439453125, |
| "memory(GiB)": 43.05, |
| "step": 2715, |
| "train_speed(iter/s)": 0.647532 |
| }, |
| { |
| "epoch": 1.762799740764744, |
| "grad_norm": 10.25, |
| "learning_rate": 9.570512567703352e-05, |
| "loss": 0.0455078125, |
| "memory(GiB)": 43.05, |
| "step": 2720, |
| "train_speed(iter/s)": 0.647616 |
| }, |
| { |
| "epoch": 1.7660401814646791, |
| "grad_norm": 4.03125, |
| "learning_rate": 9.568337302659651e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 2725, |
| "train_speed(iter/s)": 0.647668 |
| }, |
| { |
| "epoch": 1.7692806221646142, |
| "grad_norm": 8.8125, |
| "learning_rate": 9.566156791372498e-05, |
| "loss": 0.043359375, |
| "memory(GiB)": 43.05, |
| "step": 2730, |
| "train_speed(iter/s)": 0.647617 |
| }, |
| { |
| "epoch": 1.7725210628645496, |
| "grad_norm": 2.828125, |
| "learning_rate": 9.563971036345973e-05, |
| "loss": 0.065625, |
| "memory(GiB)": 43.05, |
| "step": 2735, |
| "train_speed(iter/s)": 0.647826 |
| }, |
| { |
| "epoch": 1.775761503564485, |
| "grad_norm": 4.9375, |
| "learning_rate": 9.56178004009018e-05, |
| "loss": 0.033984375, |
| "memory(GiB)": 43.05, |
| "step": 2740, |
| "train_speed(iter/s)": 0.64777 |
| }, |
| { |
| "epoch": 1.77900194426442, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.559583805121246e-05, |
| "loss": 0.04697265625, |
| "memory(GiB)": 43.05, |
| "step": 2745, |
| "train_speed(iter/s)": 0.648228 |
| }, |
| { |
| "epoch": 1.7822423849643552, |
| "grad_norm": 1.875, |
| "learning_rate": 9.557382333961307e-05, |
| "loss": 0.053515625, |
| "memory(GiB)": 43.05, |
| "step": 2750, |
| "train_speed(iter/s)": 0.648208 |
| }, |
| { |
| "epoch": 1.7854828256642903, |
| "grad_norm": 11.75, |
| "learning_rate": 9.555175629138516e-05, |
| "loss": 0.0412109375, |
| "memory(GiB)": 43.05, |
| "step": 2755, |
| "train_speed(iter/s)": 0.648575 |
| }, |
| { |
| "epoch": 1.7887232663642254, |
| "grad_norm": 7.59375, |
| "learning_rate": 9.552963693187034e-05, |
| "loss": 0.05185546875, |
| "memory(GiB)": 43.05, |
| "step": 2760, |
| "train_speed(iter/s)": 0.648868 |
| }, |
| { |
| "epoch": 1.7919637070641607, |
| "grad_norm": 9.875, |
| "learning_rate": 9.550746528647036e-05, |
| "loss": 0.043359375, |
| "memory(GiB)": 43.05, |
| "step": 2765, |
| "train_speed(iter/s)": 0.649151 |
| }, |
| { |
| "epoch": 1.7952041477640959, |
| "grad_norm": 13.5625, |
| "learning_rate": 9.548524138064694e-05, |
| "loss": 0.033984375, |
| "memory(GiB)": 43.05, |
| "step": 2770, |
| "train_speed(iter/s)": 0.649594 |
| }, |
| { |
| "epoch": 1.7984445884640312, |
| "grad_norm": 1.2734375, |
| "learning_rate": 9.546296523992183e-05, |
| "loss": 0.0439453125, |
| "memory(GiB)": 43.05, |
| "step": 2775, |
| "train_speed(iter/s)": 0.650047 |
| }, |
| { |
| "epoch": 1.8016850291639663, |
| "grad_norm": 4.46875, |
| "learning_rate": 9.544063688987681e-05, |
| "loss": 0.0421875, |
| "memory(GiB)": 43.05, |
| "step": 2780, |
| "train_speed(iter/s)": 0.649979 |
| }, |
| { |
| "epoch": 1.8049254698639015, |
| "grad_norm": 8.0, |
| "learning_rate": 9.541825635615356e-05, |
| "loss": 0.039453125, |
| "memory(GiB)": 43.05, |
| "step": 2785, |
| "train_speed(iter/s)": 0.650359 |
| }, |
| { |
| "epoch": 1.8081659105638366, |
| "grad_norm": 0.59375, |
| "learning_rate": 9.539582366445372e-05, |
| "loss": 0.0490234375, |
| "memory(GiB)": 43.05, |
| "step": 2790, |
| "train_speed(iter/s)": 0.650716 |
| }, |
| { |
| "epoch": 1.811406351263772, |
| "grad_norm": 12.375, |
| "learning_rate": 9.537333884053883e-05, |
| "loss": 0.0673828125, |
| "memory(GiB)": 43.05, |
| "step": 2795, |
| "train_speed(iter/s)": 0.650829 |
| }, |
| { |
| "epoch": 1.814646791963707, |
| "grad_norm": 15.625, |
| "learning_rate": 9.535080191023026e-05, |
| "loss": 0.0251953125, |
| "memory(GiB)": 43.05, |
| "step": 2800, |
| "train_speed(iter/s)": 0.650751 |
| }, |
| { |
| "epoch": 1.8178872326636424, |
| "grad_norm": 14.25, |
| "learning_rate": 9.53282128994093e-05, |
| "loss": 0.037109375, |
| "memory(GiB)": 43.05, |
| "step": 2805, |
| "train_speed(iter/s)": 0.650893 |
| }, |
| { |
| "epoch": 1.8211276733635775, |
| "grad_norm": 1.1015625, |
| "learning_rate": 9.530557183401696e-05, |
| "loss": 0.038671875, |
| "memory(GiB)": 43.05, |
| "step": 2810, |
| "train_speed(iter/s)": 0.650918 |
| }, |
| { |
| "epoch": 1.8243681140635126, |
| "grad_norm": 2.9375, |
| "learning_rate": 9.528287874005406e-05, |
| "loss": 0.041015625, |
| "memory(GiB)": 43.05, |
| "step": 2815, |
| "train_speed(iter/s)": 0.650992 |
| }, |
| { |
| "epoch": 1.8276085547634477, |
| "grad_norm": 0.314453125, |
| "learning_rate": 9.526013364358118e-05, |
| "loss": 0.032421875, |
| "memory(GiB)": 43.05, |
| "step": 2820, |
| "train_speed(iter/s)": 0.651478 |
| }, |
| { |
| "epoch": 1.8308489954633829, |
| "grad_norm": 14.125, |
| "learning_rate": 9.523733657071864e-05, |
| "loss": 0.0431640625, |
| "memory(GiB)": 43.05, |
| "step": 2825, |
| "train_speed(iter/s)": 0.651322 |
| }, |
| { |
| "epoch": 1.8340894361633182, |
| "grad_norm": 12.6875, |
| "learning_rate": 9.521448754764639e-05, |
| "loss": 0.0404296875, |
| "memory(GiB)": 43.05, |
| "step": 2830, |
| "train_speed(iter/s)": 0.651614 |
| }, |
| { |
| "epoch": 1.8373298768632536, |
| "grad_norm": 2.3125, |
| "learning_rate": 9.519158660060409e-05, |
| "loss": 0.033984375, |
| "memory(GiB)": 43.05, |
| "step": 2835, |
| "train_speed(iter/s)": 0.651857 |
| }, |
| { |
| "epoch": 1.8405703175631887, |
| "grad_norm": 2.921875, |
| "learning_rate": 9.5168633755891e-05, |
| "loss": 0.0419921875, |
| "memory(GiB)": 43.05, |
| "step": 2840, |
| "train_speed(iter/s)": 0.651982 |
| }, |
| { |
| "epoch": 1.8438107582631238, |
| "grad_norm": 1.453125, |
| "learning_rate": 9.514562903986601e-05, |
| "loss": 0.0412109375, |
| "memory(GiB)": 43.05, |
| "step": 2845, |
| "train_speed(iter/s)": 0.652264 |
| }, |
| { |
| "epoch": 1.847051198963059, |
| "grad_norm": 7.8125, |
| "learning_rate": 9.512257247894754e-05, |
| "loss": 0.0283203125, |
| "memory(GiB)": 43.05, |
| "step": 2850, |
| "train_speed(iter/s)": 0.652446 |
| }, |
| { |
| "epoch": 1.850291639662994, |
| "grad_norm": 9.6875, |
| "learning_rate": 9.509946409961356e-05, |
| "loss": 0.0462890625, |
| "memory(GiB)": 43.05, |
| "step": 2855, |
| "train_speed(iter/s)": 0.652627 |
| }, |
| { |
| "epoch": 1.8535320803629294, |
| "grad_norm": 3.625, |
| "learning_rate": 9.50763039284016e-05, |
| "loss": 0.0333984375, |
| "memory(GiB)": 43.05, |
| "step": 2860, |
| "train_speed(iter/s)": 0.652836 |
| }, |
| { |
| "epoch": 1.8567725210628645, |
| "grad_norm": 12.5625, |
| "learning_rate": 9.505309199190857e-05, |
| "loss": 0.060546875, |
| "memory(GiB)": 43.05, |
| "step": 2865, |
| "train_speed(iter/s)": 0.653211 |
| }, |
| { |
| "epoch": 1.8600129617627998, |
| "grad_norm": 0.66796875, |
| "learning_rate": 9.50298283167909e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 2870, |
| "train_speed(iter/s)": 0.653502 |
| }, |
| { |
| "epoch": 1.863253402462735, |
| "grad_norm": 11.875, |
| "learning_rate": 9.500651292976444e-05, |
| "loss": 0.0373046875, |
| "memory(GiB)": 43.05, |
| "step": 2875, |
| "train_speed(iter/s)": 0.65399 |
| }, |
| { |
| "epoch": 1.86649384316267, |
| "grad_norm": 9.3125, |
| "learning_rate": 9.498314585760436e-05, |
| "loss": 0.038671875, |
| "memory(GiB)": 43.05, |
| "step": 2880, |
| "train_speed(iter/s)": 0.654499 |
| }, |
| { |
| "epoch": 1.8697342838626052, |
| "grad_norm": 2.359375, |
| "learning_rate": 9.495972712714525e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 2885, |
| "train_speed(iter/s)": 0.654706 |
| }, |
| { |
| "epoch": 1.8729747245625405, |
| "grad_norm": 9.9375, |
| "learning_rate": 9.4936256765281e-05, |
| "loss": 0.0353515625, |
| "memory(GiB)": 43.05, |
| "step": 2890, |
| "train_speed(iter/s)": 0.655062 |
| }, |
| { |
| "epoch": 1.8762151652624757, |
| "grad_norm": 11.5, |
| "learning_rate": 9.491273479896479e-05, |
| "loss": 0.0236328125, |
| "memory(GiB)": 43.05, |
| "step": 2895, |
| "train_speed(iter/s)": 0.655055 |
| }, |
| { |
| "epoch": 1.879455605962411, |
| "grad_norm": 0.390625, |
| "learning_rate": 9.488916125520905e-05, |
| "loss": 0.0384765625, |
| "memory(GiB)": 43.05, |
| "step": 2900, |
| "train_speed(iter/s)": 0.655601 |
| }, |
| { |
| "epoch": 1.8826960466623461, |
| "grad_norm": 9.8125, |
| "learning_rate": 9.486553616108547e-05, |
| "loss": 0.0337890625, |
| "memory(GiB)": 43.05, |
| "step": 2905, |
| "train_speed(iter/s)": 0.65603 |
| }, |
| { |
| "epoch": 1.8859364873622813, |
| "grad_norm": 2.78125, |
| "learning_rate": 9.484185954372493e-05, |
| "loss": 0.0451171875, |
| "memory(GiB)": 43.05, |
| "step": 2910, |
| "train_speed(iter/s)": 0.656213 |
| }, |
| { |
| "epoch": 1.8891769280622164, |
| "grad_norm": 1.40625, |
| "learning_rate": 9.481813143031747e-05, |
| "loss": 0.0482421875, |
| "memory(GiB)": 43.05, |
| "step": 2915, |
| "train_speed(iter/s)": 0.65662 |
| }, |
| { |
| "epoch": 1.8924173687621515, |
| "grad_norm": 16.5, |
| "learning_rate": 9.479435184811229e-05, |
| "loss": 0.0302734375, |
| "memory(GiB)": 43.05, |
| "step": 2920, |
| "train_speed(iter/s)": 0.656523 |
| }, |
| { |
| "epoch": 1.8956578094620868, |
| "grad_norm": 15.625, |
| "learning_rate": 9.477052082441765e-05, |
| "loss": 0.0271484375, |
| "memory(GiB)": 43.05, |
| "step": 2925, |
| "train_speed(iter/s)": 0.656557 |
| }, |
| { |
| "epoch": 1.8988982501620222, |
| "grad_norm": 9.8125, |
| "learning_rate": 9.474663838660094e-05, |
| "loss": 0.0419921875, |
| "memory(GiB)": 43.05, |
| "step": 2930, |
| "train_speed(iter/s)": 0.656682 |
| }, |
| { |
| "epoch": 1.9021386908619573, |
| "grad_norm": 1.3046875, |
| "learning_rate": 9.472270456208855e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 2935, |
| "train_speed(iter/s)": 0.656969 |
| }, |
| { |
| "epoch": 1.9053791315618924, |
| "grad_norm": 2.890625, |
| "learning_rate": 9.469871937836591e-05, |
| "loss": 0.038671875, |
| "memory(GiB)": 43.05, |
| "step": 2940, |
| "train_speed(iter/s)": 0.657051 |
| }, |
| { |
| "epoch": 1.9086195722618275, |
| "grad_norm": 0.77734375, |
| "learning_rate": 9.467468286297742e-05, |
| "loss": 0.0341796875, |
| "memory(GiB)": 43.05, |
| "step": 2945, |
| "train_speed(iter/s)": 0.657156 |
| }, |
| { |
| "epoch": 1.9118600129617627, |
| "grad_norm": 18.0, |
| "learning_rate": 9.465059504352643e-05, |
| "loss": 0.0607421875, |
| "memory(GiB)": 43.05, |
| "step": 2950, |
| "train_speed(iter/s)": 0.657068 |
| }, |
| { |
| "epoch": 1.915100453661698, |
| "grad_norm": 2.65625, |
| "learning_rate": 9.462645594767519e-05, |
| "loss": 0.041796875, |
| "memory(GiB)": 43.05, |
| "step": 2955, |
| "train_speed(iter/s)": 0.656706 |
| }, |
| { |
| "epoch": 1.9183408943616331, |
| "grad_norm": 0.474609375, |
| "learning_rate": 9.460226560314487e-05, |
| "loss": 0.04296875, |
| "memory(GiB)": 43.05, |
| "step": 2960, |
| "train_speed(iter/s)": 0.656903 |
| }, |
| { |
| "epoch": 1.9215813350615685, |
| "grad_norm": 13.8125, |
| "learning_rate": 9.457802403771548e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 2965, |
| "train_speed(iter/s)": 0.657217 |
| }, |
| { |
| "epoch": 1.9248217757615036, |
| "grad_norm": 10.8125, |
| "learning_rate": 9.455373127922583e-05, |
| "loss": 0.0369140625, |
| "memory(GiB)": 43.05, |
| "step": 2970, |
| "train_speed(iter/s)": 0.657478 |
| }, |
| { |
| "epoch": 1.9280622164614387, |
| "grad_norm": 1.640625, |
| "learning_rate": 9.452938735557355e-05, |
| "loss": 0.04609375, |
| "memory(GiB)": 43.05, |
| "step": 2975, |
| "train_speed(iter/s)": 0.657538 |
| }, |
| { |
| "epoch": 1.9313026571613738, |
| "grad_norm": 1.6640625, |
| "learning_rate": 9.450499229471501e-05, |
| "loss": 0.0353515625, |
| "memory(GiB)": 43.05, |
| "step": 2980, |
| "train_speed(iter/s)": 0.657753 |
| }, |
| { |
| "epoch": 1.9345430978613092, |
| "grad_norm": 1.8828125, |
| "learning_rate": 9.448054612466532e-05, |
| "loss": 0.047265625, |
| "memory(GiB)": 43.05, |
| "step": 2985, |
| "train_speed(iter/s)": 0.657793 |
| }, |
| { |
| "epoch": 1.9377835385612443, |
| "grad_norm": 9.5, |
| "learning_rate": 9.445604887349827e-05, |
| "loss": 0.05703125, |
| "memory(GiB)": 43.05, |
| "step": 2990, |
| "train_speed(iter/s)": 0.658063 |
| }, |
| { |
| "epoch": 1.9410239792611796, |
| "grad_norm": 4.46875, |
| "learning_rate": 9.443150056934631e-05, |
| "loss": 0.0224609375, |
| "memory(GiB)": 43.05, |
| "step": 2995, |
| "train_speed(iter/s)": 0.658252 |
| }, |
| { |
| "epoch": 1.9442644199611148, |
| "grad_norm": 1.1015625, |
| "learning_rate": 9.440690124040051e-05, |
| "loss": 0.0509765625, |
| "memory(GiB)": 43.05, |
| "step": 3000, |
| "train_speed(iter/s)": 0.658655 |
| }, |
| { |
| "epoch": 1.9475048606610499, |
| "grad_norm": 2.484375, |
| "learning_rate": 9.438225091491057e-05, |
| "loss": 0.06015625, |
| "memory(GiB)": 43.05, |
| "step": 3005, |
| "train_speed(iter/s)": 0.65849 |
| }, |
| { |
| "epoch": 1.950745301360985, |
| "grad_norm": 7.71875, |
| "learning_rate": 9.435754962118474e-05, |
| "loss": 0.0361328125, |
| "memory(GiB)": 43.05, |
| "step": 3010, |
| "train_speed(iter/s)": 0.658902 |
| }, |
| { |
| "epoch": 1.9539857420609201, |
| "grad_norm": 3.78125, |
| "learning_rate": 9.433279738758977e-05, |
| "loss": 0.045703125, |
| "memory(GiB)": 43.05, |
| "step": 3015, |
| "train_speed(iter/s)": 0.659047 |
| }, |
| { |
| "epoch": 1.9572261827608555, |
| "grad_norm": 14.1875, |
| "learning_rate": 9.430799424255096e-05, |
| "loss": 0.0373046875, |
| "memory(GiB)": 43.05, |
| "step": 3020, |
| "train_speed(iter/s)": 0.658827 |
| }, |
| { |
| "epoch": 1.9604666234607908, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.428314021455205e-05, |
| "loss": 0.0240234375, |
| "memory(GiB)": 43.05, |
| "step": 3025, |
| "train_speed(iter/s)": 0.658955 |
| }, |
| { |
| "epoch": 1.963707064160726, |
| "grad_norm": 3.859375, |
| "learning_rate": 9.42582353321352e-05, |
| "loss": 0.043359375, |
| "memory(GiB)": 43.05, |
| "step": 3030, |
| "train_speed(iter/s)": 0.658996 |
| }, |
| { |
| "epoch": 1.966947504860661, |
| "grad_norm": 3.890625, |
| "learning_rate": 9.423327962390098e-05, |
| "loss": 0.039453125, |
| "memory(GiB)": 43.05, |
| "step": 3035, |
| "train_speed(iter/s)": 0.659243 |
| }, |
| { |
| "epoch": 1.9701879455605962, |
| "grad_norm": 5.0, |
| "learning_rate": 9.420827311850836e-05, |
| "loss": 0.041015625, |
| "memory(GiB)": 43.05, |
| "step": 3040, |
| "train_speed(iter/s)": 0.659632 |
| }, |
| { |
| "epoch": 1.9734283862605313, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.41832158446746e-05, |
| "loss": 0.062890625, |
| "memory(GiB)": 43.05, |
| "step": 3045, |
| "train_speed(iter/s)": 0.659849 |
| }, |
| { |
| "epoch": 1.9766688269604666, |
| "grad_norm": 1.6953125, |
| "learning_rate": 9.415810783117528e-05, |
| "loss": 0.028515625, |
| "memory(GiB)": 43.05, |
| "step": 3050, |
| "train_speed(iter/s)": 0.660188 |
| }, |
| { |
| "epoch": 1.9799092676604018, |
| "grad_norm": 14.0625, |
| "learning_rate": 9.413294910684426e-05, |
| "loss": 0.03046875, |
| "memory(GiB)": 43.05, |
| "step": 3055, |
| "train_speed(iter/s)": 0.660519 |
| }, |
| { |
| "epoch": 1.983149708360337, |
| "grad_norm": 4.5625, |
| "learning_rate": 9.410773970057362e-05, |
| "loss": 0.0205078125, |
| "memory(GiB)": 43.05, |
| "step": 3060, |
| "train_speed(iter/s)": 0.6607 |
| }, |
| { |
| "epoch": 1.9863901490602722, |
| "grad_norm": 8.375, |
| "learning_rate": 9.408247964131364e-05, |
| "loss": 0.039453125, |
| "memory(GiB)": 43.05, |
| "step": 3065, |
| "train_speed(iter/s)": 0.66096 |
| }, |
| { |
| "epoch": 1.9896305897602073, |
| "grad_norm": 4.3125, |
| "learning_rate": 9.405716895807279e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 3070, |
| "train_speed(iter/s)": 0.661027 |
| }, |
| { |
| "epoch": 1.9928710304601425, |
| "grad_norm": 10.9375, |
| "learning_rate": 9.403180767991767e-05, |
| "loss": 0.046875, |
| "memory(GiB)": 43.05, |
| "step": 3075, |
| "train_speed(iter/s)": 0.661232 |
| }, |
| { |
| "epoch": 1.9961114711600778, |
| "grad_norm": 17.625, |
| "learning_rate": 9.400639583597296e-05, |
| "loss": 0.0486328125, |
| "memory(GiB)": 43.05, |
| "step": 3080, |
| "train_speed(iter/s)": 0.660635 |
| }, |
| { |
| "epoch": 1.999351911860013, |
| "grad_norm": 5.3125, |
| "learning_rate": 9.398093345542144e-05, |
| "loss": 0.039453125, |
| "memory(GiB)": 43.05, |
| "step": 3085, |
| "train_speed(iter/s)": 0.660788 |
| }, |
| { |
| "epoch": 2.0025923525599483, |
| "grad_norm": 5.03125, |
| "learning_rate": 9.395542056750391e-05, |
| "loss": 0.038671875, |
| "memory(GiB)": 43.05, |
| "step": 3090, |
| "train_speed(iter/s)": 0.660896 |
| }, |
| { |
| "epoch": 2.0058327932598834, |
| "grad_norm": 3.375, |
| "learning_rate": 9.392985720151915e-05, |
| "loss": 0.0544921875, |
| "memory(GiB)": 43.05, |
| "step": 3095, |
| "train_speed(iter/s)": 0.661222 |
| }, |
| { |
| "epoch": 2.0090732339598185, |
| "grad_norm": 12.8125, |
| "learning_rate": 9.390424338682396e-05, |
| "loss": 0.03798828125, |
| "memory(GiB)": 43.05, |
| "step": 3100, |
| "train_speed(iter/s)": 0.661405 |
| }, |
| { |
| "epoch": 2.0123136746597536, |
| "grad_norm": 1.015625, |
| "learning_rate": 9.387857915283304e-05, |
| "loss": 0.02734375, |
| "memory(GiB)": 43.05, |
| "step": 3105, |
| "train_speed(iter/s)": 0.661361 |
| }, |
| { |
| "epoch": 2.0155541153596888, |
| "grad_norm": 13.375, |
| "learning_rate": 9.385286452901902e-05, |
| "loss": 0.0447265625, |
| "memory(GiB)": 43.05, |
| "step": 3110, |
| "train_speed(iter/s)": 0.661828 |
| }, |
| { |
| "epoch": 2.0187945560596243, |
| "grad_norm": 2.828125, |
| "learning_rate": 9.382709954491235e-05, |
| "loss": 0.052734375, |
| "memory(GiB)": 43.05, |
| "step": 3115, |
| "train_speed(iter/s)": 0.661605 |
| }, |
| { |
| "epoch": 2.0220349967595594, |
| "grad_norm": 6.9375, |
| "learning_rate": 9.380128423010133e-05, |
| "loss": 0.03203125, |
| "memory(GiB)": 43.05, |
| "step": 3120, |
| "train_speed(iter/s)": 0.661861 |
| }, |
| { |
| "epoch": 2.0252754374594946, |
| "grad_norm": 1.671875, |
| "learning_rate": 9.377541861423211e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 3125, |
| "train_speed(iter/s)": 0.662028 |
| }, |
| { |
| "epoch": 2.0285158781594297, |
| "grad_norm": 1.2109375, |
| "learning_rate": 9.374950272700851e-05, |
| "loss": 0.03984375, |
| "memory(GiB)": 43.05, |
| "step": 3130, |
| "train_speed(iter/s)": 0.662475 |
| }, |
| { |
| "epoch": 2.031756318859365, |
| "grad_norm": 17.0, |
| "learning_rate": 9.37235365981922e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 3135, |
| "train_speed(iter/s)": 0.662552 |
| }, |
| { |
| "epoch": 2.0349967595593, |
| "grad_norm": 7.75, |
| "learning_rate": 9.369752025760243e-05, |
| "loss": 0.0185546875, |
| "memory(GiB)": 43.05, |
| "step": 3140, |
| "train_speed(iter/s)": 0.662818 |
| }, |
| { |
| "epoch": 2.038237200259235, |
| "grad_norm": 0.97265625, |
| "learning_rate": 9.367145373511619e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 3145, |
| "train_speed(iter/s)": 0.662597 |
| }, |
| { |
| "epoch": 2.0414776409591706, |
| "grad_norm": 2.1875, |
| "learning_rate": 9.364533706066807e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 3150, |
| "train_speed(iter/s)": 0.662557 |
| }, |
| { |
| "epoch": 2.0447180816591057, |
| "grad_norm": 0.59765625, |
| "learning_rate": 9.361917026425025e-05, |
| "loss": 0.037109375, |
| "memory(GiB)": 43.05, |
| "step": 3155, |
| "train_speed(iter/s)": 0.662682 |
| }, |
| { |
| "epoch": 2.047958522359041, |
| "grad_norm": 10.1875, |
| "learning_rate": 9.35929533759125e-05, |
| "loss": 0.0455078125, |
| "memory(GiB)": 43.05, |
| "step": 3160, |
| "train_speed(iter/s)": 0.662887 |
| }, |
| { |
| "epoch": 2.051198963058976, |
| "grad_norm": 9.75, |
| "learning_rate": 9.356668642576205e-05, |
| "loss": 0.0470703125, |
| "memory(GiB)": 43.05, |
| "step": 3165, |
| "train_speed(iter/s)": 0.663122 |
| }, |
| { |
| "epoch": 2.054439403758911, |
| "grad_norm": 4.84375, |
| "learning_rate": 9.354036944396372e-05, |
| "loss": 0.022265625, |
| "memory(GiB)": 43.05, |
| "step": 3170, |
| "train_speed(iter/s)": 0.663428 |
| }, |
| { |
| "epoch": 2.057679844458846, |
| "grad_norm": 13.6875, |
| "learning_rate": 9.351400246073969e-05, |
| "loss": 0.0275390625, |
| "memory(GiB)": 43.05, |
| "step": 3175, |
| "train_speed(iter/s)": 0.663244 |
| }, |
| { |
| "epoch": 2.060920285158782, |
| "grad_norm": 11.125, |
| "learning_rate": 9.34875855063696e-05, |
| "loss": 0.04140625, |
| "memory(GiB)": 43.05, |
| "step": 3180, |
| "train_speed(iter/s)": 0.663705 |
| }, |
| { |
| "epoch": 2.064160725858717, |
| "grad_norm": 1.34375, |
| "learning_rate": 9.346111861119051e-05, |
| "loss": 0.048046875, |
| "memory(GiB)": 43.05, |
| "step": 3185, |
| "train_speed(iter/s)": 0.663841 |
| }, |
| { |
| "epoch": 2.067401166558652, |
| "grad_norm": 15.4375, |
| "learning_rate": 9.343460180559678e-05, |
| "loss": 0.01953125, |
| "memory(GiB)": 43.05, |
| "step": 3190, |
| "train_speed(iter/s)": 0.664087 |
| }, |
| { |
| "epoch": 2.070641607258587, |
| "grad_norm": 6.15625, |
| "learning_rate": 9.340803512004008e-05, |
| "loss": 0.0240234375, |
| "memory(GiB)": 43.05, |
| "step": 3195, |
| "train_speed(iter/s)": 0.664161 |
| }, |
| { |
| "epoch": 2.0738820479585223, |
| "grad_norm": 10.1875, |
| "learning_rate": 9.338141858502944e-05, |
| "loss": 0.02578125, |
| "memory(GiB)": 43.05, |
| "step": 3200, |
| "train_speed(iter/s)": 0.664059 |
| }, |
| { |
| "epoch": 2.0771224886584574, |
| "grad_norm": 12.875, |
| "learning_rate": 9.335475223113104e-05, |
| "loss": 0.0486328125, |
| "memory(GiB)": 43.05, |
| "step": 3205, |
| "train_speed(iter/s)": 0.664006 |
| }, |
| { |
| "epoch": 2.080362929358393, |
| "grad_norm": 3.984375, |
| "learning_rate": 9.332803608896835e-05, |
| "loss": 0.03984375, |
| "memory(GiB)": 43.05, |
| "step": 3210, |
| "train_speed(iter/s)": 0.664025 |
| }, |
| { |
| "epoch": 2.083603370058328, |
| "grad_norm": 1.78125, |
| "learning_rate": 9.330127018922194e-05, |
| "loss": 0.03828125, |
| "memory(GiB)": 43.05, |
| "step": 3215, |
| "train_speed(iter/s)": 0.664065 |
| }, |
| { |
| "epoch": 2.086843810758263, |
| "grad_norm": 3.09375, |
| "learning_rate": 9.32744545626296e-05, |
| "loss": 0.0404296875, |
| "memory(GiB)": 43.05, |
| "step": 3220, |
| "train_speed(iter/s)": 0.66432 |
| }, |
| { |
| "epoch": 2.0900842514581983, |
| "grad_norm": 5.15625, |
| "learning_rate": 9.324758923998617e-05, |
| "loss": 0.044921875, |
| "memory(GiB)": 43.05, |
| "step": 3225, |
| "train_speed(iter/s)": 0.664349 |
| }, |
| { |
| "epoch": 2.0933246921581334, |
| "grad_norm": 12.5, |
| "learning_rate": 9.32206742521436e-05, |
| "loss": 0.045703125, |
| "memory(GiB)": 43.05, |
| "step": 3230, |
| "train_speed(iter/s)": 0.664254 |
| }, |
| { |
| "epoch": 2.0965651328580686, |
| "grad_norm": 0.625, |
| "learning_rate": 9.319370963001084e-05, |
| "loss": 0.0208984375, |
| "memory(GiB)": 43.05, |
| "step": 3235, |
| "train_speed(iter/s)": 0.664253 |
| }, |
| { |
| "epoch": 2.0998055735580037, |
| "grad_norm": 6.84375, |
| "learning_rate": 9.316669540455386e-05, |
| "loss": 0.027734375, |
| "memory(GiB)": 43.05, |
| "step": 3240, |
| "train_speed(iter/s)": 0.664384 |
| }, |
| { |
| "epoch": 2.1030460142579392, |
| "grad_norm": 1.390625, |
| "learning_rate": 9.313963160679557e-05, |
| "loss": 0.0505859375, |
| "memory(GiB)": 43.05, |
| "step": 3245, |
| "train_speed(iter/s)": 0.664557 |
| }, |
| { |
| "epoch": 2.1062864549578744, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.311251826781587e-05, |
| "loss": 0.03984375, |
| "memory(GiB)": 43.05, |
| "step": 3250, |
| "train_speed(iter/s)": 0.664798 |
| }, |
| { |
| "epoch": 2.1095268956578095, |
| "grad_norm": 5.40625, |
| "learning_rate": 9.308535541875146e-05, |
| "loss": 0.0369140625, |
| "memory(GiB)": 43.05, |
| "step": 3255, |
| "train_speed(iter/s)": 0.664872 |
| }, |
| { |
| "epoch": 2.1127673363577446, |
| "grad_norm": 10.25, |
| "learning_rate": 9.3058143090796e-05, |
| "loss": 0.0228515625, |
| "memory(GiB)": 43.05, |
| "step": 3260, |
| "train_speed(iter/s)": 0.665106 |
| }, |
| { |
| "epoch": 2.1160077770576797, |
| "grad_norm": 2.390625, |
| "learning_rate": 9.303088131519986e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 3265, |
| "train_speed(iter/s)": 0.665408 |
| }, |
| { |
| "epoch": 2.119248217757615, |
| "grad_norm": 13.3125, |
| "learning_rate": 9.300357012327031e-05, |
| "loss": 0.03046875, |
| "memory(GiB)": 43.05, |
| "step": 3270, |
| "train_speed(iter/s)": 0.665595 |
| }, |
| { |
| "epoch": 2.1224886584575504, |
| "grad_norm": 1.2734375, |
| "learning_rate": 9.297620954637126e-05, |
| "loss": 0.03046875, |
| "memory(GiB)": 43.05, |
| "step": 3275, |
| "train_speed(iter/s)": 0.665503 |
| }, |
| { |
| "epoch": 2.1257290991574855, |
| "grad_norm": 0.474609375, |
| "learning_rate": 9.294879961592342e-05, |
| "loss": 0.029296875, |
| "memory(GiB)": 43.05, |
| "step": 3280, |
| "train_speed(iter/s)": 0.665823 |
| }, |
| { |
| "epoch": 2.1289695398574207, |
| "grad_norm": 16.75, |
| "learning_rate": 9.292134036340414e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 3285, |
| "train_speed(iter/s)": 0.66599 |
| }, |
| { |
| "epoch": 2.1322099805573558, |
| "grad_norm": 7.53125, |
| "learning_rate": 9.28938318203474e-05, |
| "loss": 0.03671875, |
| "memory(GiB)": 43.05, |
| "step": 3290, |
| "train_speed(iter/s)": 0.66602 |
| }, |
| { |
| "epoch": 2.135450421257291, |
| "grad_norm": 13.3125, |
| "learning_rate": 9.286627401834385e-05, |
| "loss": 0.04140625, |
| "memory(GiB)": 43.05, |
| "step": 3295, |
| "train_speed(iter/s)": 0.666393 |
| }, |
| { |
| "epoch": 2.138690861957226, |
| "grad_norm": 12.4375, |
| "learning_rate": 9.283866698904059e-05, |
| "loss": 0.0455078125, |
| "memory(GiB)": 43.05, |
| "step": 3300, |
| "train_speed(iter/s)": 0.666836 |
| }, |
| { |
| "epoch": 2.141931302657161, |
| "grad_norm": 1.8828125, |
| "learning_rate": 9.281101076414133e-05, |
| "loss": 0.024609375, |
| "memory(GiB)": 43.05, |
| "step": 3305, |
| "train_speed(iter/s)": 0.666932 |
| }, |
| { |
| "epoch": 2.1451717433570967, |
| "grad_norm": 10.1875, |
| "learning_rate": 9.278330537540631e-05, |
| "loss": 0.03203125, |
| "memory(GiB)": 43.05, |
| "step": 3310, |
| "train_speed(iter/s)": 0.667343 |
| }, |
| { |
| "epoch": 2.148412184057032, |
| "grad_norm": 13.125, |
| "learning_rate": 9.275555085465215e-05, |
| "loss": 0.034765625, |
| "memory(GiB)": 43.05, |
| "step": 3315, |
| "train_speed(iter/s)": 0.667436 |
| }, |
| { |
| "epoch": 2.151652624756967, |
| "grad_norm": 0.71875, |
| "learning_rate": 9.272774723375195e-05, |
| "loss": 0.0556640625, |
| "memory(GiB)": 43.05, |
| "step": 3320, |
| "train_speed(iter/s)": 0.66775 |
| }, |
| { |
| "epoch": 2.154893065456902, |
| "grad_norm": 11.6875, |
| "learning_rate": 9.269989454463514e-05, |
| "loss": 0.0388671875, |
| "memory(GiB)": 43.05, |
| "step": 3325, |
| "train_speed(iter/s)": 0.667811 |
| }, |
| { |
| "epoch": 2.158133506156837, |
| "grad_norm": 10.25, |
| "learning_rate": 9.267199281928758e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 3330, |
| "train_speed(iter/s)": 0.668054 |
| }, |
| { |
| "epoch": 2.1613739468567728, |
| "grad_norm": 9.375, |
| "learning_rate": 9.264404208975136e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 3335, |
| "train_speed(iter/s)": 0.668113 |
| }, |
| { |
| "epoch": 2.164614387556708, |
| "grad_norm": 2.296875, |
| "learning_rate": 9.26160423881249e-05, |
| "loss": 0.07109375, |
| "memory(GiB)": 43.05, |
| "step": 3340, |
| "train_speed(iter/s)": 0.668019 |
| }, |
| { |
| "epoch": 2.167854828256643, |
| "grad_norm": 7.28125, |
| "learning_rate": 9.258799374656286e-05, |
| "loss": 0.04296875, |
| "memory(GiB)": 43.05, |
| "step": 3345, |
| "train_speed(iter/s)": 0.668181 |
| }, |
| { |
| "epoch": 2.171095268956578, |
| "grad_norm": 12.375, |
| "learning_rate": 9.255989619727605e-05, |
| "loss": 0.0552734375, |
| "memory(GiB)": 43.05, |
| "step": 3350, |
| "train_speed(iter/s)": 0.668287 |
| }, |
| { |
| "epoch": 2.1743357096565132, |
| "grad_norm": 0.58984375, |
| "learning_rate": 9.25317497725315e-05, |
| "loss": 0.030859375, |
| "memory(GiB)": 43.05, |
| "step": 3355, |
| "train_speed(iter/s)": 0.668304 |
| }, |
| { |
| "epoch": 2.1775761503564484, |
| "grad_norm": 1.609375, |
| "learning_rate": 9.250355450465232e-05, |
| "loss": 0.020703125, |
| "memory(GiB)": 43.05, |
| "step": 3360, |
| "train_speed(iter/s)": 0.668607 |
| }, |
| { |
| "epoch": 2.1808165910563835, |
| "grad_norm": 6.28125, |
| "learning_rate": 9.247531042601777e-05, |
| "loss": 0.0376953125, |
| "memory(GiB)": 43.05, |
| "step": 3365, |
| "train_speed(iter/s)": 0.668622 |
| }, |
| { |
| "epoch": 2.184057031756319, |
| "grad_norm": 4.09375, |
| "learning_rate": 9.244701756906314e-05, |
| "loss": 0.0283203125, |
| "memory(GiB)": 43.05, |
| "step": 3370, |
| "train_speed(iter/s)": 0.66889 |
| }, |
| { |
| "epoch": 2.187297472456254, |
| "grad_norm": 2.921875, |
| "learning_rate": 9.241867596627969e-05, |
| "loss": 0.0302734375, |
| "memory(GiB)": 43.05, |
| "step": 3375, |
| "train_speed(iter/s)": 0.669121 |
| }, |
| { |
| "epoch": 2.1905379131561893, |
| "grad_norm": 10.1875, |
| "learning_rate": 9.239028565021472e-05, |
| "loss": 0.0314453125, |
| "memory(GiB)": 43.05, |
| "step": 3380, |
| "train_speed(iter/s)": 0.669339 |
| }, |
| { |
| "epoch": 2.1937783538561244, |
| "grad_norm": 12.0, |
| "learning_rate": 9.236184665347147e-05, |
| "loss": 0.02578125, |
| "memory(GiB)": 43.05, |
| "step": 3385, |
| "train_speed(iter/s)": 0.6695 |
| }, |
| { |
| "epoch": 2.1970187945560595, |
| "grad_norm": 11.9375, |
| "learning_rate": 9.233335900870906e-05, |
| "loss": 0.0359375, |
| "memory(GiB)": 43.05, |
| "step": 3390, |
| "train_speed(iter/s)": 0.669846 |
| }, |
| { |
| "epoch": 2.2002592352559946, |
| "grad_norm": 14.6875, |
| "learning_rate": 9.230482274864244e-05, |
| "loss": 0.04296875, |
| "memory(GiB)": 43.05, |
| "step": 3395, |
| "train_speed(iter/s)": 0.669786 |
| }, |
| { |
| "epoch": 2.20349967595593, |
| "grad_norm": 15.1875, |
| "learning_rate": 9.227623790604248e-05, |
| "loss": 0.0337890625, |
| "memory(GiB)": 43.05, |
| "step": 3400, |
| "train_speed(iter/s)": 0.669768 |
| }, |
| { |
| "epoch": 2.2067401166558653, |
| "grad_norm": 20.375, |
| "learning_rate": 9.224760451373575e-05, |
| "loss": 0.044921875, |
| "memory(GiB)": 43.05, |
| "step": 3405, |
| "train_speed(iter/s)": 0.669685 |
| }, |
| { |
| "epoch": 2.2099805573558005, |
| "grad_norm": 1.5078125, |
| "learning_rate": 9.221892260460467e-05, |
| "loss": 0.03828125, |
| "memory(GiB)": 43.05, |
| "step": 3410, |
| "train_speed(iter/s)": 0.670045 |
| }, |
| { |
| "epoch": 2.2132209980557356, |
| "grad_norm": 10.0625, |
| "learning_rate": 9.219019221158729e-05, |
| "loss": 0.047265625, |
| "memory(GiB)": 43.05, |
| "step": 3415, |
| "train_speed(iter/s)": 0.670012 |
| }, |
| { |
| "epoch": 2.2164614387556707, |
| "grad_norm": 12.5625, |
| "learning_rate": 9.216141336767738e-05, |
| "loss": 0.041015625, |
| "memory(GiB)": 43.05, |
| "step": 3420, |
| "train_speed(iter/s)": 0.669962 |
| }, |
| { |
| "epoch": 2.219701879455606, |
| "grad_norm": 4.375, |
| "learning_rate": 9.213258610592435e-05, |
| "loss": 0.03974609375, |
| "memory(GiB)": 43.05, |
| "step": 3425, |
| "train_speed(iter/s)": 0.67006 |
| }, |
| { |
| "epoch": 2.222942320155541, |
| "grad_norm": 3.265625, |
| "learning_rate": 9.210371045943318e-05, |
| "loss": 0.04013671875, |
| "memory(GiB)": 43.05, |
| "step": 3430, |
| "train_speed(iter/s)": 0.670476 |
| }, |
| { |
| "epoch": 2.2261827608554765, |
| "grad_norm": 13.875, |
| "learning_rate": 9.207478646136447e-05, |
| "loss": 0.0375, |
| "memory(GiB)": 43.05, |
| "step": 3435, |
| "train_speed(iter/s)": 0.670632 |
| }, |
| { |
| "epoch": 2.2294232015554116, |
| "grad_norm": 5.90625, |
| "learning_rate": 9.204581414493432e-05, |
| "loss": 0.0474609375, |
| "memory(GiB)": 43.05, |
| "step": 3440, |
| "train_speed(iter/s)": 0.670764 |
| }, |
| { |
| "epoch": 2.2326636422553467, |
| "grad_norm": 8.25, |
| "learning_rate": 9.201679354341428e-05, |
| "loss": 0.055078125, |
| "memory(GiB)": 43.05, |
| "step": 3445, |
| "train_speed(iter/s)": 0.671001 |
| }, |
| { |
| "epoch": 2.235904082955282, |
| "grad_norm": 16.0, |
| "learning_rate": 9.198772469013142e-05, |
| "loss": 0.0447265625, |
| "memory(GiB)": 43.05, |
| "step": 3450, |
| "train_speed(iter/s)": 0.67115 |
| }, |
| { |
| "epoch": 2.239144523655217, |
| "grad_norm": 0.49609375, |
| "learning_rate": 9.195860761846817e-05, |
| "loss": 0.0419921875, |
| "memory(GiB)": 43.05, |
| "step": 3455, |
| "train_speed(iter/s)": 0.671323 |
| }, |
| { |
| "epoch": 2.242384964355152, |
| "grad_norm": 1.796875, |
| "learning_rate": 9.192944236186236e-05, |
| "loss": 0.024609375, |
| "memory(GiB)": 43.05, |
| "step": 3460, |
| "train_speed(iter/s)": 0.671383 |
| }, |
| { |
| "epoch": 2.2456254050550877, |
| "grad_norm": 0.54296875, |
| "learning_rate": 9.190022895380714e-05, |
| "loss": 0.0396484375, |
| "memory(GiB)": 43.05, |
| "step": 3465, |
| "train_speed(iter/s)": 0.671342 |
| }, |
| { |
| "epoch": 2.248865845755023, |
| "grad_norm": 12.4375, |
| "learning_rate": 9.187096742785098e-05, |
| "loss": 0.0736328125, |
| "memory(GiB)": 43.05, |
| "step": 3470, |
| "train_speed(iter/s)": 0.671551 |
| }, |
| { |
| "epoch": 2.252106286454958, |
| "grad_norm": 18.0, |
| "learning_rate": 9.184165781759757e-05, |
| "loss": 0.0462890625, |
| "memory(GiB)": 43.05, |
| "step": 3475, |
| "train_speed(iter/s)": 0.671532 |
| }, |
| { |
| "epoch": 2.255346727154893, |
| "grad_norm": 2.578125, |
| "learning_rate": 9.181230015670583e-05, |
| "loss": 0.0369140625, |
| "memory(GiB)": 43.05, |
| "step": 3480, |
| "train_speed(iter/s)": 0.671873 |
| }, |
| { |
| "epoch": 2.258587167854828, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.178289447888992e-05, |
| "loss": 0.0486328125, |
| "memory(GiB)": 43.05, |
| "step": 3485, |
| "train_speed(iter/s)": 0.671924 |
| }, |
| { |
| "epoch": 2.2618276085547633, |
| "grad_norm": 10.6875, |
| "learning_rate": 9.175344081791906e-05, |
| "loss": 0.0341796875, |
| "memory(GiB)": 43.05, |
| "step": 3490, |
| "train_speed(iter/s)": 0.672205 |
| }, |
| { |
| "epoch": 2.2650680492546984, |
| "grad_norm": 1.5859375, |
| "learning_rate": 9.17239392076176e-05, |
| "loss": 0.04609375, |
| "memory(GiB)": 43.05, |
| "step": 3495, |
| "train_speed(iter/s)": 0.672161 |
| }, |
| { |
| "epoch": 2.268308489954634, |
| "grad_norm": 17.875, |
| "learning_rate": 9.169438968186499e-05, |
| "loss": 0.0375, |
| "memory(GiB)": 43.05, |
| "step": 3500, |
| "train_speed(iter/s)": 0.672368 |
| }, |
| { |
| "epoch": 2.271548930654569, |
| "grad_norm": 10.6875, |
| "learning_rate": 9.166479227459567e-05, |
| "loss": 0.0412109375, |
| "memory(GiB)": 43.05, |
| "step": 3505, |
| "train_speed(iter/s)": 0.67252 |
| }, |
| { |
| "epoch": 2.274789371354504, |
| "grad_norm": 2.96875, |
| "learning_rate": 9.163514701979904e-05, |
| "loss": 0.0337890625, |
| "memory(GiB)": 43.05, |
| "step": 3510, |
| "train_speed(iter/s)": 0.672228 |
| }, |
| { |
| "epoch": 2.2780298120544393, |
| "grad_norm": 8.5, |
| "learning_rate": 9.160545395151955e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 3515, |
| "train_speed(iter/s)": 0.67226 |
| }, |
| { |
| "epoch": 2.2812702527543745, |
| "grad_norm": 3.96875, |
| "learning_rate": 9.157571310385644e-05, |
| "loss": 0.0470703125, |
| "memory(GiB)": 43.05, |
| "step": 3520, |
| "train_speed(iter/s)": 0.672291 |
| }, |
| { |
| "epoch": 2.28451069345431, |
| "grad_norm": 10.5625, |
| "learning_rate": 9.154592451096388e-05, |
| "loss": 0.0546875, |
| "memory(GiB)": 43.05, |
| "step": 3525, |
| "train_speed(iter/s)": 0.672127 |
| }, |
| { |
| "epoch": 2.287751134154245, |
| "grad_norm": 11.4375, |
| "learning_rate": 9.151608820705087e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 3530, |
| "train_speed(iter/s)": 0.672413 |
| }, |
| { |
| "epoch": 2.2909915748541803, |
| "grad_norm": 2.984375, |
| "learning_rate": 9.148620422638119e-05, |
| "loss": 0.041015625, |
| "memory(GiB)": 43.05, |
| "step": 3535, |
| "train_speed(iter/s)": 0.672445 |
| }, |
| { |
| "epoch": 2.2942320155541154, |
| "grad_norm": 9.875, |
| "learning_rate": 9.145627260327338e-05, |
| "loss": 0.0478515625, |
| "memory(GiB)": 43.05, |
| "step": 3540, |
| "train_speed(iter/s)": 0.6724 |
| }, |
| { |
| "epoch": 2.2974724562540505, |
| "grad_norm": 0.51953125, |
| "learning_rate": 9.142629337210066e-05, |
| "loss": 0.0470703125, |
| "memory(GiB)": 43.05, |
| "step": 3545, |
| "train_speed(iter/s)": 0.672421 |
| }, |
| { |
| "epoch": 2.3007128969539856, |
| "grad_norm": 10.0, |
| "learning_rate": 9.139626656729099e-05, |
| "loss": 0.034765625, |
| "memory(GiB)": 43.05, |
| "step": 3550, |
| "train_speed(iter/s)": 0.672505 |
| }, |
| { |
| "epoch": 2.3039533376539207, |
| "grad_norm": 6.78125, |
| "learning_rate": 9.136619222332687e-05, |
| "loss": 0.02265625, |
| "memory(GiB)": 43.05, |
| "step": 3555, |
| "train_speed(iter/s)": 0.672758 |
| }, |
| { |
| "epoch": 2.3071937783538563, |
| "grad_norm": 12.5625, |
| "learning_rate": 9.13360703747455e-05, |
| "loss": 0.045703125, |
| "memory(GiB)": 43.05, |
| "step": 3560, |
| "train_speed(iter/s)": 0.672888 |
| }, |
| { |
| "epoch": 2.3104342190537914, |
| "grad_norm": 11.1875, |
| "learning_rate": 9.130590105613854e-05, |
| "loss": 0.0228515625, |
| "memory(GiB)": 43.05, |
| "step": 3565, |
| "train_speed(iter/s)": 0.673289 |
| }, |
| { |
| "epoch": 2.3136746597537265, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.127568430215222e-05, |
| "loss": 0.0341796875, |
| "memory(GiB)": 43.05, |
| "step": 3570, |
| "train_speed(iter/s)": 0.67355 |
| }, |
| { |
| "epoch": 2.3169151004536617, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.124542014748723e-05, |
| "loss": 0.0236328125, |
| "memory(GiB)": 43.05, |
| "step": 3575, |
| "train_speed(iter/s)": 0.673451 |
| }, |
| { |
| "epoch": 2.320155541153597, |
| "grad_norm": 1.53125, |
| "learning_rate": 9.121510862689868e-05, |
| "loss": 0.028515625, |
| "memory(GiB)": 43.05, |
| "step": 3580, |
| "train_speed(iter/s)": 0.673597 |
| }, |
| { |
| "epoch": 2.323395981853532, |
| "grad_norm": 10.875, |
| "learning_rate": 9.118474977519611e-05, |
| "loss": 0.0349609375, |
| "memory(GiB)": 43.05, |
| "step": 3585, |
| "train_speed(iter/s)": 0.673586 |
| }, |
| { |
| "epoch": 2.3266364225534675, |
| "grad_norm": 2.671875, |
| "learning_rate": 9.115434362724337e-05, |
| "loss": 0.0501953125, |
| "memory(GiB)": 43.05, |
| "step": 3590, |
| "train_speed(iter/s)": 0.673664 |
| }, |
| { |
| "epoch": 2.3298768632534026, |
| "grad_norm": 16.5, |
| "learning_rate": 9.112389021795865e-05, |
| "loss": 0.0392578125, |
| "memory(GiB)": 43.05, |
| "step": 3595, |
| "train_speed(iter/s)": 0.673618 |
| }, |
| { |
| "epoch": 2.3331173039533377, |
| "grad_norm": 10.375, |
| "learning_rate": 9.109338958231441e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 3600, |
| "train_speed(iter/s)": 0.673742 |
| }, |
| { |
| "epoch": 2.336357744653273, |
| "grad_norm": 3.3125, |
| "learning_rate": 9.106284175533737e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 3605, |
| "train_speed(iter/s)": 0.673942 |
| }, |
| { |
| "epoch": 2.339598185353208, |
| "grad_norm": 11.75, |
| "learning_rate": 9.10322467721084e-05, |
| "loss": 0.03203125, |
| "memory(GiB)": 43.05, |
| "step": 3610, |
| "train_speed(iter/s)": 0.674325 |
| }, |
| { |
| "epoch": 2.342838626053143, |
| "grad_norm": 2.5625, |
| "learning_rate": 9.100160466776252e-05, |
| "loss": 0.03203125, |
| "memory(GiB)": 43.05, |
| "step": 3615, |
| "train_speed(iter/s)": 0.674276 |
| }, |
| { |
| "epoch": 2.346079066753078, |
| "grad_norm": 6.9375, |
| "learning_rate": 9.097091547748893e-05, |
| "loss": 0.0228515625, |
| "memory(GiB)": 43.05, |
| "step": 3620, |
| "train_speed(iter/s)": 0.674489 |
| }, |
| { |
| "epoch": 2.3493195074530138, |
| "grad_norm": 10.5625, |
| "learning_rate": 9.094017923653084e-05, |
| "loss": 0.0455078125, |
| "memory(GiB)": 43.05, |
| "step": 3625, |
| "train_speed(iter/s)": 0.67489 |
| }, |
| { |
| "epoch": 2.352559948152949, |
| "grad_norm": 3.359375, |
| "learning_rate": 9.090939598018551e-05, |
| "loss": 0.0244140625, |
| "memory(GiB)": 43.05, |
| "step": 3630, |
| "train_speed(iter/s)": 0.675088 |
| }, |
| { |
| "epoch": 2.355800388852884, |
| "grad_norm": 11.4375, |
| "learning_rate": 9.08785657438042e-05, |
| "loss": 0.0556640625, |
| "memory(GiB)": 43.05, |
| "step": 3635, |
| "train_speed(iter/s)": 0.675286 |
| }, |
| { |
| "epoch": 2.359040829552819, |
| "grad_norm": 12.6875, |
| "learning_rate": 9.084768856279212e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 3640, |
| "train_speed(iter/s)": 0.675561 |
| }, |
| { |
| "epoch": 2.3622812702527543, |
| "grad_norm": 10.125, |
| "learning_rate": 9.081676447260838e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 3645, |
| "train_speed(iter/s)": 0.675396 |
| }, |
| { |
| "epoch": 2.3655217109526894, |
| "grad_norm": 10.1875, |
| "learning_rate": 9.078579350876597e-05, |
| "loss": 0.037109375, |
| "memory(GiB)": 43.05, |
| "step": 3650, |
| "train_speed(iter/s)": 0.675615 |
| }, |
| { |
| "epoch": 2.368762151652625, |
| "grad_norm": 1.2578125, |
| "learning_rate": 9.075477570683171e-05, |
| "loss": 0.0353515625, |
| "memory(GiB)": 43.05, |
| "step": 3655, |
| "train_speed(iter/s)": 0.675357 |
| }, |
| { |
| "epoch": 2.37200259235256, |
| "grad_norm": 4.125, |
| "learning_rate": 9.072371110242622e-05, |
| "loss": 0.0337890625, |
| "memory(GiB)": 43.05, |
| "step": 3660, |
| "train_speed(iter/s)": 0.675373 |
| }, |
| { |
| "epoch": 2.375243033052495, |
| "grad_norm": 9.6875, |
| "learning_rate": 9.069259973122382e-05, |
| "loss": 0.0751953125, |
| "memory(GiB)": 43.05, |
| "step": 3665, |
| "train_speed(iter/s)": 0.675584 |
| }, |
| { |
| "epoch": 2.3784834737524303, |
| "grad_norm": 3.484375, |
| "learning_rate": 9.066144162895258e-05, |
| "loss": 0.029296875, |
| "memory(GiB)": 43.05, |
| "step": 3670, |
| "train_speed(iter/s)": 0.675409 |
| }, |
| { |
| "epoch": 2.3817239144523654, |
| "grad_norm": 13.25, |
| "learning_rate": 9.063023683139425e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 3675, |
| "train_speed(iter/s)": 0.675653 |
| }, |
| { |
| "epoch": 2.3849643551523005, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.059898537438415e-05, |
| "loss": 0.02578125, |
| "memory(GiB)": 43.05, |
| "step": 3680, |
| "train_speed(iter/s)": 0.675869 |
| }, |
| { |
| "epoch": 2.3882047958522357, |
| "grad_norm": 5.21875, |
| "learning_rate": 9.056768729381122e-05, |
| "loss": 0.052734375, |
| "memory(GiB)": 43.05, |
| "step": 3685, |
| "train_speed(iter/s)": 0.676039 |
| }, |
| { |
| "epoch": 2.3914452365521712, |
| "grad_norm": 15.3125, |
| "learning_rate": 9.053634262561794e-05, |
| "loss": 0.0232421875, |
| "memory(GiB)": 43.05, |
| "step": 3690, |
| "train_speed(iter/s)": 0.675961 |
| }, |
| { |
| "epoch": 2.3946856772521063, |
| "grad_norm": 4.90625, |
| "learning_rate": 9.050495140580029e-05, |
| "loss": 0.040625, |
| "memory(GiB)": 43.05, |
| "step": 3695, |
| "train_speed(iter/s)": 0.676292 |
| }, |
| { |
| "epoch": 2.3979261179520415, |
| "grad_norm": 15.375, |
| "learning_rate": 9.047351367040771e-05, |
| "loss": 0.0431640625, |
| "memory(GiB)": 43.05, |
| "step": 3700, |
| "train_speed(iter/s)": 0.676121 |
| }, |
| { |
| "epoch": 2.4011665586519766, |
| "grad_norm": 10.375, |
| "learning_rate": 9.044202945554302e-05, |
| "loss": 0.0333984375, |
| "memory(GiB)": 43.05, |
| "step": 3705, |
| "train_speed(iter/s)": 0.676301 |
| }, |
| { |
| "epoch": 2.4044069993519117, |
| "grad_norm": 14.625, |
| "learning_rate": 9.041049879736251e-05, |
| "loss": 0.0400390625, |
| "memory(GiB)": 43.05, |
| "step": 3710, |
| "train_speed(iter/s)": 0.676253 |
| }, |
| { |
| "epoch": 2.4076474400518473, |
| "grad_norm": 12.9375, |
| "learning_rate": 9.03789217320757e-05, |
| "loss": 0.0236328125, |
| "memory(GiB)": 43.05, |
| "step": 3715, |
| "train_speed(iter/s)": 0.676346 |
| }, |
| { |
| "epoch": 2.4108878807517824, |
| "grad_norm": 13.5625, |
| "learning_rate": 9.034729829594543e-05, |
| "loss": 0.0296875, |
| "memory(GiB)": 43.05, |
| "step": 3720, |
| "train_speed(iter/s)": 0.676561 |
| }, |
| { |
| "epoch": 2.4141283214517175, |
| "grad_norm": 4.75, |
| "learning_rate": 9.031562852528788e-05, |
| "loss": 0.0267578125, |
| "memory(GiB)": 43.05, |
| "step": 3725, |
| "train_speed(iter/s)": 0.67666 |
| }, |
| { |
| "epoch": 2.4173687621516526, |
| "grad_norm": 0.546875, |
| "learning_rate": 9.028391245647232e-05, |
| "loss": 0.03046875, |
| "memory(GiB)": 43.05, |
| "step": 3730, |
| "train_speed(iter/s)": 0.676936 |
| }, |
| { |
| "epoch": 2.4206092028515878, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.025215012592123e-05, |
| "loss": 0.03212890625, |
| "memory(GiB)": 43.05, |
| "step": 3735, |
| "train_speed(iter/s)": 0.677255 |
| }, |
| { |
| "epoch": 2.423849643551523, |
| "grad_norm": 12.5, |
| "learning_rate": 9.022034157011028e-05, |
| "loss": 0.0435546875, |
| "memory(GiB)": 43.05, |
| "step": 3740, |
| "train_speed(iter/s)": 0.677439 |
| }, |
| { |
| "epoch": 2.427090084251458, |
| "grad_norm": 2.5, |
| "learning_rate": 9.018848682556812e-05, |
| "loss": 0.036328125, |
| "memory(GiB)": 43.05, |
| "step": 3745, |
| "train_speed(iter/s)": 0.677526 |
| }, |
| { |
| "epoch": 2.4303305249513936, |
| "grad_norm": 11.8125, |
| "learning_rate": 9.015658592887653e-05, |
| "loss": 0.0548828125, |
| "memory(GiB)": 43.05, |
| "step": 3750, |
| "train_speed(iter/s)": 0.677739 |
| }, |
| { |
| "epoch": 2.4335709656513287, |
| "grad_norm": 4.8125, |
| "learning_rate": 9.012463891667023e-05, |
| "loss": 0.024609375, |
| "memory(GiB)": 43.05, |
| "step": 3755, |
| "train_speed(iter/s)": 0.678015 |
| }, |
| { |
| "epoch": 2.436811406351264, |
| "grad_norm": 3.625, |
| "learning_rate": 9.009264582563691e-05, |
| "loss": 0.0564453125, |
| "memory(GiB)": 43.05, |
| "step": 3760, |
| "train_speed(iter/s)": 0.678127 |
| }, |
| { |
| "epoch": 2.440051847051199, |
| "grad_norm": 15.875, |
| "learning_rate": 9.006060669251723e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 3765, |
| "train_speed(iter/s)": 0.677994 |
| }, |
| { |
| "epoch": 2.443292287751134, |
| "grad_norm": 0.83203125, |
| "learning_rate": 9.002852155410466e-05, |
| "loss": 0.0708984375, |
| "memory(GiB)": 43.05, |
| "step": 3770, |
| "train_speed(iter/s)": 0.678195 |
| }, |
| { |
| "epoch": 2.446532728451069, |
| "grad_norm": 11.5625, |
| "learning_rate": 8.999639044724555e-05, |
| "loss": 0.041796875, |
| "memory(GiB)": 43.05, |
| "step": 3775, |
| "train_speed(iter/s)": 0.678062 |
| }, |
| { |
| "epoch": 2.4497731691510047, |
| "grad_norm": 2.65625, |
| "learning_rate": 8.996421340883898e-05, |
| "loss": 0.05234375, |
| "memory(GiB)": 43.05, |
| "step": 3780, |
| "train_speed(iter/s)": 0.678184 |
| }, |
| { |
| "epoch": 2.45301360985094, |
| "grad_norm": 8.6875, |
| "learning_rate": 8.993199047583682e-05, |
| "loss": 0.042578125, |
| "memory(GiB)": 43.05, |
| "step": 3785, |
| "train_speed(iter/s)": 0.678383 |
| }, |
| { |
| "epoch": 2.456254050550875, |
| "grad_norm": 12.1875, |
| "learning_rate": 8.989972168524367e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 3790, |
| "train_speed(iter/s)": 0.678511 |
| }, |
| { |
| "epoch": 2.45949449125081, |
| "grad_norm": 3.59375, |
| "learning_rate": 8.986740707411674e-05, |
| "loss": 0.03046875, |
| "memory(GiB)": 43.05, |
| "step": 3795, |
| "train_speed(iter/s)": 0.678768 |
| }, |
| { |
| "epoch": 2.462734931950745, |
| "grad_norm": 2.578125, |
| "learning_rate": 8.983504667956588e-05, |
| "loss": 0.0583984375, |
| "memory(GiB)": 43.05, |
| "step": 3800, |
| "train_speed(iter/s)": 0.67896 |
| }, |
| { |
| "epoch": 2.4659753726506803, |
| "grad_norm": 10.75, |
| "learning_rate": 8.980264053875353e-05, |
| "loss": 0.0341796875, |
| "memory(GiB)": 43.05, |
| "step": 3805, |
| "train_speed(iter/s)": 0.679142 |
| }, |
| { |
| "epoch": 2.4692158133506155, |
| "grad_norm": 8.9375, |
| "learning_rate": 8.977018868889463e-05, |
| "loss": 0.023046875, |
| "memory(GiB)": 43.05, |
| "step": 3810, |
| "train_speed(iter/s)": 0.679118 |
| }, |
| { |
| "epoch": 2.472456254050551, |
| "grad_norm": 15.375, |
| "learning_rate": 8.973769116725666e-05, |
| "loss": 0.0537109375, |
| "memory(GiB)": 43.05, |
| "step": 3815, |
| "train_speed(iter/s)": 0.679129 |
| }, |
| { |
| "epoch": 2.475696694750486, |
| "grad_norm": 9.0, |
| "learning_rate": 8.97051480111595e-05, |
| "loss": 0.0283203125, |
| "memory(GiB)": 43.05, |
| "step": 3820, |
| "train_speed(iter/s)": 0.679249 |
| }, |
| { |
| "epoch": 2.4789371354504213, |
| "grad_norm": 10.5625, |
| "learning_rate": 8.967255925797549e-05, |
| "loss": 0.030078125, |
| "memory(GiB)": 43.05, |
| "step": 3825, |
| "train_speed(iter/s)": 0.679519 |
| }, |
| { |
| "epoch": 2.4821775761503564, |
| "grad_norm": 2.0, |
| "learning_rate": 8.963992494512928e-05, |
| "loss": 0.0267578125, |
| "memory(GiB)": 43.05, |
| "step": 3830, |
| "train_speed(iter/s)": 0.679637 |
| }, |
| { |
| "epoch": 2.4854180168502915, |
| "grad_norm": 12.25, |
| "learning_rate": 8.960724511009787e-05, |
| "loss": 0.0296875, |
| "memory(GiB)": 43.05, |
| "step": 3835, |
| "train_speed(iter/s)": 0.679944 |
| }, |
| { |
| "epoch": 2.488658457550227, |
| "grad_norm": 3.53125, |
| "learning_rate": 8.957451979041052e-05, |
| "loss": 0.0517578125, |
| "memory(GiB)": 43.05, |
| "step": 3840, |
| "train_speed(iter/s)": 0.679987 |
| }, |
| { |
| "epoch": 2.491898898250162, |
| "grad_norm": 1.9296875, |
| "learning_rate": 8.954174902364876e-05, |
| "loss": 0.0484375, |
| "memory(GiB)": 43.05, |
| "step": 3845, |
| "train_speed(iter/s)": 0.679953 |
| }, |
| { |
| "epoch": 2.4951393389500973, |
| "grad_norm": 0.90625, |
| "learning_rate": 8.950893284744629e-05, |
| "loss": 0.0509765625, |
| "memory(GiB)": 43.05, |
| "step": 3850, |
| "train_speed(iter/s)": 0.679988 |
| }, |
| { |
| "epoch": 2.4983797796500324, |
| "grad_norm": 11.8125, |
| "learning_rate": 8.947607129948892e-05, |
| "loss": 0.036328125, |
| "memory(GiB)": 43.05, |
| "step": 3855, |
| "train_speed(iter/s)": 0.680303 |
| }, |
| { |
| "epoch": 2.5016202203499676, |
| "grad_norm": 15.125, |
| "learning_rate": 8.944316441751461e-05, |
| "loss": 0.0427734375, |
| "memory(GiB)": 43.05, |
| "step": 3860, |
| "train_speed(iter/s)": 0.680617 |
| }, |
| { |
| "epoch": 2.5048606610499027, |
| "grad_norm": 13.625, |
| "learning_rate": 8.94102122393134e-05, |
| "loss": 0.0349609375, |
| "memory(GiB)": 43.05, |
| "step": 3865, |
| "train_speed(iter/s)": 0.680536 |
| }, |
| { |
| "epoch": 2.508101101749838, |
| "grad_norm": 11.75, |
| "learning_rate": 8.937721480272729e-05, |
| "loss": 0.043359375, |
| "memory(GiB)": 43.05, |
| "step": 3870, |
| "train_speed(iter/s)": 0.680779 |
| }, |
| { |
| "epoch": 2.511341542449773, |
| "grad_norm": 10.3125, |
| "learning_rate": 8.934417214565029e-05, |
| "loss": 0.06328125, |
| "memory(GiB)": 43.05, |
| "step": 3875, |
| "train_speed(iter/s)": 0.680895 |
| }, |
| { |
| "epoch": 2.5145819831497085, |
| "grad_norm": 15.25, |
| "learning_rate": 8.931108430602834e-05, |
| "loss": 0.05546875, |
| "memory(GiB)": 43.05, |
| "step": 3880, |
| "train_speed(iter/s)": 0.680929 |
| }, |
| { |
| "epoch": 2.5178224238496436, |
| "grad_norm": 4.9375, |
| "learning_rate": 8.927795132185925e-05, |
| "loss": 0.038671875, |
| "memory(GiB)": 43.05, |
| "step": 3885, |
| "train_speed(iter/s)": 0.680959 |
| }, |
| { |
| "epoch": 2.5210628645495787, |
| "grad_norm": 11.9375, |
| "learning_rate": 8.924477323119269e-05, |
| "loss": 0.03671875, |
| "memory(GiB)": 43.05, |
| "step": 3890, |
| "train_speed(iter/s)": 0.681138 |
| }, |
| { |
| "epoch": 2.524303305249514, |
| "grad_norm": 13.4375, |
| "learning_rate": 8.921155007213012e-05, |
| "loss": 0.0451171875, |
| "memory(GiB)": 43.05, |
| "step": 3895, |
| "train_speed(iter/s)": 0.681258 |
| }, |
| { |
| "epoch": 2.527543745949449, |
| "grad_norm": 11.4375, |
| "learning_rate": 8.917828188282476e-05, |
| "loss": 0.05234375, |
| "memory(GiB)": 43.05, |
| "step": 3900, |
| "train_speed(iter/s)": 0.681412 |
| }, |
| { |
| "epoch": 2.5307841866493845, |
| "grad_norm": 11.875, |
| "learning_rate": 8.914496870148156e-05, |
| "loss": 0.0517578125, |
| "memory(GiB)": 43.05, |
| "step": 3905, |
| "train_speed(iter/s)": 0.681379 |
| }, |
| { |
| "epoch": 2.5340246273493197, |
| "grad_norm": 12.875, |
| "learning_rate": 8.911161056635711e-05, |
| "loss": 0.048828125, |
| "memory(GiB)": 43.05, |
| "step": 3910, |
| "train_speed(iter/s)": 0.681674 |
| }, |
| { |
| "epoch": 2.537265068049255, |
| "grad_norm": 11.0, |
| "learning_rate": 8.907820751575961e-05, |
| "loss": 0.054296875, |
| "memory(GiB)": 43.05, |
| "step": 3915, |
| "train_speed(iter/s)": 0.681966 |
| }, |
| { |
| "epoch": 2.54050550874919, |
| "grad_norm": 7.0, |
| "learning_rate": 8.90447595880489e-05, |
| "loss": 0.0283203125, |
| "memory(GiB)": 43.05, |
| "step": 3920, |
| "train_speed(iter/s)": 0.682085 |
| }, |
| { |
| "epoch": 2.543745949449125, |
| "grad_norm": 12.9375, |
| "learning_rate": 8.901126682163632e-05, |
| "loss": 0.0365234375, |
| "memory(GiB)": 43.05, |
| "step": 3925, |
| "train_speed(iter/s)": 0.682319 |
| }, |
| { |
| "epoch": 2.54698639014906, |
| "grad_norm": 11.9375, |
| "learning_rate": 8.897772925498471e-05, |
| "loss": 0.050390625, |
| "memory(GiB)": 43.05, |
| "step": 3930, |
| "train_speed(iter/s)": 0.682379 |
| }, |
| { |
| "epoch": 2.5502268308489953, |
| "grad_norm": 1.1328125, |
| "learning_rate": 8.894414692660833e-05, |
| "loss": 0.0220703125, |
| "memory(GiB)": 43.05, |
| "step": 3935, |
| "train_speed(iter/s)": 0.682567 |
| }, |
| { |
| "epoch": 2.5534672715489304, |
| "grad_norm": 3.828125, |
| "learning_rate": 8.891051987507288e-05, |
| "loss": 0.0375, |
| "memory(GiB)": 43.05, |
| "step": 3940, |
| "train_speed(iter/s)": 0.68293 |
| }, |
| { |
| "epoch": 2.556707712248866, |
| "grad_norm": 1.09375, |
| "learning_rate": 8.887684813899542e-05, |
| "loss": 0.0447265625, |
| "memory(GiB)": 43.05, |
| "step": 3945, |
| "train_speed(iter/s)": 0.682891 |
| }, |
| { |
| "epoch": 2.559948152948801, |
| "grad_norm": 9.375, |
| "learning_rate": 8.884313175704428e-05, |
| "loss": 0.0466796875, |
| "memory(GiB)": 43.05, |
| "step": 3950, |
| "train_speed(iter/s)": 0.683066 |
| }, |
| { |
| "epoch": 2.563188593648736, |
| "grad_norm": 5.28125, |
| "learning_rate": 8.880937076793913e-05, |
| "loss": 0.0275390625, |
| "memory(GiB)": 43.05, |
| "step": 3955, |
| "train_speed(iter/s)": 0.683213 |
| }, |
| { |
| "epoch": 2.5664290343486713, |
| "grad_norm": 4.84375, |
| "learning_rate": 8.877556521045083e-05, |
| "loss": 0.0189453125, |
| "memory(GiB)": 43.05, |
| "step": 3960, |
| "train_speed(iter/s)": 0.68347 |
| }, |
| { |
| "epoch": 2.569669475048607, |
| "grad_norm": 4.9375, |
| "learning_rate": 8.87417151234014e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 3965, |
| "train_speed(iter/s)": 0.68316 |
| }, |
| { |
| "epoch": 2.572909915748542, |
| "grad_norm": 12.1875, |
| "learning_rate": 8.8707820545664e-05, |
| "loss": 0.0419921875, |
| "memory(GiB)": 43.05, |
| "step": 3970, |
| "train_speed(iter/s)": 0.683271 |
| }, |
| { |
| "epoch": 2.576150356448477, |
| "grad_norm": 14.9375, |
| "learning_rate": 8.867388151616296e-05, |
| "loss": 0.041796875, |
| "memory(GiB)": 43.05, |
| "step": 3975, |
| "train_speed(iter/s)": 0.683173 |
| }, |
| { |
| "epoch": 2.5793907971484122, |
| "grad_norm": 13.875, |
| "learning_rate": 8.863989807387356e-05, |
| "loss": 0.035546875, |
| "memory(GiB)": 43.05, |
| "step": 3980, |
| "train_speed(iter/s)": 0.683093 |
| }, |
| { |
| "epoch": 2.5826312378483474, |
| "grad_norm": 15.375, |
| "learning_rate": 8.860587025782214e-05, |
| "loss": 0.0546875, |
| "memory(GiB)": 43.05, |
| "step": 3985, |
| "train_speed(iter/s)": 0.683032 |
| }, |
| { |
| "epoch": 2.5858716785482825, |
| "grad_norm": 11.6875, |
| "learning_rate": 8.857179810708598e-05, |
| "loss": 0.0220703125, |
| "memory(GiB)": 43.05, |
| "step": 3990, |
| "train_speed(iter/s)": 0.683199 |
| }, |
| { |
| "epoch": 2.5891121192482176, |
| "grad_norm": 2.421875, |
| "learning_rate": 8.853768166079328e-05, |
| "loss": 0.0435546875, |
| "memory(GiB)": 43.05, |
| "step": 3995, |
| "train_speed(iter/s)": 0.682988 |
| }, |
| { |
| "epoch": 2.5923525599481527, |
| "grad_norm": 0.6171875, |
| "learning_rate": 8.850352095812309e-05, |
| "loss": 0.0236328125, |
| "memory(GiB)": 43.05, |
| "step": 4000, |
| "train_speed(iter/s)": 0.683171 |
| }, |
| { |
| "epoch": 2.5955930006480883, |
| "grad_norm": 12.125, |
| "learning_rate": 8.84693160383053e-05, |
| "loss": 0.03359375, |
| "memory(GiB)": 43.05, |
| "step": 4005, |
| "train_speed(iter/s)": 0.613524 |
| }, |
| { |
| "epoch": 2.5988334413480234, |
| "grad_norm": 1.1171875, |
| "learning_rate": 8.84350669406206e-05, |
| "loss": 0.0375, |
| "memory(GiB)": 43.05, |
| "step": 4010, |
| "train_speed(iter/s)": 0.613712 |
| }, |
| { |
| "epoch": 2.6020738820479585, |
| "grad_norm": 12.8125, |
| "learning_rate": 8.840077370440039e-05, |
| "loss": 0.0294921875, |
| "memory(GiB)": 43.05, |
| "step": 4015, |
| "train_speed(iter/s)": 0.613548 |
| }, |
| { |
| "epoch": 2.6053143227478937, |
| "grad_norm": 12.0, |
| "learning_rate": 8.83664363690267e-05, |
| "loss": 0.041796875, |
| "memory(GiB)": 43.05, |
| "step": 4020, |
| "train_speed(iter/s)": 0.61374 |
| }, |
| { |
| "epoch": 2.6085547634478288, |
| "grad_norm": 0.8984375, |
| "learning_rate": 8.833205497393234e-05, |
| "loss": 0.0263671875, |
| "memory(GiB)": 43.05, |
| "step": 4025, |
| "train_speed(iter/s)": 0.614105 |
| }, |
| { |
| "epoch": 2.6117952041477643, |
| "grad_norm": 16.75, |
| "learning_rate": 8.82976295586006e-05, |
| "loss": 0.0185546875, |
| "memory(GiB)": 43.05, |
| "step": 4030, |
| "train_speed(iter/s)": 0.61416 |
| }, |
| { |
| "epoch": 2.6150356448476995, |
| "grad_norm": 16.0, |
| "learning_rate": 8.826316016256536e-05, |
| "loss": 0.025, |
| "memory(GiB)": 43.05, |
| "step": 4035, |
| "train_speed(iter/s)": 0.614079 |
| }, |
| { |
| "epoch": 2.6182760855476346, |
| "grad_norm": 12.1875, |
| "learning_rate": 8.822864682541103e-05, |
| "loss": 0.0345703125, |
| "memory(GiB)": 43.05, |
| "step": 4040, |
| "train_speed(iter/s)": 0.614213 |
| }, |
| { |
| "epoch": 2.6215165262475697, |
| "grad_norm": 1.734375, |
| "learning_rate": 8.819408958677244e-05, |
| "loss": 0.04765625, |
| "memory(GiB)": 43.05, |
| "step": 4045, |
| "train_speed(iter/s)": 0.614431 |
| }, |
| { |
| "epoch": 2.624756966947505, |
| "grad_norm": 8.25, |
| "learning_rate": 8.815948848633487e-05, |
| "loss": 0.0603515625, |
| "memory(GiB)": 43.05, |
| "step": 4050, |
| "train_speed(iter/s)": 0.61471 |
| }, |
| { |
| "epoch": 2.62799740764744, |
| "grad_norm": 2.796875, |
| "learning_rate": 8.812484356383396e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 4055, |
| "train_speed(iter/s)": 0.614926 |
| }, |
| { |
| "epoch": 2.631237848347375, |
| "grad_norm": 2.640625, |
| "learning_rate": 8.809015485905565e-05, |
| "loss": 0.0337890625, |
| "memory(GiB)": 43.05, |
| "step": 4060, |
| "train_speed(iter/s)": 0.615193 |
| }, |
| { |
| "epoch": 2.63447828904731, |
| "grad_norm": 1.1875, |
| "learning_rate": 8.805542241183622e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 4065, |
| "train_speed(iter/s)": 0.615423 |
| }, |
| { |
| "epoch": 2.6377187297472457, |
| "grad_norm": 12.8125, |
| "learning_rate": 8.80206462620621e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 4070, |
| "train_speed(iter/s)": 0.61546 |
| }, |
| { |
| "epoch": 2.640959170447181, |
| "grad_norm": 3.265625, |
| "learning_rate": 8.798582644967e-05, |
| "loss": 0.0375, |
| "memory(GiB)": 43.05, |
| "step": 4075, |
| "train_speed(iter/s)": 0.615646 |
| }, |
| { |
| "epoch": 2.644199611147116, |
| "grad_norm": 10.625, |
| "learning_rate": 8.795096301464669e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 4080, |
| "train_speed(iter/s)": 0.615999 |
| }, |
| { |
| "epoch": 2.647440051847051, |
| "grad_norm": 14.9375, |
| "learning_rate": 8.79160559970291e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 4085, |
| "train_speed(iter/s)": 0.616085 |
| }, |
| { |
| "epoch": 2.6506804925469862, |
| "grad_norm": 0.66015625, |
| "learning_rate": 8.788110543690416e-05, |
| "loss": 0.029296875, |
| "memory(GiB)": 43.05, |
| "step": 4090, |
| "train_speed(iter/s)": 0.616129 |
| }, |
| { |
| "epoch": 2.653920933246922, |
| "grad_norm": 15.1875, |
| "learning_rate": 8.784611137440881e-05, |
| "loss": 0.033203125, |
| "memory(GiB)": 43.05, |
| "step": 4095, |
| "train_speed(iter/s)": 0.616351 |
| }, |
| { |
| "epoch": 2.657161373946857, |
| "grad_norm": 0.91015625, |
| "learning_rate": 8.781107384972999e-05, |
| "loss": 0.037109375, |
| "memory(GiB)": 43.05, |
| "step": 4100, |
| "train_speed(iter/s)": 0.616673 |
| }, |
| { |
| "epoch": 2.660401814646792, |
| "grad_norm": 10.625, |
| "learning_rate": 8.777599290310454e-05, |
| "loss": 0.0484375, |
| "memory(GiB)": 43.05, |
| "step": 4105, |
| "train_speed(iter/s)": 0.616658 |
| }, |
| { |
| "epoch": 2.663642255346727, |
| "grad_norm": 14.9375, |
| "learning_rate": 8.77408685748191e-05, |
| "loss": 0.027734375, |
| "memory(GiB)": 43.05, |
| "step": 4110, |
| "train_speed(iter/s)": 0.616972 |
| }, |
| { |
| "epoch": 2.6668826960466623, |
| "grad_norm": 0.96484375, |
| "learning_rate": 8.77057009052102e-05, |
| "loss": 0.0513671875, |
| "memory(GiB)": 43.05, |
| "step": 4115, |
| "train_speed(iter/s)": 0.616998 |
| }, |
| { |
| "epoch": 2.6701231367465974, |
| "grad_norm": 1.203125, |
| "learning_rate": 8.767048993466413e-05, |
| "loss": 0.03984375, |
| "memory(GiB)": 43.05, |
| "step": 4120, |
| "train_speed(iter/s)": 0.617305 |
| }, |
| { |
| "epoch": 2.6733635774465325, |
| "grad_norm": 10.75, |
| "learning_rate": 8.763523570361691e-05, |
| "loss": 0.016796875, |
| "memory(GiB)": 43.05, |
| "step": 4125, |
| "train_speed(iter/s)": 0.617432 |
| }, |
| { |
| "epoch": 2.6766040181464676, |
| "grad_norm": 16.25, |
| "learning_rate": 8.75999382525542e-05, |
| "loss": 0.0404296875, |
| "memory(GiB)": 43.05, |
| "step": 4130, |
| "train_speed(iter/s)": 0.61743 |
| }, |
| { |
| "epoch": 2.679844458846403, |
| "grad_norm": 13.3125, |
| "learning_rate": 8.756459762201133e-05, |
| "loss": 0.03046875, |
| "memory(GiB)": 43.05, |
| "step": 4135, |
| "train_speed(iter/s)": 0.617385 |
| }, |
| { |
| "epoch": 2.6830848995463383, |
| "grad_norm": 14.1875, |
| "learning_rate": 8.752921385257322e-05, |
| "loss": 0.0578125, |
| "memory(GiB)": 43.05, |
| "step": 4140, |
| "train_speed(iter/s)": 0.617612 |
| }, |
| { |
| "epoch": 2.6863253402462735, |
| "grad_norm": 14.75, |
| "learning_rate": 8.749378698487429e-05, |
| "loss": 0.0392578125, |
| "memory(GiB)": 43.05, |
| "step": 4145, |
| "train_speed(iter/s)": 0.617707 |
| }, |
| { |
| "epoch": 2.6895657809462086, |
| "grad_norm": 0.984375, |
| "learning_rate": 8.745831705959852e-05, |
| "loss": 0.037109375, |
| "memory(GiB)": 43.05, |
| "step": 4150, |
| "train_speed(iter/s)": 0.61787 |
| }, |
| { |
| "epoch": 2.692806221646144, |
| "grad_norm": 2.859375, |
| "learning_rate": 8.74228041174793e-05, |
| "loss": 0.0361328125, |
| "memory(GiB)": 43.05, |
| "step": 4155, |
| "train_speed(iter/s)": 0.618179 |
| }, |
| { |
| "epoch": 2.6960466623460793, |
| "grad_norm": 3.34375, |
| "learning_rate": 8.738724819929938e-05, |
| "loss": 0.0419921875, |
| "memory(GiB)": 43.05, |
| "step": 4160, |
| "train_speed(iter/s)": 0.618286 |
| }, |
| { |
| "epoch": 2.6992871030460144, |
| "grad_norm": 11.0625, |
| "learning_rate": 8.735164934589092e-05, |
| "loss": 0.037109375, |
| "memory(GiB)": 43.05, |
| "step": 4165, |
| "train_speed(iter/s)": 0.618624 |
| }, |
| { |
| "epoch": 2.7025275437459495, |
| "grad_norm": 3.375, |
| "learning_rate": 8.731600759813538e-05, |
| "loss": 0.0177734375, |
| "memory(GiB)": 43.05, |
| "step": 4170, |
| "train_speed(iter/s)": 0.618786 |
| }, |
| { |
| "epoch": 2.7057679844458846, |
| "grad_norm": 4.28125, |
| "learning_rate": 8.728032299696348e-05, |
| "loss": 0.042578125, |
| "memory(GiB)": 43.05, |
| "step": 4175, |
| "train_speed(iter/s)": 0.618852 |
| }, |
| { |
| "epoch": 2.7090084251458197, |
| "grad_norm": 17.375, |
| "learning_rate": 8.724459558335512e-05, |
| "loss": 0.0630859375, |
| "memory(GiB)": 43.05, |
| "step": 4180, |
| "train_speed(iter/s)": 0.618825 |
| }, |
| { |
| "epoch": 2.712248865845755, |
| "grad_norm": 8.9375, |
| "learning_rate": 8.72088253983394e-05, |
| "loss": 0.0353515625, |
| "memory(GiB)": 43.05, |
| "step": 4185, |
| "train_speed(iter/s)": 0.618844 |
| }, |
| { |
| "epoch": 2.71548930654569, |
| "grad_norm": 12.25, |
| "learning_rate": 8.71730124829945e-05, |
| "loss": 0.0630859375, |
| "memory(GiB)": 43.05, |
| "step": 4190, |
| "train_speed(iter/s)": 0.618973 |
| }, |
| { |
| "epoch": 2.7187297472456255, |
| "grad_norm": 15.5625, |
| "learning_rate": 8.713715687844772e-05, |
| "loss": 0.0265625, |
| "memory(GiB)": 43.05, |
| "step": 4195, |
| "train_speed(iter/s)": 0.61914 |
| }, |
| { |
| "epoch": 2.7219701879455607, |
| "grad_norm": 1.5, |
| "learning_rate": 8.710125862587537e-05, |
| "loss": 0.034765625, |
| "memory(GiB)": 43.05, |
| "step": 4200, |
| "train_speed(iter/s)": 0.619254 |
| }, |
| { |
| "epoch": 2.725210628645496, |
| "grad_norm": 9.0, |
| "learning_rate": 8.706531776650271e-05, |
| "loss": 0.0296875, |
| "memory(GiB)": 43.05, |
| "step": 4205, |
| "train_speed(iter/s)": 0.619512 |
| }, |
| { |
| "epoch": 2.728451069345431, |
| "grad_norm": 4.90625, |
| "learning_rate": 8.702933434160395e-05, |
| "loss": 0.0244140625, |
| "memory(GiB)": 43.05, |
| "step": 4210, |
| "train_speed(iter/s)": 0.619649 |
| }, |
| { |
| "epoch": 2.731691510045366, |
| "grad_norm": 13.875, |
| "learning_rate": 8.699330839250217e-05, |
| "loss": 0.035546875, |
| "memory(GiB)": 43.05, |
| "step": 4215, |
| "train_speed(iter/s)": 0.619626 |
| }, |
| { |
| "epoch": 2.7349319507453016, |
| "grad_norm": 8.1875, |
| "learning_rate": 8.69572399605693e-05, |
| "loss": 0.048828125, |
| "memory(GiB)": 43.05, |
| "step": 4220, |
| "train_speed(iter/s)": 0.619676 |
| }, |
| { |
| "epoch": 2.7381723914452367, |
| "grad_norm": 12.1875, |
| "learning_rate": 8.692112908722607e-05, |
| "loss": 0.019921875, |
| "memory(GiB)": 43.05, |
| "step": 4225, |
| "train_speed(iter/s)": 0.61986 |
| }, |
| { |
| "epoch": 2.741412832145172, |
| "grad_norm": 3.15625, |
| "learning_rate": 8.68849758139419e-05, |
| "loss": 0.0443359375, |
| "memory(GiB)": 43.05, |
| "step": 4230, |
| "train_speed(iter/s)": 0.619941 |
| }, |
| { |
| "epoch": 2.744653272845107, |
| "grad_norm": 9.875, |
| "learning_rate": 8.684878018223497e-05, |
| "loss": 0.0369140625, |
| "memory(GiB)": 43.05, |
| "step": 4235, |
| "train_speed(iter/s)": 0.620209 |
| }, |
| { |
| "epoch": 2.747893713545042, |
| "grad_norm": 1.640625, |
| "learning_rate": 8.6812542233672e-05, |
| "loss": 0.03984375, |
| "memory(GiB)": 43.05, |
| "step": 4240, |
| "train_speed(iter/s)": 0.620321 |
| }, |
| { |
| "epoch": 2.751134154244977, |
| "grad_norm": 2.140625, |
| "learning_rate": 8.677626200986844e-05, |
| "loss": 0.0478515625, |
| "memory(GiB)": 43.05, |
| "step": 4245, |
| "train_speed(iter/s)": 0.620371 |
| }, |
| { |
| "epoch": 2.7543745949449123, |
| "grad_norm": 1.2578125, |
| "learning_rate": 8.673993955248818e-05, |
| "loss": 0.037109375, |
| "memory(GiB)": 43.05, |
| "step": 4250, |
| "train_speed(iter/s)": 0.620395 |
| }, |
| { |
| "epoch": 2.7576150356448474, |
| "grad_norm": 2.078125, |
| "learning_rate": 8.670357490324365e-05, |
| "loss": 0.0212890625, |
| "memory(GiB)": 43.05, |
| "step": 4255, |
| "train_speed(iter/s)": 0.620735 |
| }, |
| { |
| "epoch": 2.760855476344783, |
| "grad_norm": 3.859375, |
| "learning_rate": 8.666716810389577e-05, |
| "loss": 0.0431640625, |
| "memory(GiB)": 43.05, |
| "step": 4260, |
| "train_speed(iter/s)": 0.620936 |
| }, |
| { |
| "epoch": 2.764095917044718, |
| "grad_norm": 15.25, |
| "learning_rate": 8.663071919625378e-05, |
| "loss": 0.02578125, |
| "memory(GiB)": 43.05, |
| "step": 4265, |
| "train_speed(iter/s)": 0.621061 |
| }, |
| { |
| "epoch": 2.7673363577446533, |
| "grad_norm": 9.8125, |
| "learning_rate": 8.659422822217536e-05, |
| "loss": 0.03671875, |
| "memory(GiB)": 43.05, |
| "step": 4270, |
| "train_speed(iter/s)": 0.621354 |
| }, |
| { |
| "epoch": 2.7705767984445884, |
| "grad_norm": 5.15625, |
| "learning_rate": 8.655769522356646e-05, |
| "loss": 0.0458984375, |
| "memory(GiB)": 43.05, |
| "step": 4275, |
| "train_speed(iter/s)": 0.621649 |
| }, |
| { |
| "epoch": 2.7738172391445235, |
| "grad_norm": 1.015625, |
| "learning_rate": 8.652112024238129e-05, |
| "loss": 0.0392578125, |
| "memory(GiB)": 43.05, |
| "step": 4280, |
| "train_speed(iter/s)": 0.621851 |
| }, |
| { |
| "epoch": 2.777057679844459, |
| "grad_norm": 14.75, |
| "learning_rate": 8.648450332062226e-05, |
| "loss": 0.023828125, |
| "memory(GiB)": 43.05, |
| "step": 4285, |
| "train_speed(iter/s)": 0.621832 |
| }, |
| { |
| "epoch": 2.780298120544394, |
| "grad_norm": 5.25, |
| "learning_rate": 8.644784450033999e-05, |
| "loss": 0.0302734375, |
| "memory(GiB)": 43.05, |
| "step": 4290, |
| "train_speed(iter/s)": 0.621946 |
| }, |
| { |
| "epoch": 2.7835385612443293, |
| "grad_norm": 9.25, |
| "learning_rate": 8.641114382363318e-05, |
| "loss": 0.024609375, |
| "memory(GiB)": 43.05, |
| "step": 4295, |
| "train_speed(iter/s)": 0.62205 |
| }, |
| { |
| "epoch": 2.7867790019442644, |
| "grad_norm": 0.66796875, |
| "learning_rate": 8.637440133264858e-05, |
| "loss": 0.04287109375, |
| "memory(GiB)": 43.05, |
| "step": 4300, |
| "train_speed(iter/s)": 0.6219 |
| }, |
| { |
| "epoch": 2.7900194426441995, |
| "grad_norm": 2.03125, |
| "learning_rate": 8.633761706958102e-05, |
| "loss": 0.04609375, |
| "memory(GiB)": 43.05, |
| "step": 4305, |
| "train_speed(iter/s)": 0.622058 |
| }, |
| { |
| "epoch": 2.7932598833441347, |
| "grad_norm": 3.25, |
| "learning_rate": 8.630079107667324e-05, |
| "loss": 0.0390625, |
| "memory(GiB)": 43.05, |
| "step": 4310, |
| "train_speed(iter/s)": 0.622265 |
| }, |
| { |
| "epoch": 2.79650032404407, |
| "grad_norm": 9.9375, |
| "learning_rate": 8.626392339621595e-05, |
| "loss": 0.0154296875, |
| "memory(GiB)": 43.05, |
| "step": 4315, |
| "train_speed(iter/s)": 0.622562 |
| }, |
| { |
| "epoch": 2.7997407647440054, |
| "grad_norm": 1.4375, |
| "learning_rate": 8.622701407054769e-05, |
| "loss": 0.021875, |
| "memory(GiB)": 43.05, |
| "step": 4320, |
| "train_speed(iter/s)": 0.622627 |
| }, |
| { |
| "epoch": 2.8029812054439405, |
| "grad_norm": 14.5625, |
| "learning_rate": 8.619006314205484e-05, |
| "loss": 0.0330078125, |
| "memory(GiB)": 43.05, |
| "step": 4325, |
| "train_speed(iter/s)": 0.622871 |
| }, |
| { |
| "epoch": 2.8062216461438756, |
| "grad_norm": 0.423828125, |
| "learning_rate": 8.61530706531716e-05, |
| "loss": 0.0513671875, |
| "memory(GiB)": 43.05, |
| "step": 4330, |
| "train_speed(iter/s)": 0.62312 |
| }, |
| { |
| "epoch": 2.8094620868438107, |
| "grad_norm": 11.25, |
| "learning_rate": 8.611603664637983e-05, |
| "loss": 0.05146484375, |
| "memory(GiB)": 43.05, |
| "step": 4335, |
| "train_speed(iter/s)": 0.623218 |
| }, |
| { |
| "epoch": 2.812702527543746, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.607896116420911e-05, |
| "loss": 0.0650390625, |
| "memory(GiB)": 43.05, |
| "step": 4340, |
| "train_speed(iter/s)": 0.623092 |
| }, |
| { |
| "epoch": 2.8159429682436814, |
| "grad_norm": 11.0, |
| "learning_rate": 8.60418442492366e-05, |
| "loss": 0.0603515625, |
| "memory(GiB)": 43.05, |
| "step": 4345, |
| "train_speed(iter/s)": 0.623057 |
| }, |
| { |
| "epoch": 2.8191834089436165, |
| "grad_norm": 2.65625, |
| "learning_rate": 8.600468594408715e-05, |
| "loss": 0.0490234375, |
| "memory(GiB)": 43.05, |
| "step": 4350, |
| "train_speed(iter/s)": 0.623127 |
| }, |
| { |
| "epoch": 2.8224238496435516, |
| "grad_norm": 11.375, |
| "learning_rate": 8.596748629143302e-05, |
| "loss": 0.05078125, |
| "memory(GiB)": 43.05, |
| "step": 4355, |
| "train_speed(iter/s)": 0.623246 |
| }, |
| { |
| "epoch": 2.8256642903434868, |
| "grad_norm": 2.859375, |
| "learning_rate": 8.593024533399403e-05, |
| "loss": 0.0470703125, |
| "memory(GiB)": 43.05, |
| "step": 4360, |
| "train_speed(iter/s)": 0.623443 |
| }, |
| { |
| "epoch": 2.828904731043422, |
| "grad_norm": 15.0625, |
| "learning_rate": 8.589296311453738e-05, |
| "loss": 0.0439453125, |
| "memory(GiB)": 43.05, |
| "step": 4365, |
| "train_speed(iter/s)": 0.623511 |
| }, |
| { |
| "epoch": 2.832145171743357, |
| "grad_norm": 2.453125, |
| "learning_rate": 8.585563967587773e-05, |
| "loss": 0.0328125, |
| "memory(GiB)": 43.05, |
| "step": 4370, |
| "train_speed(iter/s)": 0.623641 |
| }, |
| { |
| "epoch": 2.835385612443292, |
| "grad_norm": 1.9453125, |
| "learning_rate": 8.581827506087699e-05, |
| "loss": 0.0310546875, |
| "memory(GiB)": 43.05, |
| "step": 4375, |
| "train_speed(iter/s)": 0.623852 |
| }, |
| { |
| "epoch": 2.8386260531432272, |
| "grad_norm": 15.9375, |
| "learning_rate": 8.578086931244443e-05, |
| "loss": 0.0607421875, |
| "memory(GiB)": 43.05, |
| "step": 4380, |
| "train_speed(iter/s)": 0.623774 |
| }, |
| { |
| "epoch": 2.841866493843163, |
| "grad_norm": 1.609375, |
| "learning_rate": 8.574342247353648e-05, |
| "loss": 0.04921875, |
| "memory(GiB)": 43.05, |
| "step": 4385, |
| "train_speed(iter/s)": 0.62397 |
| }, |
| { |
| "epoch": 2.845106934543098, |
| "grad_norm": 6.875, |
| "learning_rate": 8.570593458715683e-05, |
| "loss": 0.0216796875, |
| "memory(GiB)": 43.05, |
| "step": 4390, |
| "train_speed(iter/s)": 0.624128 |
| }, |
| { |
| "epoch": 2.848347375243033, |
| "grad_norm": 10.9375, |
| "learning_rate": 8.566840569635629e-05, |
| "loss": 0.0353515625, |
| "memory(GiB)": 43.05, |
| "step": 4395, |
| "train_speed(iter/s)": 0.624316 |
| }, |
| { |
| "epoch": 2.851587815942968, |
| "grad_norm": 12.3125, |
| "learning_rate": 8.563083584423274e-05, |
| "loss": 0.0423828125, |
| "memory(GiB)": 43.05, |
| "step": 4400, |
| "train_speed(iter/s)": 0.62439 |
| }, |
| { |
| "epoch": 2.8548282566429033, |
| "grad_norm": 3.9375, |
| "learning_rate": 8.55932250739311e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 4405, |
| "train_speed(iter/s)": 0.624502 |
| }, |
| { |
| "epoch": 2.858068697342839, |
| "grad_norm": 0.703125, |
| "learning_rate": 8.555557342864329e-05, |
| "loss": 0.0498046875, |
| "memory(GiB)": 43.05, |
| "step": 4410, |
| "train_speed(iter/s)": 0.6247 |
| }, |
| { |
| "epoch": 2.861309138042774, |
| "grad_norm": 5.78125, |
| "learning_rate": 8.55178809516082e-05, |
| "loss": 0.0416015625, |
| "memory(GiB)": 43.05, |
| "step": 4415, |
| "train_speed(iter/s)": 0.62471 |
| }, |
| { |
| "epoch": 2.864549578742709, |
| "grad_norm": 12.0, |
| "learning_rate": 8.548014768611154e-05, |
| "loss": 0.037890625, |
| "memory(GiB)": 43.05, |
| "step": 4420, |
| "train_speed(iter/s)": 0.624841 |
| }, |
| { |
| "epoch": 2.8677900194426442, |
| "grad_norm": 14.5, |
| "learning_rate": 8.544237367548591e-05, |
| "loss": 0.025, |
| "memory(GiB)": 43.05, |
| "step": 4425, |
| "train_speed(iter/s)": 0.625088 |
| }, |
| { |
| "epoch": 2.8710304601425793, |
| "grad_norm": 0.416015625, |
| "learning_rate": 8.540455896311073e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 4430, |
| "train_speed(iter/s)": 0.625286 |
| }, |
| { |
| "epoch": 2.8742709008425145, |
| "grad_norm": 7.84375, |
| "learning_rate": 8.536670359241208e-05, |
| "loss": 0.0458984375, |
| "memory(GiB)": 43.05, |
| "step": 4435, |
| "train_speed(iter/s)": 0.62535 |
| }, |
| { |
| "epoch": 2.8775113415424496, |
| "grad_norm": 15.3125, |
| "learning_rate": 8.532880760686281e-05, |
| "loss": 0.0205078125, |
| "memory(GiB)": 43.05, |
| "step": 4440, |
| "train_speed(iter/s)": 0.625458 |
| }, |
| { |
| "epoch": 2.8807517822423847, |
| "grad_norm": 2.78125, |
| "learning_rate": 8.529087104998235e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 4445, |
| "train_speed(iter/s)": 0.625444 |
| }, |
| { |
| "epoch": 2.8839922229423203, |
| "grad_norm": 5.375, |
| "learning_rate": 8.525289396533678e-05, |
| "loss": 0.0490234375, |
| "memory(GiB)": 43.05, |
| "step": 4450, |
| "train_speed(iter/s)": 0.625487 |
| }, |
| { |
| "epoch": 2.8872326636422554, |
| "grad_norm": 10.75, |
| "learning_rate": 8.521487639653866e-05, |
| "loss": 0.0416015625, |
| "memory(GiB)": 43.05, |
| "step": 4455, |
| "train_speed(iter/s)": 0.625693 |
| }, |
| { |
| "epoch": 2.8904731043421905, |
| "grad_norm": 4.28125, |
| "learning_rate": 8.517681838724709e-05, |
| "loss": 0.0384765625, |
| "memory(GiB)": 43.05, |
| "step": 4460, |
| "train_speed(iter/s)": 0.625807 |
| }, |
| { |
| "epoch": 2.8937135450421256, |
| "grad_norm": 1.6328125, |
| "learning_rate": 8.513871998116763e-05, |
| "loss": 0.01796875, |
| "memory(GiB)": 43.05, |
| "step": 4465, |
| "train_speed(iter/s)": 0.625957 |
| }, |
| { |
| "epoch": 2.8969539857420608, |
| "grad_norm": 10.3125, |
| "learning_rate": 8.510058122205213e-05, |
| "loss": 0.044921875, |
| "memory(GiB)": 43.05, |
| "step": 4470, |
| "train_speed(iter/s)": 0.626276 |
| }, |
| { |
| "epoch": 2.9001944264419963, |
| "grad_norm": 9.4375, |
| "learning_rate": 8.506240215369888e-05, |
| "loss": 0.0388671875, |
| "memory(GiB)": 43.05, |
| "step": 4475, |
| "train_speed(iter/s)": 0.626521 |
| }, |
| { |
| "epoch": 2.9034348671419314, |
| "grad_norm": 6.0625, |
| "learning_rate": 8.502418281995245e-05, |
| "loss": 0.0470703125, |
| "memory(GiB)": 43.05, |
| "step": 4480, |
| "train_speed(iter/s)": 0.626578 |
| }, |
| { |
| "epoch": 2.9066753078418666, |
| "grad_norm": 1.359375, |
| "learning_rate": 8.498592326470361e-05, |
| "loss": 0.0333984375, |
| "memory(GiB)": 43.05, |
| "step": 4485, |
| "train_speed(iter/s)": 0.626811 |
| }, |
| { |
| "epoch": 2.9099157485418017, |
| "grad_norm": 12.3125, |
| "learning_rate": 8.494762353188931e-05, |
| "loss": 0.04375, |
| "memory(GiB)": 43.05, |
| "step": 4490, |
| "train_speed(iter/s)": 0.627084 |
| }, |
| { |
| "epoch": 2.913156189241737, |
| "grad_norm": 12.6875, |
| "learning_rate": 8.490928366549272e-05, |
| "loss": 0.0671875, |
| "memory(GiB)": 43.05, |
| "step": 4495, |
| "train_speed(iter/s)": 0.626934 |
| }, |
| { |
| "epoch": 2.916396629941672, |
| "grad_norm": 10.9375, |
| "learning_rate": 8.487090370954301e-05, |
| "loss": 0.0318359375, |
| "memory(GiB)": 43.05, |
| "step": 4500, |
| "train_speed(iter/s)": 0.627216 |
| }, |
| { |
| "epoch": 2.919637070641607, |
| "grad_norm": 9.625, |
| "learning_rate": 8.483248370811545e-05, |
| "loss": 0.043359375, |
| "memory(GiB)": 43.05, |
| "step": 4505, |
| "train_speed(iter/s)": 0.627221 |
| }, |
| { |
| "epoch": 2.9228775113415426, |
| "grad_norm": 8.6875, |
| "learning_rate": 8.479402370533127e-05, |
| "loss": 0.0388671875, |
| "memory(GiB)": 43.05, |
| "step": 4510, |
| "train_speed(iter/s)": 0.627494 |
| }, |
| { |
| "epoch": 2.9261179520414777, |
| "grad_norm": 1.84375, |
| "learning_rate": 8.475552374535763e-05, |
| "loss": 0.0306640625, |
| "memory(GiB)": 43.05, |
| "step": 4515, |
| "train_speed(iter/s)": 0.627641 |
| }, |
| { |
| "epoch": 2.929358392741413, |
| "grad_norm": 12.375, |
| "learning_rate": 8.47169838724076e-05, |
| "loss": 0.0369140625, |
| "memory(GiB)": 43.05, |
| "step": 4520, |
| "train_speed(iter/s)": 0.627674 |
| }, |
| { |
| "epoch": 2.932598833441348, |
| "grad_norm": 2.359375, |
| "learning_rate": 8.467840413074007e-05, |
| "loss": 0.048046875, |
| "memory(GiB)": 43.05, |
| "step": 4525, |
| "train_speed(iter/s)": 0.627871 |
| }, |
| { |
| "epoch": 2.935839274141283, |
| "grad_norm": 13.1875, |
| "learning_rate": 8.463978456465971e-05, |
| "loss": 0.0341796875, |
| "memory(GiB)": 43.05, |
| "step": 4530, |
| "train_speed(iter/s)": 0.627937 |
| }, |
| { |
| "epoch": 2.9390797148412187, |
| "grad_norm": 0.8359375, |
| "learning_rate": 8.460112521851695e-05, |
| "loss": 0.0626953125, |
| "memory(GiB)": 43.05, |
| "step": 4535, |
| "train_speed(iter/s)": 0.628024 |
| }, |
| { |
| "epoch": 2.942320155541154, |
| "grad_norm": 10.1875, |
| "learning_rate": 8.456242613670788e-05, |
| "loss": 0.0517578125, |
| "memory(GiB)": 43.05, |
| "step": 4540, |
| "train_speed(iter/s)": 0.628172 |
| }, |
| { |
| "epoch": 2.945560596241089, |
| "grad_norm": 11.0, |
| "learning_rate": 8.452368736367422e-05, |
| "loss": 0.0330078125, |
| "memory(GiB)": 43.05, |
| "step": 4545, |
| "train_speed(iter/s)": 0.628269 |
| }, |
| { |
| "epoch": 2.948801036941024, |
| "grad_norm": 1.828125, |
| "learning_rate": 8.448490894390328e-05, |
| "loss": 0.0439453125, |
| "memory(GiB)": 43.05, |
| "step": 4550, |
| "train_speed(iter/s)": 0.628152 |
| }, |
| { |
| "epoch": 2.952041477640959, |
| "grad_norm": 13.5, |
| "learning_rate": 8.44460909219279e-05, |
| "loss": 0.03203125, |
| "memory(GiB)": 43.05, |
| "step": 4555, |
| "train_speed(iter/s)": 0.628304 |
| }, |
| { |
| "epoch": 2.9552819183408943, |
| "grad_norm": 18.375, |
| "learning_rate": 8.440723334232641e-05, |
| "loss": 0.0271484375, |
| "memory(GiB)": 43.05, |
| "step": 4560, |
| "train_speed(iter/s)": 0.628485 |
| }, |
| { |
| "epoch": 2.9585223590408294, |
| "grad_norm": 10.5, |
| "learning_rate": 8.436833624972255e-05, |
| "loss": 0.038671875, |
| "memory(GiB)": 43.05, |
| "step": 4565, |
| "train_speed(iter/s)": 0.628602 |
| }, |
| { |
| "epoch": 2.9617627997407645, |
| "grad_norm": 2.109375, |
| "learning_rate": 8.432939968878546e-05, |
| "loss": 0.03046875, |
| "memory(GiB)": 43.05, |
| "step": 4570, |
| "train_speed(iter/s)": 0.628877 |
| }, |
| { |
| "epoch": 2.9650032404407, |
| "grad_norm": 1.3671875, |
| "learning_rate": 8.429042370422953e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 4575, |
| "train_speed(iter/s)": 0.628863 |
| }, |
| { |
| "epoch": 2.968243681140635, |
| "grad_norm": 10.3125, |
| "learning_rate": 8.425140834081455e-05, |
| "loss": 0.0587890625, |
| "memory(GiB)": 43.05, |
| "step": 4580, |
| "train_speed(iter/s)": 0.62893 |
| }, |
| { |
| "epoch": 2.9714841218405703, |
| "grad_norm": 13.0625, |
| "learning_rate": 8.421235364334541e-05, |
| "loss": 0.037109375, |
| "memory(GiB)": 43.05, |
| "step": 4585, |
| "train_speed(iter/s)": 0.629116 |
| }, |
| { |
| "epoch": 2.9747245625405054, |
| "grad_norm": 16.125, |
| "learning_rate": 8.417325965667226e-05, |
| "loss": 0.0375, |
| "memory(GiB)": 43.05, |
| "step": 4590, |
| "train_speed(iter/s)": 0.629361 |
| }, |
| { |
| "epoch": 2.9779650032404406, |
| "grad_norm": 13.6875, |
| "learning_rate": 8.413412642569032e-05, |
| "loss": 0.03046875, |
| "memory(GiB)": 43.05, |
| "step": 4595, |
| "train_speed(iter/s)": 0.629532 |
| }, |
| { |
| "epoch": 2.981205443940376, |
| "grad_norm": 3.21875, |
| "learning_rate": 8.409495399533989e-05, |
| "loss": 0.041015625, |
| "memory(GiB)": 43.05, |
| "step": 4600, |
| "train_speed(iter/s)": 0.62976 |
| }, |
| { |
| "epoch": 2.9844458846403112, |
| "grad_norm": 1.4296875, |
| "learning_rate": 8.405574241060628e-05, |
| "loss": 0.054296875, |
| "memory(GiB)": 43.05, |
| "step": 4605, |
| "train_speed(iter/s)": 0.629953 |
| }, |
| { |
| "epoch": 2.9876863253402464, |
| "grad_norm": 15.5, |
| "learning_rate": 8.40164917165198e-05, |
| "loss": 0.05546875, |
| "memory(GiB)": 43.05, |
| "step": 4610, |
| "train_speed(iter/s)": 0.629931 |
| }, |
| { |
| "epoch": 2.9909267660401815, |
| "grad_norm": 0.0, |
| "learning_rate": 8.397720195815562e-05, |
| "loss": 0.022265625, |
| "memory(GiB)": 43.05, |
| "step": 4615, |
| "train_speed(iter/s)": 0.630124 |
| }, |
| { |
| "epoch": 2.9941672067401166, |
| "grad_norm": 0.7265625, |
| "learning_rate": 8.39378731806338e-05, |
| "loss": 0.04296875, |
| "memory(GiB)": 43.05, |
| "step": 4620, |
| "train_speed(iter/s)": 0.630453 |
| }, |
| { |
| "epoch": 2.9974076474400517, |
| "grad_norm": 9.25, |
| "learning_rate": 8.389850542911921e-05, |
| "loss": 0.0203125, |
| "memory(GiB)": 43.05, |
| "step": 4625, |
| "train_speed(iter/s)": 0.630426 |
| }, |
| { |
| "epoch": 3.000648088139987, |
| "grad_norm": 0.66015625, |
| "learning_rate": 8.38590987488215e-05, |
| "loss": 0.041796875, |
| "memory(GiB)": 43.05, |
| "step": 4630, |
| "train_speed(iter/s)": 0.630339 |
| }, |
| { |
| "epoch": 3.0038885288399224, |
| "grad_norm": 12.75, |
| "learning_rate": 8.381965318499493e-05, |
| "loss": 0.03984375, |
| "memory(GiB)": 43.05, |
| "step": 4635, |
| "train_speed(iter/s)": 0.630271 |
| }, |
| { |
| "epoch": 3.0071289695398575, |
| "grad_norm": 12.0, |
| "learning_rate": 8.378016878293855e-05, |
| "loss": 0.0349609375, |
| "memory(GiB)": 43.05, |
| "step": 4640, |
| "train_speed(iter/s)": 0.630251 |
| }, |
| { |
| "epoch": 3.0103694102397927, |
| "grad_norm": 3.734375, |
| "learning_rate": 8.374064558799593e-05, |
| "loss": 0.0376953125, |
| "memory(GiB)": 43.05, |
| "step": 4645, |
| "train_speed(iter/s)": 0.630261 |
| }, |
| { |
| "epoch": 3.0136098509397278, |
| "grad_norm": 17.375, |
| "learning_rate": 8.370108364555518e-05, |
| "loss": 0.0333984375, |
| "memory(GiB)": 43.05, |
| "step": 4650, |
| "train_speed(iter/s)": 0.630327 |
| }, |
| { |
| "epoch": 3.016850291639663, |
| "grad_norm": 0.92578125, |
| "learning_rate": 8.366148300104894e-05, |
| "loss": 0.0171875, |
| "memory(GiB)": 43.05, |
| "step": 4655, |
| "train_speed(iter/s)": 0.630595 |
| }, |
| { |
| "epoch": 3.020090732339598, |
| "grad_norm": 10.6875, |
| "learning_rate": 8.362184369995429e-05, |
| "loss": 0.0474609375, |
| "memory(GiB)": 43.05, |
| "step": 4660, |
| "train_speed(iter/s)": 0.630768 |
| }, |
| { |
| "epoch": 3.0233311730395336, |
| "grad_norm": 1.0234375, |
| "learning_rate": 8.358216578779271e-05, |
| "loss": 0.0275390625, |
| "memory(GiB)": 43.05, |
| "step": 4665, |
| "train_speed(iter/s)": 0.630757 |
| }, |
| { |
| "epoch": 3.0265716137394687, |
| "grad_norm": 4.46875, |
| "learning_rate": 8.354244931013e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 4670, |
| "train_speed(iter/s)": 0.630896 |
| }, |
| { |
| "epoch": 3.029812054439404, |
| "grad_norm": 5.0625, |
| "learning_rate": 8.350269431257624e-05, |
| "loss": 0.022265625, |
| "memory(GiB)": 43.05, |
| "step": 4675, |
| "train_speed(iter/s)": 0.631085 |
| }, |
| { |
| "epoch": 3.033052495139339, |
| "grad_norm": 13.125, |
| "learning_rate": 8.346290084078579e-05, |
| "loss": 0.043359375, |
| "memory(GiB)": 43.05, |
| "step": 4680, |
| "train_speed(iter/s)": 0.630968 |
| }, |
| { |
| "epoch": 3.036292935839274, |
| "grad_norm": 5.78125, |
| "learning_rate": 8.342306894045715e-05, |
| "loss": 0.028515625, |
| "memory(GiB)": 43.05, |
| "step": 4685, |
| "train_speed(iter/s)": 0.631221 |
| }, |
| { |
| "epoch": 3.039533376539209, |
| "grad_norm": 15.125, |
| "learning_rate": 8.338319865733297e-05, |
| "loss": 0.0380859375, |
| "memory(GiB)": 43.05, |
| "step": 4690, |
| "train_speed(iter/s)": 0.631403 |
| }, |
| { |
| "epoch": 3.0427738172391443, |
| "grad_norm": 0.65234375, |
| "learning_rate": 8.334329003719998e-05, |
| "loss": 0.022265625, |
| "memory(GiB)": 43.05, |
| "step": 4695, |
| "train_speed(iter/s)": 0.631499 |
| }, |
| { |
| "epoch": 3.04601425793908, |
| "grad_norm": 1.15625, |
| "learning_rate": 8.330334312588895e-05, |
| "loss": 0.0423828125, |
| "memory(GiB)": 43.05, |
| "step": 4700, |
| "train_speed(iter/s)": 0.631581 |
| }, |
| { |
| "epoch": 3.049254698639015, |
| "grad_norm": 2.96875, |
| "learning_rate": 8.326335796927458e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 4705, |
| "train_speed(iter/s)": 0.631734 |
| }, |
| { |
| "epoch": 3.05249513933895, |
| "grad_norm": 11.6875, |
| "learning_rate": 8.322333461327552e-05, |
| "loss": 0.021484375, |
| "memory(GiB)": 43.05, |
| "step": 4710, |
| "train_speed(iter/s)": 0.63169 |
| }, |
| { |
| "epoch": 3.0557355800388852, |
| "grad_norm": 10.625, |
| "learning_rate": 8.31832731038543e-05, |
| "loss": 0.0337890625, |
| "memory(GiB)": 43.05, |
| "step": 4715, |
| "train_speed(iter/s)": 0.631917 |
| }, |
| { |
| "epoch": 3.0589760207388204, |
| "grad_norm": 0.81640625, |
| "learning_rate": 8.314317348701723e-05, |
| "loss": 0.0302734375, |
| "memory(GiB)": 43.05, |
| "step": 4720, |
| "train_speed(iter/s)": 0.6321 |
| }, |
| { |
| "epoch": 3.0622164614387555, |
| "grad_norm": 7.5625, |
| "learning_rate": 8.310303580881442e-05, |
| "loss": 0.0375, |
| "memory(GiB)": 43.05, |
| "step": 4725, |
| "train_speed(iter/s)": 0.632284 |
| }, |
| { |
| "epoch": 3.065456902138691, |
| "grad_norm": 3.765625, |
| "learning_rate": 8.306286011533968e-05, |
| "loss": 0.0572265625, |
| "memory(GiB)": 43.05, |
| "step": 4730, |
| "train_speed(iter/s)": 0.632202 |
| }, |
| { |
| "epoch": 3.068697342838626, |
| "grad_norm": 4.09375, |
| "learning_rate": 8.302264645273042e-05, |
| "loss": 0.0330078125, |
| "memory(GiB)": 43.05, |
| "step": 4735, |
| "train_speed(iter/s)": 0.632424 |
| }, |
| { |
| "epoch": 3.0719377835385613, |
| "grad_norm": 0.86328125, |
| "learning_rate": 8.298239486716776e-05, |
| "loss": 0.0361328125, |
| "memory(GiB)": 43.05, |
| "step": 4740, |
| "train_speed(iter/s)": 0.632756 |
| }, |
| { |
| "epoch": 3.0751782242384964, |
| "grad_norm": 4.09375, |
| "learning_rate": 8.294210540487627e-05, |
| "loss": 0.032421875, |
| "memory(GiB)": 43.05, |
| "step": 4745, |
| "train_speed(iter/s)": 0.632749 |
| }, |
| { |
| "epoch": 3.0784186649384315, |
| "grad_norm": 13.8125, |
| "learning_rate": 8.290177811212407e-05, |
| "loss": 0.0388671875, |
| "memory(GiB)": 43.05, |
| "step": 4750, |
| "train_speed(iter/s)": 0.632882 |
| }, |
| { |
| "epoch": 3.0816591056383666, |
| "grad_norm": 0.90234375, |
| "learning_rate": 8.286141303522273e-05, |
| "loss": 0.0212890625, |
| "memory(GiB)": 43.05, |
| "step": 4755, |
| "train_speed(iter/s)": 0.633143 |
| }, |
| { |
| "epoch": 3.084899546338302, |
| "grad_norm": 1.1640625, |
| "learning_rate": 8.282101022052717e-05, |
| "loss": 0.0302734375, |
| "memory(GiB)": 43.05, |
| "step": 4760, |
| "train_speed(iter/s)": 0.633077 |
| }, |
| { |
| "epoch": 3.0881399870382373, |
| "grad_norm": 4.90625, |
| "learning_rate": 8.278056971443567e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 4765, |
| "train_speed(iter/s)": 0.63317 |
| }, |
| { |
| "epoch": 3.0913804277381725, |
| "grad_norm": 11.125, |
| "learning_rate": 8.274009156338982e-05, |
| "loss": 0.03671875, |
| "memory(GiB)": 43.05, |
| "step": 4770, |
| "train_speed(iter/s)": 0.633145 |
| }, |
| { |
| "epoch": 3.0946208684381076, |
| "grad_norm": 11.4375, |
| "learning_rate": 8.269957581387442e-05, |
| "loss": 0.0287109375, |
| "memory(GiB)": 43.05, |
| "step": 4775, |
| "train_speed(iter/s)": 0.633361 |
| }, |
| { |
| "epoch": 3.0978613091380427, |
| "grad_norm": 0.55859375, |
| "learning_rate": 8.265902251241741e-05, |
| "loss": 0.0236328125, |
| "memory(GiB)": 43.05, |
| "step": 4780, |
| "train_speed(iter/s)": 0.633194 |
| }, |
| { |
| "epoch": 3.101101749837978, |
| "grad_norm": 1.1015625, |
| "learning_rate": 8.261843170558991e-05, |
| "loss": 0.029296875, |
| "memory(GiB)": 43.05, |
| "step": 4785, |
| "train_speed(iter/s)": 0.633418 |
| }, |
| { |
| "epoch": 3.1043421905379134, |
| "grad_norm": 2.015625, |
| "learning_rate": 8.257780344000611e-05, |
| "loss": 0.0263671875, |
| "memory(GiB)": 43.05, |
| "step": 4790, |
| "train_speed(iter/s)": 0.63364 |
| }, |
| { |
| "epoch": 3.1075826312378485, |
| "grad_norm": 10.125, |
| "learning_rate": 8.253713776232317e-05, |
| "loss": 0.0271484375, |
| "memory(GiB)": 43.05, |
| "step": 4795, |
| "train_speed(iter/s)": 0.633929 |
| }, |
| { |
| "epoch": 3.1108230719377836, |
| "grad_norm": 10.0625, |
| "learning_rate": 8.249643471924124e-05, |
| "loss": 0.048828125, |
| "memory(GiB)": 43.05, |
| "step": 4800, |
| "train_speed(iter/s)": 0.634026 |
| }, |
| { |
| "epoch": 3.1140635126377187, |
| "grad_norm": 12.3125, |
| "learning_rate": 8.245569435750342e-05, |
| "loss": 0.0365234375, |
| "memory(GiB)": 43.05, |
| "step": 4805, |
| "train_speed(iter/s)": 0.634167 |
| }, |
| { |
| "epoch": 3.117303953337654, |
| "grad_norm": 9.625, |
| "learning_rate": 8.241491672389558e-05, |
| "loss": 0.058203125, |
| "memory(GiB)": 43.05, |
| "step": 4810, |
| "train_speed(iter/s)": 0.634336 |
| }, |
| { |
| "epoch": 3.120544394037589, |
| "grad_norm": 2.734375, |
| "learning_rate": 8.237410186524648e-05, |
| "loss": 0.042578125, |
| "memory(GiB)": 43.05, |
| "step": 4815, |
| "train_speed(iter/s)": 0.634383 |
| }, |
| { |
| "epoch": 3.123784834737524, |
| "grad_norm": 13.3125, |
| "learning_rate": 8.233324982842756e-05, |
| "loss": 0.0388671875, |
| "memory(GiB)": 43.05, |
| "step": 4820, |
| "train_speed(iter/s)": 0.634436 |
| }, |
| { |
| "epoch": 3.1270252754374597, |
| "grad_norm": 14.4375, |
| "learning_rate": 8.2292360660353e-05, |
| "loss": 0.0474609375, |
| "memory(GiB)": 43.05, |
| "step": 4825, |
| "train_speed(iter/s)": 0.634559 |
| }, |
| { |
| "epoch": 3.130265716137395, |
| "grad_norm": 4.15625, |
| "learning_rate": 8.22514344079796e-05, |
| "loss": 0.040234375, |
| "memory(GiB)": 43.05, |
| "step": 4830, |
| "train_speed(iter/s)": 0.634789 |
| }, |
| { |
| "epoch": 3.13350615683733, |
| "grad_norm": 11.9375, |
| "learning_rate": 8.221047111830677e-05, |
| "loss": 0.04609375, |
| "memory(GiB)": 43.05, |
| "step": 4835, |
| "train_speed(iter/s)": 0.634768 |
| }, |
| { |
| "epoch": 3.136746597537265, |
| "grad_norm": 12.8125, |
| "learning_rate": 8.216947083837643e-05, |
| "loss": 0.03642578125, |
| "memory(GiB)": 43.05, |
| "step": 4840, |
| "train_speed(iter/s)": 0.635024 |
| }, |
| { |
| "epoch": 3.1399870382372, |
| "grad_norm": 0.859375, |
| "learning_rate": 8.212843361527296e-05, |
| "loss": 0.02265625, |
| "memory(GiB)": 43.05, |
| "step": 4845, |
| "train_speed(iter/s)": 0.635157 |
| }, |
| { |
| "epoch": 3.1432274789371353, |
| "grad_norm": 9.9375, |
| "learning_rate": 8.208735949612323e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 4850, |
| "train_speed(iter/s)": 0.635384 |
| }, |
| { |
| "epoch": 3.146467919637071, |
| "grad_norm": 2.5625, |
| "learning_rate": 8.204624852809641e-05, |
| "loss": 0.0306640625, |
| "memory(GiB)": 43.05, |
| "step": 4855, |
| "train_speed(iter/s)": 0.635432 |
| }, |
| { |
| "epoch": 3.149708360337006, |
| "grad_norm": 0.5, |
| "learning_rate": 8.200510075840406e-05, |
| "loss": 0.0419921875, |
| "memory(GiB)": 43.05, |
| "step": 4860, |
| "train_speed(iter/s)": 0.635528 |
| }, |
| { |
| "epoch": 3.152948801036941, |
| "grad_norm": 10.5625, |
| "learning_rate": 8.196391623429992e-05, |
| "loss": 0.0251953125, |
| "memory(GiB)": 43.05, |
| "step": 4865, |
| "train_speed(iter/s)": 0.635744 |
| }, |
| { |
| "epoch": 3.156189241736876, |
| "grad_norm": 0.5546875, |
| "learning_rate": 8.192269500308001e-05, |
| "loss": 0.040234375, |
| "memory(GiB)": 43.05, |
| "step": 4870, |
| "train_speed(iter/s)": 0.635911 |
| }, |
| { |
| "epoch": 3.1594296824368113, |
| "grad_norm": 1.1171875, |
| "learning_rate": 8.188143711208246e-05, |
| "loss": 0.0306640625, |
| "memory(GiB)": 43.05, |
| "step": 4875, |
| "train_speed(iter/s)": 0.635898 |
| }, |
| { |
| "epoch": 3.1626701231367464, |
| "grad_norm": 9.625, |
| "learning_rate": 8.18401426086875e-05, |
| "loss": 0.0318359375, |
| "memory(GiB)": 43.05, |
| "step": 4880, |
| "train_speed(iter/s)": 0.63608 |
| }, |
| { |
| "epoch": 3.1659105638366816, |
| "grad_norm": 12.875, |
| "learning_rate": 8.179881154031748e-05, |
| "loss": 0.0287109375, |
| "memory(GiB)": 43.05, |
| "step": 4885, |
| "train_speed(iter/s)": 0.636136 |
| }, |
| { |
| "epoch": 3.169151004536617, |
| "grad_norm": 0.4375, |
| "learning_rate": 8.175744395443662e-05, |
| "loss": 0.0408203125, |
| "memory(GiB)": 43.05, |
| "step": 4890, |
| "train_speed(iter/s)": 0.636321 |
| }, |
| { |
| "epoch": 3.1723914452365523, |
| "grad_norm": 14.1875, |
| "learning_rate": 8.171603989855115e-05, |
| "loss": 0.03984375, |
| "memory(GiB)": 43.05, |
| "step": 4895, |
| "train_speed(iter/s)": 0.636546 |
| }, |
| { |
| "epoch": 3.1756318859364874, |
| "grad_norm": 6.28125, |
| "learning_rate": 8.167459942020919e-05, |
| "loss": 0.034765625, |
| "memory(GiB)": 43.05, |
| "step": 4900, |
| "train_speed(iter/s)": 0.636452 |
| }, |
| { |
| "epoch": 3.1788723266364225, |
| "grad_norm": 2.953125, |
| "learning_rate": 8.163312256700067e-05, |
| "loss": 0.04375, |
| "memory(GiB)": 43.05, |
| "step": 4905, |
| "train_speed(iter/s)": 0.636629 |
| }, |
| { |
| "epoch": 3.1821127673363576, |
| "grad_norm": 4.15625, |
| "learning_rate": 8.159160938655726e-05, |
| "loss": 0.0263671875, |
| "memory(GiB)": 43.05, |
| "step": 4910, |
| "train_speed(iter/s)": 0.636829 |
| }, |
| { |
| "epoch": 3.1853532080362927, |
| "grad_norm": 10.8125, |
| "learning_rate": 8.155005992655238e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 4915, |
| "train_speed(iter/s)": 0.637 |
| }, |
| { |
| "epoch": 3.1885936487362283, |
| "grad_norm": 1.8828125, |
| "learning_rate": 8.150847423470114e-05, |
| "loss": 0.0306640625, |
| "memory(GiB)": 43.05, |
| "step": 4920, |
| "train_speed(iter/s)": 0.637005 |
| }, |
| { |
| "epoch": 3.1918340894361634, |
| "grad_norm": 14.5, |
| "learning_rate": 8.14668523587602e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 4925, |
| "train_speed(iter/s)": 0.637222 |
| }, |
| { |
| "epoch": 3.1950745301360985, |
| "grad_norm": 12.75, |
| "learning_rate": 8.142519434652782e-05, |
| "loss": 0.0369140625, |
| "memory(GiB)": 43.05, |
| "step": 4930, |
| "train_speed(iter/s)": 0.637378 |
| }, |
| { |
| "epoch": 3.1983149708360337, |
| "grad_norm": 10.5625, |
| "learning_rate": 8.138350024584373e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 4935, |
| "train_speed(iter/s)": 0.63747 |
| }, |
| { |
| "epoch": 3.201555411535969, |
| "grad_norm": 9.3125, |
| "learning_rate": 8.134177010458914e-05, |
| "loss": 0.049609375, |
| "memory(GiB)": 43.05, |
| "step": 4940, |
| "train_speed(iter/s)": 0.637558 |
| }, |
| { |
| "epoch": 3.204795852235904, |
| "grad_norm": 14.25, |
| "learning_rate": 8.130000397068658e-05, |
| "loss": 0.0328125, |
| "memory(GiB)": 43.05, |
| "step": 4945, |
| "train_speed(iter/s)": 0.637705 |
| }, |
| { |
| "epoch": 3.2080362929358395, |
| "grad_norm": 0.7421875, |
| "learning_rate": 8.12582018921e-05, |
| "loss": 0.0408203125, |
| "memory(GiB)": 43.05, |
| "step": 4950, |
| "train_speed(iter/s)": 0.63785 |
| }, |
| { |
| "epoch": 3.2112767336357746, |
| "grad_norm": 10.0625, |
| "learning_rate": 8.121636391683456e-05, |
| "loss": 0.0400390625, |
| "memory(GiB)": 43.05, |
| "step": 4955, |
| "train_speed(iter/s)": 0.638016 |
| }, |
| { |
| "epoch": 3.2145171743357097, |
| "grad_norm": 11.0, |
| "learning_rate": 8.117449009293668e-05, |
| "loss": 0.0396484375, |
| "memory(GiB)": 43.05, |
| "step": 4960, |
| "train_speed(iter/s)": 0.638147 |
| }, |
| { |
| "epoch": 3.217757615035645, |
| "grad_norm": 17.25, |
| "learning_rate": 8.113258046849392e-05, |
| "loss": 0.051953125, |
| "memory(GiB)": 43.05, |
| "step": 4965, |
| "train_speed(iter/s)": 0.638285 |
| }, |
| { |
| "epoch": 3.22099805573558, |
| "grad_norm": 11.1875, |
| "learning_rate": 8.109063509163501e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 4970, |
| "train_speed(iter/s)": 0.638338 |
| }, |
| { |
| "epoch": 3.224238496435515, |
| "grad_norm": 3.609375, |
| "learning_rate": 8.104865401052965e-05, |
| "loss": 0.0365234375, |
| "memory(GiB)": 43.05, |
| "step": 4975, |
| "train_speed(iter/s)": 0.638632 |
| }, |
| { |
| "epoch": 3.2274789371354506, |
| "grad_norm": 2.390625, |
| "learning_rate": 8.100663727338863e-05, |
| "loss": 0.0435546875, |
| "memory(GiB)": 43.05, |
| "step": 4980, |
| "train_speed(iter/s)": 0.63887 |
| }, |
| { |
| "epoch": 3.2307193778353858, |
| "grad_norm": 17.875, |
| "learning_rate": 8.096458492846362e-05, |
| "loss": 0.0318359375, |
| "memory(GiB)": 43.05, |
| "step": 4985, |
| "train_speed(iter/s)": 0.638997 |
| }, |
| { |
| "epoch": 3.233959818535321, |
| "grad_norm": 8.4375, |
| "learning_rate": 8.092249702404724e-05, |
| "loss": 0.04765625, |
| "memory(GiB)": 43.05, |
| "step": 4990, |
| "train_speed(iter/s)": 0.639111 |
| }, |
| { |
| "epoch": 3.237200259235256, |
| "grad_norm": 10.8125, |
| "learning_rate": 8.088037360847287e-05, |
| "loss": 0.021875, |
| "memory(GiB)": 43.05, |
| "step": 4995, |
| "train_speed(iter/s)": 0.639087 |
| }, |
| { |
| "epoch": 3.240440699935191, |
| "grad_norm": 15.75, |
| "learning_rate": 8.083821473011477e-05, |
| "loss": 0.01875, |
| "memory(GiB)": 43.05, |
| "step": 5000, |
| "train_speed(iter/s)": 0.639051 |
| }, |
| { |
| "epoch": 3.2436811406351262, |
| "grad_norm": 13.5, |
| "learning_rate": 8.079602043738783e-05, |
| "loss": 0.04765625, |
| "memory(GiB)": 43.05, |
| "step": 5005, |
| "train_speed(iter/s)": 0.639135 |
| }, |
| { |
| "epoch": 3.2469215813350614, |
| "grad_norm": 1.90625, |
| "learning_rate": 8.075379077874768e-05, |
| "loss": 0.02265625, |
| "memory(GiB)": 43.05, |
| "step": 5010, |
| "train_speed(iter/s)": 0.639406 |
| }, |
| { |
| "epoch": 3.250162022034997, |
| "grad_norm": 2.21875, |
| "learning_rate": 8.07115258026905e-05, |
| "loss": 0.044140625, |
| "memory(GiB)": 43.05, |
| "step": 5015, |
| "train_speed(iter/s)": 0.639572 |
| }, |
| { |
| "epoch": 3.253402462734932, |
| "grad_norm": 2.734375, |
| "learning_rate": 8.066922555775311e-05, |
| "loss": 0.0341796875, |
| "memory(GiB)": 43.05, |
| "step": 5020, |
| "train_speed(iter/s)": 0.639713 |
| }, |
| { |
| "epoch": 3.256642903434867, |
| "grad_norm": 1.5703125, |
| "learning_rate": 8.062689009251277e-05, |
| "loss": 0.0458984375, |
| "memory(GiB)": 43.05, |
| "step": 5025, |
| "train_speed(iter/s)": 0.640003 |
| }, |
| { |
| "epoch": 3.2598833441348023, |
| "grad_norm": 14.125, |
| "learning_rate": 8.058451945558719e-05, |
| "loss": 0.04375, |
| "memory(GiB)": 43.05, |
| "step": 5030, |
| "train_speed(iter/s)": 0.640001 |
| }, |
| { |
| "epoch": 3.2631237848347374, |
| "grad_norm": 3.25, |
| "learning_rate": 8.054211369563447e-05, |
| "loss": 0.030859375, |
| "memory(GiB)": 43.05, |
| "step": 5035, |
| "train_speed(iter/s)": 0.640051 |
| }, |
| { |
| "epoch": 3.2663642255346725, |
| "grad_norm": 13.3125, |
| "learning_rate": 8.049967286135309e-05, |
| "loss": 0.0419921875, |
| "memory(GiB)": 43.05, |
| "step": 5040, |
| "train_speed(iter/s)": 0.640257 |
| }, |
| { |
| "epoch": 3.269604666234608, |
| "grad_norm": 3.59375, |
| "learning_rate": 8.045719700148177e-05, |
| "loss": 0.0234375, |
| "memory(GiB)": 43.05, |
| "step": 5045, |
| "train_speed(iter/s)": 0.640452 |
| }, |
| { |
| "epoch": 3.2728451069345432, |
| "grad_norm": 5.75, |
| "learning_rate": 8.041468616479945e-05, |
| "loss": 0.03388671875, |
| "memory(GiB)": 43.05, |
| "step": 5050, |
| "train_speed(iter/s)": 0.640688 |
| }, |
| { |
| "epoch": 3.2760855476344783, |
| "grad_norm": 0.578125, |
| "learning_rate": 8.037214040012528e-05, |
| "loss": 0.0310546875, |
| "memory(GiB)": 43.05, |
| "step": 5055, |
| "train_speed(iter/s)": 0.640637 |
| }, |
| { |
| "epoch": 3.2793259883344135, |
| "grad_norm": 13.5625, |
| "learning_rate": 8.032955975631847e-05, |
| "loss": 0.0416015625, |
| "memory(GiB)": 43.05, |
| "step": 5060, |
| "train_speed(iter/s)": 0.640924 |
| }, |
| { |
| "epoch": 3.2825664290343486, |
| "grad_norm": 12.0, |
| "learning_rate": 8.028694428227828e-05, |
| "loss": 0.0423828125, |
| "memory(GiB)": 43.05, |
| "step": 5065, |
| "train_speed(iter/s)": 0.641007 |
| }, |
| { |
| "epoch": 3.2858068697342837, |
| "grad_norm": 7.4375, |
| "learning_rate": 8.0244294026944e-05, |
| "loss": 0.01630859375, |
| "memory(GiB)": 43.05, |
| "step": 5070, |
| "train_speed(iter/s)": 0.641295 |
| }, |
| { |
| "epoch": 3.289047310434219, |
| "grad_norm": 2.84375, |
| "learning_rate": 8.02016090392949e-05, |
| "loss": 0.0328125, |
| "memory(GiB)": 43.05, |
| "step": 5075, |
| "train_speed(iter/s)": 0.641411 |
| }, |
| { |
| "epoch": 3.2922877511341544, |
| "grad_norm": 1.921875, |
| "learning_rate": 8.015888936835003e-05, |
| "loss": 0.0287109375, |
| "memory(GiB)": 43.05, |
| "step": 5080, |
| "train_speed(iter/s)": 0.641628 |
| }, |
| { |
| "epoch": 3.2955281918340895, |
| "grad_norm": 0.78125, |
| "learning_rate": 8.011613506316838e-05, |
| "loss": 0.0296875, |
| "memory(GiB)": 43.05, |
| "step": 5085, |
| "train_speed(iter/s)": 0.64172 |
| }, |
| { |
| "epoch": 3.2987686325340246, |
| "grad_norm": 1.640625, |
| "learning_rate": 8.007334617284864e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 5090, |
| "train_speed(iter/s)": 0.64188 |
| }, |
| { |
| "epoch": 3.3020090732339598, |
| "grad_norm": 3.078125, |
| "learning_rate": 8.003052274652924e-05, |
| "loss": 0.0345703125, |
| "memory(GiB)": 43.05, |
| "step": 5095, |
| "train_speed(iter/s)": 0.642041 |
| }, |
| { |
| "epoch": 3.305249513933895, |
| "grad_norm": 2.359375, |
| "learning_rate": 7.998766483338831e-05, |
| "loss": 0.040234375, |
| "memory(GiB)": 43.05, |
| "step": 5100, |
| "train_speed(iter/s)": 0.642165 |
| }, |
| { |
| "epoch": 3.3084899546338304, |
| "grad_norm": 13.875, |
| "learning_rate": 7.99447724826435e-05, |
| "loss": 0.0416015625, |
| "memory(GiB)": 43.05, |
| "step": 5105, |
| "train_speed(iter/s)": 0.641982 |
| }, |
| { |
| "epoch": 3.3117303953337656, |
| "grad_norm": 7.09375, |
| "learning_rate": 7.990184574355209e-05, |
| "loss": 0.0294921875, |
| "memory(GiB)": 43.05, |
| "step": 5110, |
| "train_speed(iter/s)": 0.642198 |
| }, |
| { |
| "epoch": 3.3149708360337007, |
| "grad_norm": 13.25, |
| "learning_rate": 7.98588846654108e-05, |
| "loss": 0.045703125, |
| "memory(GiB)": 43.05, |
| "step": 5115, |
| "train_speed(iter/s)": 0.641561 |
| }, |
| { |
| "epoch": 3.318211276733636, |
| "grad_norm": 3.59375, |
| "learning_rate": 7.981588929755581e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 5120, |
| "train_speed(iter/s)": 0.641573 |
| }, |
| { |
| "epoch": 3.321451717433571, |
| "grad_norm": 3.703125, |
| "learning_rate": 7.977285968936266e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 5125, |
| "train_speed(iter/s)": 0.641596 |
| }, |
| { |
| "epoch": 3.324692158133506, |
| "grad_norm": 16.375, |
| "learning_rate": 7.972979589024624e-05, |
| "loss": 0.0478515625, |
| "memory(GiB)": 43.05, |
| "step": 5130, |
| "train_speed(iter/s)": 0.641786 |
| }, |
| { |
| "epoch": 3.327932598833441, |
| "grad_norm": 5.40625, |
| "learning_rate": 7.968669794966067e-05, |
| "loss": 0.030859375, |
| "memory(GiB)": 43.05, |
| "step": 5135, |
| "train_speed(iter/s)": 0.641917 |
| }, |
| { |
| "epoch": 3.3311730395333763, |
| "grad_norm": 0.828125, |
| "learning_rate": 7.96435659170993e-05, |
| "loss": 0.0396484375, |
| "memory(GiB)": 43.05, |
| "step": 5140, |
| "train_speed(iter/s)": 0.641959 |
| }, |
| { |
| "epoch": 3.334413480233312, |
| "grad_norm": 1.8359375, |
| "learning_rate": 7.960039984209462e-05, |
| "loss": 0.0490234375, |
| "memory(GiB)": 43.05, |
| "step": 5145, |
| "train_speed(iter/s)": 0.642049 |
| }, |
| { |
| "epoch": 3.337653920933247, |
| "grad_norm": 1.0, |
| "learning_rate": 7.955719977421823e-05, |
| "loss": 0.01875, |
| "memory(GiB)": 43.05, |
| "step": 5150, |
| "train_speed(iter/s)": 0.642241 |
| }, |
| { |
| "epoch": 3.340894361633182, |
| "grad_norm": 7.59375, |
| "learning_rate": 7.951396576308074e-05, |
| "loss": 0.0263671875, |
| "memory(GiB)": 43.05, |
| "step": 5155, |
| "train_speed(iter/s)": 0.642317 |
| }, |
| { |
| "epoch": 3.344134802333117, |
| "grad_norm": 0.734375, |
| "learning_rate": 7.947069785833176e-05, |
| "loss": 0.0193359375, |
| "memory(GiB)": 43.05, |
| "step": 5160, |
| "train_speed(iter/s)": 0.642442 |
| }, |
| { |
| "epoch": 3.3473752430330523, |
| "grad_norm": 0.439453125, |
| "learning_rate": 7.942739610965984e-05, |
| "loss": 0.0349609375, |
| "memory(GiB)": 43.05, |
| "step": 5165, |
| "train_speed(iter/s)": 0.642501 |
| }, |
| { |
| "epoch": 3.350615683732988, |
| "grad_norm": 0.6015625, |
| "learning_rate": 7.938406056679234e-05, |
| "loss": 0.040234375, |
| "memory(GiB)": 43.05, |
| "step": 5170, |
| "train_speed(iter/s)": 0.642582 |
| }, |
| { |
| "epoch": 3.353856124432923, |
| "grad_norm": 8.9375, |
| "learning_rate": 7.93406912794955e-05, |
| "loss": 0.027734375, |
| "memory(GiB)": 43.05, |
| "step": 5175, |
| "train_speed(iter/s)": 0.642524 |
| }, |
| { |
| "epoch": 3.357096565132858, |
| "grad_norm": 10.125, |
| "learning_rate": 7.929728829757426e-05, |
| "loss": 0.03671875, |
| "memory(GiB)": 43.05, |
| "step": 5180, |
| "train_speed(iter/s)": 0.642761 |
| }, |
| { |
| "epoch": 3.3603370058327933, |
| "grad_norm": 8.6875, |
| "learning_rate": 7.925385167087225e-05, |
| "loss": 0.0423828125, |
| "memory(GiB)": 43.05, |
| "step": 5185, |
| "train_speed(iter/s)": 0.642755 |
| }, |
| { |
| "epoch": 3.3635774465327284, |
| "grad_norm": 14.625, |
| "learning_rate": 7.92103814492718e-05, |
| "loss": 0.025390625, |
| "memory(GiB)": 43.05, |
| "step": 5190, |
| "train_speed(iter/s)": 0.642756 |
| }, |
| { |
| "epoch": 3.3668178872326635, |
| "grad_norm": 2.28125, |
| "learning_rate": 7.916687768269374e-05, |
| "loss": 0.0416015625, |
| "memory(GiB)": 43.05, |
| "step": 5195, |
| "train_speed(iter/s)": 0.642826 |
| }, |
| { |
| "epoch": 3.3700583279325986, |
| "grad_norm": 1.921875, |
| "learning_rate": 7.912334042109747e-05, |
| "loss": 0.034765625, |
| "memory(GiB)": 43.05, |
| "step": 5200, |
| "train_speed(iter/s)": 0.642835 |
| }, |
| { |
| "epoch": 3.373298768632534, |
| "grad_norm": 2.203125, |
| "learning_rate": 7.907976971448091e-05, |
| "loss": 0.0302734375, |
| "memory(GiB)": 43.05, |
| "step": 5205, |
| "train_speed(iter/s)": 0.64264 |
| }, |
| { |
| "epoch": 3.3765392093324693, |
| "grad_norm": 9.5, |
| "learning_rate": 7.903616561288021e-05, |
| "loss": 0.017578125, |
| "memory(GiB)": 43.05, |
| "step": 5210, |
| "train_speed(iter/s)": 0.642804 |
| }, |
| { |
| "epoch": 3.3797796500324044, |
| "grad_norm": 17.0, |
| "learning_rate": 7.899252816637007e-05, |
| "loss": 0.0271484375, |
| "memory(GiB)": 43.05, |
| "step": 5215, |
| "train_speed(iter/s)": 0.642852 |
| }, |
| { |
| "epoch": 3.3830200907323396, |
| "grad_norm": 6.75, |
| "learning_rate": 7.894885742506337e-05, |
| "loss": 0.0482421875, |
| "memory(GiB)": 43.05, |
| "step": 5220, |
| "train_speed(iter/s)": 0.642941 |
| }, |
| { |
| "epoch": 3.3862605314322747, |
| "grad_norm": 10.5625, |
| "learning_rate": 7.890515343911127e-05, |
| "loss": 0.0416015625, |
| "memory(GiB)": 43.05, |
| "step": 5225, |
| "train_speed(iter/s)": 0.642969 |
| }, |
| { |
| "epoch": 3.38950097213221, |
| "grad_norm": 0.7890625, |
| "learning_rate": 7.886141625870307e-05, |
| "loss": 0.03984375, |
| "memory(GiB)": 43.05, |
| "step": 5230, |
| "train_speed(iter/s)": 0.643161 |
| }, |
| { |
| "epoch": 3.3927414128321454, |
| "grad_norm": 10.25, |
| "learning_rate": 7.881764593406622e-05, |
| "loss": 0.0423828125, |
| "memory(GiB)": 43.05, |
| "step": 5235, |
| "train_speed(iter/s)": 0.643358 |
| }, |
| { |
| "epoch": 3.3959818535320805, |
| "grad_norm": 11.625, |
| "learning_rate": 7.87738425154662e-05, |
| "loss": 0.0306640625, |
| "memory(GiB)": 43.05, |
| "step": 5240, |
| "train_speed(iter/s)": 0.643484 |
| }, |
| { |
| "epoch": 3.3992222942320156, |
| "grad_norm": 12.75, |
| "learning_rate": 7.873000605320659e-05, |
| "loss": 0.0470703125, |
| "memory(GiB)": 43.05, |
| "step": 5245, |
| "train_speed(iter/s)": 0.643764 |
| }, |
| { |
| "epoch": 3.4024627349319507, |
| "grad_norm": 11.375, |
| "learning_rate": 7.868613659762878e-05, |
| "loss": 0.0392578125, |
| "memory(GiB)": 43.05, |
| "step": 5250, |
| "train_speed(iter/s)": 0.643963 |
| }, |
| { |
| "epoch": 3.405703175631886, |
| "grad_norm": 1.34375, |
| "learning_rate": 7.864223419911211e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 5255, |
| "train_speed(iter/s)": 0.644049 |
| }, |
| { |
| "epoch": 3.408943616331821, |
| "grad_norm": 14.0, |
| "learning_rate": 7.859829890807382e-05, |
| "loss": 0.028125, |
| "memory(GiB)": 43.05, |
| "step": 5260, |
| "train_speed(iter/s)": 0.644215 |
| }, |
| { |
| "epoch": 3.412184057031756, |
| "grad_norm": 15.5, |
| "learning_rate": 7.855433077496882e-05, |
| "loss": 0.03984375, |
| "memory(GiB)": 43.05, |
| "step": 5265, |
| "train_speed(iter/s)": 0.644236 |
| }, |
| { |
| "epoch": 3.4154244977316917, |
| "grad_norm": 14.1875, |
| "learning_rate": 7.851032985028976e-05, |
| "loss": 0.0263671875, |
| "memory(GiB)": 43.05, |
| "step": 5270, |
| "train_speed(iter/s)": 0.64435 |
| }, |
| { |
| "epoch": 3.4186649384316268, |
| "grad_norm": 0.5625, |
| "learning_rate": 7.846629618456702e-05, |
| "loss": 0.0599609375, |
| "memory(GiB)": 43.05, |
| "step": 5275, |
| "train_speed(iter/s)": 0.644495 |
| }, |
| { |
| "epoch": 3.421905379131562, |
| "grad_norm": 15.0625, |
| "learning_rate": 7.842222982836847e-05, |
| "loss": 0.046484375, |
| "memory(GiB)": 43.05, |
| "step": 5280, |
| "train_speed(iter/s)": 0.644542 |
| }, |
| { |
| "epoch": 3.425145819831497, |
| "grad_norm": 0.57421875, |
| "learning_rate": 7.837813083229957e-05, |
| "loss": 0.0306640625, |
| "memory(GiB)": 43.05, |
| "step": 5285, |
| "train_speed(iter/s)": 0.644519 |
| }, |
| { |
| "epoch": 3.428386260531432, |
| "grad_norm": 3.453125, |
| "learning_rate": 7.833399924700331e-05, |
| "loss": 0.036328125, |
| "memory(GiB)": 43.05, |
| "step": 5290, |
| "train_speed(iter/s)": 0.644511 |
| }, |
| { |
| "epoch": 3.4316267012313677, |
| "grad_norm": 17.375, |
| "learning_rate": 7.828983512316006e-05, |
| "loss": 0.0470703125, |
| "memory(GiB)": 43.05, |
| "step": 5295, |
| "train_speed(iter/s)": 0.644495 |
| }, |
| { |
| "epoch": 3.434867141931303, |
| "grad_norm": 0.55859375, |
| "learning_rate": 7.824563851148752e-05, |
| "loss": 0.0453125, |
| "memory(GiB)": 43.05, |
| "step": 5300, |
| "train_speed(iter/s)": 0.644602 |
| }, |
| { |
| "epoch": 3.438107582631238, |
| "grad_norm": 5.90625, |
| "learning_rate": 7.820140946274076e-05, |
| "loss": 0.0369140625, |
| "memory(GiB)": 43.05, |
| "step": 5305, |
| "train_speed(iter/s)": 0.644735 |
| }, |
| { |
| "epoch": 3.441348023331173, |
| "grad_norm": 4.375, |
| "learning_rate": 7.815714802771211e-05, |
| "loss": 0.0375, |
| "memory(GiB)": 43.05, |
| "step": 5310, |
| "train_speed(iter/s)": 0.644901 |
| }, |
| { |
| "epoch": 3.444588464031108, |
| "grad_norm": 2.171875, |
| "learning_rate": 7.811285425723101e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 5315, |
| "train_speed(iter/s)": 0.644867 |
| }, |
| { |
| "epoch": 3.4478289047310433, |
| "grad_norm": 0.57421875, |
| "learning_rate": 7.806852820216412e-05, |
| "loss": 0.016796875, |
| "memory(GiB)": 43.05, |
| "step": 5320, |
| "train_speed(iter/s)": 0.645099 |
| }, |
| { |
| "epoch": 3.4510693454309784, |
| "grad_norm": 1.4453125, |
| "learning_rate": 7.802416991341512e-05, |
| "loss": 0.062109375, |
| "memory(GiB)": 43.05, |
| "step": 5325, |
| "train_speed(iter/s)": 0.645222 |
| }, |
| { |
| "epoch": 3.454309786130914, |
| "grad_norm": 13.125, |
| "learning_rate": 7.797977944192476e-05, |
| "loss": 0.05546875, |
| "memory(GiB)": 43.05, |
| "step": 5330, |
| "train_speed(iter/s)": 0.645274 |
| }, |
| { |
| "epoch": 3.457550226830849, |
| "grad_norm": 0.412109375, |
| "learning_rate": 7.79353568386707e-05, |
| "loss": 0.0306640625, |
| "memory(GiB)": 43.05, |
| "step": 5335, |
| "train_speed(iter/s)": 0.645431 |
| }, |
| { |
| "epoch": 3.4607906675307842, |
| "grad_norm": 0.484375, |
| "learning_rate": 7.78909021546675e-05, |
| "loss": 0.0251953125, |
| "memory(GiB)": 43.05, |
| "step": 5340, |
| "train_speed(iter/s)": 0.645563 |
| }, |
| { |
| "epoch": 3.4640311082307194, |
| "grad_norm": 14.0625, |
| "learning_rate": 7.784641544096658e-05, |
| "loss": 0.0375, |
| "memory(GiB)": 43.05, |
| "step": 5345, |
| "train_speed(iter/s)": 0.64551 |
| }, |
| { |
| "epoch": 3.4672715489306545, |
| "grad_norm": 1.421875, |
| "learning_rate": 7.780189674865616e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 5350, |
| "train_speed(iter/s)": 0.645597 |
| }, |
| { |
| "epoch": 3.4705119896305896, |
| "grad_norm": 1.1640625, |
| "learning_rate": 7.775734612886116e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 5355, |
| "train_speed(iter/s)": 0.645617 |
| }, |
| { |
| "epoch": 3.473752430330525, |
| "grad_norm": 2.078125, |
| "learning_rate": 7.771276363274316e-05, |
| "loss": 0.034375, |
| "memory(GiB)": 43.05, |
| "step": 5360, |
| "train_speed(iter/s)": 0.645662 |
| }, |
| { |
| "epoch": 3.4769928710304603, |
| "grad_norm": 0.89453125, |
| "learning_rate": 7.766814931150035e-05, |
| "loss": 0.024609375, |
| "memory(GiB)": 43.05, |
| "step": 5365, |
| "train_speed(iter/s)": 0.645772 |
| }, |
| { |
| "epoch": 3.4802333117303954, |
| "grad_norm": 0.59765625, |
| "learning_rate": 7.76235032163675e-05, |
| "loss": 0.0486328125, |
| "memory(GiB)": 43.05, |
| "step": 5370, |
| "train_speed(iter/s)": 0.645739 |
| }, |
| { |
| "epoch": 3.4834737524303305, |
| "grad_norm": 10.6875, |
| "learning_rate": 7.757882539861582e-05, |
| "loss": 0.021875, |
| "memory(GiB)": 43.05, |
| "step": 5375, |
| "train_speed(iter/s)": 0.645837 |
| }, |
| { |
| "epoch": 3.4867141931302656, |
| "grad_norm": 4.03125, |
| "learning_rate": 7.753411590955299e-05, |
| "loss": 0.046484375, |
| "memory(GiB)": 43.05, |
| "step": 5380, |
| "train_speed(iter/s)": 0.646036 |
| }, |
| { |
| "epoch": 3.4899546338302008, |
| "grad_norm": 8.1875, |
| "learning_rate": 7.7489374800523e-05, |
| "loss": 0.033984375, |
| "memory(GiB)": 43.05, |
| "step": 5385, |
| "train_speed(iter/s)": 0.646104 |
| }, |
| { |
| "epoch": 3.493195074530136, |
| "grad_norm": 4.03125, |
| "learning_rate": 7.744460212290625e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 5390, |
| "train_speed(iter/s)": 0.646305 |
| }, |
| { |
| "epoch": 3.4964355152300715, |
| "grad_norm": 12.5, |
| "learning_rate": 7.739979792811933e-05, |
| "loss": 0.0310546875, |
| "memory(GiB)": 43.05, |
| "step": 5395, |
| "train_speed(iter/s)": 0.646219 |
| }, |
| { |
| "epoch": 3.4996759559300066, |
| "grad_norm": 7.46875, |
| "learning_rate": 7.735496226761499e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 5400, |
| "train_speed(iter/s)": 0.646368 |
| }, |
| { |
| "epoch": 3.5029163966299417, |
| "grad_norm": 1.0234375, |
| "learning_rate": 7.73100951928822e-05, |
| "loss": 0.034375, |
| "memory(GiB)": 43.05, |
| "step": 5405, |
| "train_speed(iter/s)": 0.646371 |
| }, |
| { |
| "epoch": 3.506156837329877, |
| "grad_norm": 1.8046875, |
| "learning_rate": 7.726519675544597e-05, |
| "loss": 0.036328125, |
| "memory(GiB)": 43.05, |
| "step": 5410, |
| "train_speed(iter/s)": 0.646382 |
| }, |
| { |
| "epoch": 3.509397278029812, |
| "grad_norm": 5.53125, |
| "learning_rate": 7.722026700686727e-05, |
| "loss": 0.0404296875, |
| "memory(GiB)": 43.05, |
| "step": 5415, |
| "train_speed(iter/s)": 0.646589 |
| }, |
| { |
| "epoch": 3.5126377187297475, |
| "grad_norm": 13.0625, |
| "learning_rate": 7.717530599874311e-05, |
| "loss": 0.0271484375, |
| "memory(GiB)": 43.05, |
| "step": 5420, |
| "train_speed(iter/s)": 0.646705 |
| }, |
| { |
| "epoch": 3.5158781594296826, |
| "grad_norm": 1.65625, |
| "learning_rate": 7.71303137827064e-05, |
| "loss": 0.056640625, |
| "memory(GiB)": 43.05, |
| "step": 5425, |
| "train_speed(iter/s)": 0.64682 |
| }, |
| { |
| "epoch": 3.5191186001296177, |
| "grad_norm": 2.953125, |
| "learning_rate": 7.708529041042581e-05, |
| "loss": 0.0337890625, |
| "memory(GiB)": 43.05, |
| "step": 5430, |
| "train_speed(iter/s)": 0.647051 |
| }, |
| { |
| "epoch": 3.522359040829553, |
| "grad_norm": 1.9375, |
| "learning_rate": 7.704023593360583e-05, |
| "loss": 0.0412109375, |
| "memory(GiB)": 43.05, |
| "step": 5435, |
| "train_speed(iter/s)": 0.64715 |
| }, |
| { |
| "epoch": 3.525599481529488, |
| "grad_norm": 0.58984375, |
| "learning_rate": 7.69951504039867e-05, |
| "loss": 0.0578125, |
| "memory(GiB)": 43.05, |
| "step": 5440, |
| "train_speed(iter/s)": 0.647371 |
| }, |
| { |
| "epoch": 3.528839922229423, |
| "grad_norm": 1.3828125, |
| "learning_rate": 7.69500338733443e-05, |
| "loss": 0.033984375, |
| "memory(GiB)": 43.05, |
| "step": 5445, |
| "train_speed(iter/s)": 0.647472 |
| }, |
| { |
| "epoch": 3.5320803629293582, |
| "grad_norm": 14.25, |
| "learning_rate": 7.690488639349008e-05, |
| "loss": 0.0421875, |
| "memory(GiB)": 43.05, |
| "step": 5450, |
| "train_speed(iter/s)": 0.647435 |
| }, |
| { |
| "epoch": 3.5353208036292934, |
| "grad_norm": 10.0625, |
| "learning_rate": 7.685970801627108e-05, |
| "loss": 0.04765625, |
| "memory(GiB)": 43.05, |
| "step": 5455, |
| "train_speed(iter/s)": 0.647517 |
| }, |
| { |
| "epoch": 3.538561244329229, |
| "grad_norm": 12.75, |
| "learning_rate": 7.681449879356979e-05, |
| "loss": 0.03671875, |
| "memory(GiB)": 43.05, |
| "step": 5460, |
| "train_speed(iter/s)": 0.647619 |
| }, |
| { |
| "epoch": 3.541801685029164, |
| "grad_norm": 10.3125, |
| "learning_rate": 7.676925877730413e-05, |
| "loss": 0.030078125, |
| "memory(GiB)": 43.05, |
| "step": 5465, |
| "train_speed(iter/s)": 0.647615 |
| }, |
| { |
| "epoch": 3.545042125729099, |
| "grad_norm": 12.25, |
| "learning_rate": 7.67239880194274e-05, |
| "loss": 0.021875, |
| "memory(GiB)": 43.05, |
| "step": 5470, |
| "train_speed(iter/s)": 0.647878 |
| }, |
| { |
| "epoch": 3.5482825664290343, |
| "grad_norm": 16.375, |
| "learning_rate": 7.66786865719282e-05, |
| "loss": 0.0330078125, |
| "memory(GiB)": 43.05, |
| "step": 5475, |
| "train_speed(iter/s)": 0.648057 |
| }, |
| { |
| "epoch": 3.5515230071289694, |
| "grad_norm": 14.3125, |
| "learning_rate": 7.663335448683035e-05, |
| "loss": 0.032421875, |
| "memory(GiB)": 43.05, |
| "step": 5480, |
| "train_speed(iter/s)": 0.648051 |
| }, |
| { |
| "epoch": 3.554763447828905, |
| "grad_norm": 12.625, |
| "learning_rate": 7.658799181619284e-05, |
| "loss": 0.0263671875, |
| "memory(GiB)": 43.05, |
| "step": 5485, |
| "train_speed(iter/s)": 0.64819 |
| }, |
| { |
| "epoch": 3.55800388852884, |
| "grad_norm": 1.0703125, |
| "learning_rate": 7.654259861210987e-05, |
| "loss": 0.040625, |
| "memory(GiB)": 43.05, |
| "step": 5490, |
| "train_speed(iter/s)": 0.648173 |
| }, |
| { |
| "epoch": 3.561244329228775, |
| "grad_norm": 2.09375, |
| "learning_rate": 7.64971749267106e-05, |
| "loss": 0.03671875, |
| "memory(GiB)": 43.05, |
| "step": 5495, |
| "train_speed(iter/s)": 0.648254 |
| }, |
| { |
| "epoch": 3.5644847699287103, |
| "grad_norm": 0.6875, |
| "learning_rate": 7.645172081215926e-05, |
| "loss": 0.0533203125, |
| "memory(GiB)": 43.05, |
| "step": 5500, |
| "train_speed(iter/s)": 0.648366 |
| }, |
| { |
| "epoch": 3.5677252106286454, |
| "grad_norm": 14.375, |
| "learning_rate": 7.640623632065502e-05, |
| "loss": 0.051171875, |
| "memory(GiB)": 43.05, |
| "step": 5505, |
| "train_speed(iter/s)": 0.648634 |
| }, |
| { |
| "epoch": 3.5709656513285806, |
| "grad_norm": 2.6875, |
| "learning_rate": 7.63607215044319e-05, |
| "loss": 0.02421875, |
| "memory(GiB)": 43.05, |
| "step": 5510, |
| "train_speed(iter/s)": 0.648752 |
| }, |
| { |
| "epoch": 3.5742060920285157, |
| "grad_norm": 16.5, |
| "learning_rate": 7.631517641575875e-05, |
| "loss": 0.062109375, |
| "memory(GiB)": 43.05, |
| "step": 5515, |
| "train_speed(iter/s)": 0.648739 |
| }, |
| { |
| "epoch": 3.577446532728451, |
| "grad_norm": 11.75, |
| "learning_rate": 7.626960110693923e-05, |
| "loss": 0.036328125, |
| "memory(GiB)": 43.05, |
| "step": 5520, |
| "train_speed(iter/s)": 0.648853 |
| }, |
| { |
| "epoch": 3.5806869734283864, |
| "grad_norm": 0.66796875, |
| "learning_rate": 7.622399563031168e-05, |
| "loss": 0.0458984375, |
| "memory(GiB)": 43.05, |
| "step": 5525, |
| "train_speed(iter/s)": 0.648817 |
| }, |
| { |
| "epoch": 3.5839274141283215, |
| "grad_norm": 9.4375, |
| "learning_rate": 7.617836003824905e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 5530, |
| "train_speed(iter/s)": 0.648813 |
| }, |
| { |
| "epoch": 3.5871678548282566, |
| "grad_norm": 15.625, |
| "learning_rate": 7.613269438315892e-05, |
| "loss": 0.033203125, |
| "memory(GiB)": 43.05, |
| "step": 5535, |
| "train_speed(iter/s)": 0.648733 |
| }, |
| { |
| "epoch": 3.5904082955281917, |
| "grad_norm": 3.40625, |
| "learning_rate": 7.608699871748338e-05, |
| "loss": 0.02265625, |
| "memory(GiB)": 43.05, |
| "step": 5540, |
| "train_speed(iter/s)": 0.648878 |
| }, |
| { |
| "epoch": 3.593648736228127, |
| "grad_norm": 12.1875, |
| "learning_rate": 7.604127309369897e-05, |
| "loss": 0.0439453125, |
| "memory(GiB)": 43.05, |
| "step": 5545, |
| "train_speed(iter/s)": 0.648849 |
| }, |
| { |
| "epoch": 3.5968891769280624, |
| "grad_norm": 0.8203125, |
| "learning_rate": 7.599551756431665e-05, |
| "loss": 0.0345703125, |
| "memory(GiB)": 43.05, |
| "step": 5550, |
| "train_speed(iter/s)": 0.649039 |
| }, |
| { |
| "epoch": 3.6001296176279975, |
| "grad_norm": 8.0, |
| "learning_rate": 7.594973218188172e-05, |
| "loss": 0.0275390625, |
| "memory(GiB)": 43.05, |
| "step": 5555, |
| "train_speed(iter/s)": 0.649097 |
| }, |
| { |
| "epoch": 3.6033700583279327, |
| "grad_norm": 11.1875, |
| "learning_rate": 7.590391699897375e-05, |
| "loss": 0.0462890625, |
| "memory(GiB)": 43.05, |
| "step": 5560, |
| "train_speed(iter/s)": 0.649188 |
| }, |
| { |
| "epoch": 3.606610499027868, |
| "grad_norm": 3.65625, |
| "learning_rate": 7.585807206820656e-05, |
| "loss": 0.028515625, |
| "memory(GiB)": 43.05, |
| "step": 5565, |
| "train_speed(iter/s)": 0.6493 |
| }, |
| { |
| "epoch": 3.609850939727803, |
| "grad_norm": 1.796875, |
| "learning_rate": 7.581219744222812e-05, |
| "loss": 0.0404296875, |
| "memory(GiB)": 43.05, |
| "step": 5570, |
| "train_speed(iter/s)": 0.649432 |
| }, |
| { |
| "epoch": 3.613091380427738, |
| "grad_norm": 4.46875, |
| "learning_rate": 7.576629317372047e-05, |
| "loss": 0.025390625, |
| "memory(GiB)": 43.05, |
| "step": 5575, |
| "train_speed(iter/s)": 0.649619 |
| }, |
| { |
| "epoch": 3.616331821127673, |
| "grad_norm": 1.21875, |
| "learning_rate": 7.572035931539975e-05, |
| "loss": 0.060546875, |
| "memory(GiB)": 43.05, |
| "step": 5580, |
| "train_speed(iter/s)": 0.649764 |
| }, |
| { |
| "epoch": 3.6195722618276087, |
| "grad_norm": 2.421875, |
| "learning_rate": 7.567439592001604e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 5585, |
| "train_speed(iter/s)": 0.649826 |
| }, |
| { |
| "epoch": 3.622812702527544, |
| "grad_norm": 10.6875, |
| "learning_rate": 7.562840304035334e-05, |
| "loss": 0.01513671875, |
| "memory(GiB)": 43.05, |
| "step": 5590, |
| "train_speed(iter/s)": 0.650042 |
| }, |
| { |
| "epoch": 3.626053143227479, |
| "grad_norm": 1.78125, |
| "learning_rate": 7.558238072922952e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 5595, |
| "train_speed(iter/s)": 0.65019 |
| }, |
| { |
| "epoch": 3.629293583927414, |
| "grad_norm": 2.953125, |
| "learning_rate": 7.553632903949626e-05, |
| "loss": 0.0345703125, |
| "memory(GiB)": 43.05, |
| "step": 5600, |
| "train_speed(iter/s)": 0.650337 |
| }, |
| { |
| "epoch": 3.632534024627349, |
| "grad_norm": 15.625, |
| "learning_rate": 7.549024802403897e-05, |
| "loss": 0.0248046875, |
| "memory(GiB)": 43.05, |
| "step": 5605, |
| "train_speed(iter/s)": 0.650391 |
| }, |
| { |
| "epoch": 3.6357744653272848, |
| "grad_norm": 1.5234375, |
| "learning_rate": 7.544413773577673e-05, |
| "loss": 0.021875, |
| "memory(GiB)": 43.05, |
| "step": 5610, |
| "train_speed(iter/s)": 0.650397 |
| }, |
| { |
| "epoch": 3.63901490602722, |
| "grad_norm": 8.5625, |
| "learning_rate": 7.539799822766223e-05, |
| "loss": 0.025390625, |
| "memory(GiB)": 43.05, |
| "step": 5615, |
| "train_speed(iter/s)": 0.650585 |
| }, |
| { |
| "epoch": 3.642255346727155, |
| "grad_norm": 15.9375, |
| "learning_rate": 7.535182955268173e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 5620, |
| "train_speed(iter/s)": 0.650687 |
| }, |
| { |
| "epoch": 3.64549578742709, |
| "grad_norm": 3.765625, |
| "learning_rate": 7.530563176385499e-05, |
| "loss": 0.0380859375, |
| "memory(GiB)": 43.05, |
| "step": 5625, |
| "train_speed(iter/s)": 0.650823 |
| }, |
| { |
| "epoch": 3.6487362281270252, |
| "grad_norm": 10.9375, |
| "learning_rate": 7.525940491423519e-05, |
| "loss": 0.0388671875, |
| "memory(GiB)": 43.05, |
| "step": 5630, |
| "train_speed(iter/s)": 0.651083 |
| }, |
| { |
| "epoch": 3.6519766688269604, |
| "grad_norm": 0.515625, |
| "learning_rate": 7.521314905690888e-05, |
| "loss": 0.0384765625, |
| "memory(GiB)": 43.05, |
| "step": 5635, |
| "train_speed(iter/s)": 0.651231 |
| }, |
| { |
| "epoch": 3.6552171095268955, |
| "grad_norm": 17.75, |
| "learning_rate": 7.516686424499595e-05, |
| "loss": 0.030859375, |
| "memory(GiB)": 43.05, |
| "step": 5640, |
| "train_speed(iter/s)": 0.651137 |
| }, |
| { |
| "epoch": 3.6584575502268306, |
| "grad_norm": 4.09375, |
| "learning_rate": 7.51205505316495e-05, |
| "loss": 0.0265625, |
| "memory(GiB)": 43.05, |
| "step": 5645, |
| "train_speed(iter/s)": 0.651233 |
| }, |
| { |
| "epoch": 3.661697990926766, |
| "grad_norm": 3.28125, |
| "learning_rate": 7.507420797005588e-05, |
| "loss": 0.0427734375, |
| "memory(GiB)": 43.05, |
| "step": 5650, |
| "train_speed(iter/s)": 0.651156 |
| }, |
| { |
| "epoch": 3.6649384316267013, |
| "grad_norm": 12.25, |
| "learning_rate": 7.502783661343449e-05, |
| "loss": 0.055078125, |
| "memory(GiB)": 43.05, |
| "step": 5655, |
| "train_speed(iter/s)": 0.651298 |
| }, |
| { |
| "epoch": 3.6681788723266364, |
| "grad_norm": 10.3125, |
| "learning_rate": 7.498143651503787e-05, |
| "loss": 0.0236328125, |
| "memory(GiB)": 43.05, |
| "step": 5660, |
| "train_speed(iter/s)": 0.651328 |
| }, |
| { |
| "epoch": 3.6714193130265715, |
| "grad_norm": 13.25, |
| "learning_rate": 7.493500772815149e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 5665, |
| "train_speed(iter/s)": 0.65134 |
| }, |
| { |
| "epoch": 3.6746597537265067, |
| "grad_norm": 1.765625, |
| "learning_rate": 7.488855030609387e-05, |
| "loss": 0.034765625, |
| "memory(GiB)": 43.05, |
| "step": 5670, |
| "train_speed(iter/s)": 0.651437 |
| }, |
| { |
| "epoch": 3.6779001944264422, |
| "grad_norm": 11.0625, |
| "learning_rate": 7.484206430221634e-05, |
| "loss": 0.0361328125, |
| "memory(GiB)": 43.05, |
| "step": 5675, |
| "train_speed(iter/s)": 0.651513 |
| }, |
| { |
| "epoch": 3.6811406351263773, |
| "grad_norm": 1.875, |
| "learning_rate": 7.479554976990306e-05, |
| "loss": 0.0236328125, |
| "memory(GiB)": 43.05, |
| "step": 5680, |
| "train_speed(iter/s)": 0.651771 |
| }, |
| { |
| "epoch": 3.6843810758263125, |
| "grad_norm": 4.90625, |
| "learning_rate": 7.474900676257094e-05, |
| "loss": 0.039453125, |
| "memory(GiB)": 43.05, |
| "step": 5685, |
| "train_speed(iter/s)": 0.651846 |
| }, |
| { |
| "epoch": 3.6876215165262476, |
| "grad_norm": 8.5, |
| "learning_rate": 7.470243533366966e-05, |
| "loss": 0.034375, |
| "memory(GiB)": 43.05, |
| "step": 5690, |
| "train_speed(iter/s)": 0.651883 |
| }, |
| { |
| "epoch": 3.6908619572261827, |
| "grad_norm": 4.6875, |
| "learning_rate": 7.465583553668144e-05, |
| "loss": 0.0361328125, |
| "memory(GiB)": 43.05, |
| "step": 5695, |
| "train_speed(iter/s)": 0.652021 |
| }, |
| { |
| "epoch": 3.694102397926118, |
| "grad_norm": 12.0, |
| "learning_rate": 7.460920742512118e-05, |
| "loss": 0.046875, |
| "memory(GiB)": 43.05, |
| "step": 5700, |
| "train_speed(iter/s)": 0.652161 |
| }, |
| { |
| "epoch": 3.697342838626053, |
| "grad_norm": 3.421875, |
| "learning_rate": 7.45625510525362e-05, |
| "loss": 0.040625, |
| "memory(GiB)": 43.05, |
| "step": 5705, |
| "train_speed(iter/s)": 0.652185 |
| }, |
| { |
| "epoch": 3.700583279325988, |
| "grad_norm": 12.125, |
| "learning_rate": 7.451586647250635e-05, |
| "loss": 0.0333984375, |
| "memory(GiB)": 43.05, |
| "step": 5710, |
| "train_speed(iter/s)": 0.652326 |
| }, |
| { |
| "epoch": 3.7038237200259236, |
| "grad_norm": 8.6875, |
| "learning_rate": 7.446915373864384e-05, |
| "loss": 0.0380859375, |
| "memory(GiB)": 43.05, |
| "step": 5715, |
| "train_speed(iter/s)": 0.652367 |
| }, |
| { |
| "epoch": 3.7070641607258588, |
| "grad_norm": 5.21875, |
| "learning_rate": 7.442241290459318e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 5720, |
| "train_speed(iter/s)": 0.65226 |
| }, |
| { |
| "epoch": 3.710304601425794, |
| "grad_norm": 0.63671875, |
| "learning_rate": 7.437564402403123e-05, |
| "loss": 0.0220703125, |
| "memory(GiB)": 43.05, |
| "step": 5725, |
| "train_speed(iter/s)": 0.652272 |
| }, |
| { |
| "epoch": 3.713545042125729, |
| "grad_norm": 5.875, |
| "learning_rate": 7.4328847150667e-05, |
| "loss": 0.036328125, |
| "memory(GiB)": 43.05, |
| "step": 5730, |
| "train_speed(iter/s)": 0.652303 |
| }, |
| { |
| "epoch": 3.7167854828256646, |
| "grad_norm": 1.9765625, |
| "learning_rate": 7.428202233824164e-05, |
| "loss": 0.0318359375, |
| "memory(GiB)": 43.05, |
| "step": 5735, |
| "train_speed(iter/s)": 0.652285 |
| }, |
| { |
| "epoch": 3.7200259235255997, |
| "grad_norm": 6.0625, |
| "learning_rate": 7.423516964052844e-05, |
| "loss": 0.0306640625, |
| "memory(GiB)": 43.05, |
| "step": 5740, |
| "train_speed(iter/s)": 0.652526 |
| }, |
| { |
| "epoch": 3.723266364225535, |
| "grad_norm": 13.375, |
| "learning_rate": 7.418828911133263e-05, |
| "loss": 0.030859375, |
| "memory(GiB)": 43.05, |
| "step": 5745, |
| "train_speed(iter/s)": 0.652675 |
| }, |
| { |
| "epoch": 3.72650680492547, |
| "grad_norm": 1.984375, |
| "learning_rate": 7.414138080449149e-05, |
| "loss": 0.0375, |
| "memory(GiB)": 43.05, |
| "step": 5750, |
| "train_speed(iter/s)": 0.652823 |
| }, |
| { |
| "epoch": 3.729747245625405, |
| "grad_norm": 10.125, |
| "learning_rate": 7.409444477387416e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 5755, |
| "train_speed(iter/s)": 0.652839 |
| }, |
| { |
| "epoch": 3.73298768632534, |
| "grad_norm": 4.15625, |
| "learning_rate": 7.404748107338157e-05, |
| "loss": 0.0228515625, |
| "memory(GiB)": 43.05, |
| "step": 5760, |
| "train_speed(iter/s)": 0.652819 |
| }, |
| { |
| "epoch": 3.7362281270252753, |
| "grad_norm": 13.375, |
| "learning_rate": 7.400048975694653e-05, |
| "loss": 0.0314453125, |
| "memory(GiB)": 43.05, |
| "step": 5765, |
| "train_speed(iter/s)": 0.653069 |
| }, |
| { |
| "epoch": 3.7394685677252104, |
| "grad_norm": 3.9375, |
| "learning_rate": 7.395347087853349e-05, |
| "loss": 0.0380859375, |
| "memory(GiB)": 43.05, |
| "step": 5770, |
| "train_speed(iter/s)": 0.653094 |
| }, |
| { |
| "epoch": 3.742709008425146, |
| "grad_norm": 2.84375, |
| "learning_rate": 7.390642449213852e-05, |
| "loss": 0.03515625, |
| "memory(GiB)": 43.05, |
| "step": 5775, |
| "train_speed(iter/s)": 0.653279 |
| }, |
| { |
| "epoch": 3.745949449125081, |
| "grad_norm": 13.5625, |
| "learning_rate": 7.385935065178941e-05, |
| "loss": 0.039453125, |
| "memory(GiB)": 43.05, |
| "step": 5780, |
| "train_speed(iter/s)": 0.653289 |
| }, |
| { |
| "epoch": 3.749189889825016, |
| "grad_norm": 9.875, |
| "learning_rate": 7.381224941154535e-05, |
| "loss": 0.03359375, |
| "memory(GiB)": 43.05, |
| "step": 5785, |
| "train_speed(iter/s)": 0.653497 |
| }, |
| { |
| "epoch": 3.7524303305249513, |
| "grad_norm": 3.5625, |
| "learning_rate": 7.376512082549702e-05, |
| "loss": 0.0384765625, |
| "memory(GiB)": 43.05, |
| "step": 5790, |
| "train_speed(iter/s)": 0.653597 |
| }, |
| { |
| "epoch": 3.7556707712248865, |
| "grad_norm": 0.7421875, |
| "learning_rate": 7.371796494776659e-05, |
| "loss": 0.028125, |
| "memory(GiB)": 43.05, |
| "step": 5795, |
| "train_speed(iter/s)": 0.653667 |
| }, |
| { |
| "epoch": 3.758911211924822, |
| "grad_norm": 1.8828125, |
| "learning_rate": 7.367078183250746e-05, |
| "loss": 0.04453125, |
| "memory(GiB)": 43.05, |
| "step": 5800, |
| "train_speed(iter/s)": 0.653779 |
| }, |
| { |
| "epoch": 3.762151652624757, |
| "grad_norm": 4.03125, |
| "learning_rate": 7.362357153390436e-05, |
| "loss": 0.0296875, |
| "memory(GiB)": 43.05, |
| "step": 5805, |
| "train_speed(iter/s)": 0.653809 |
| }, |
| { |
| "epoch": 3.7653920933246923, |
| "grad_norm": 5.84375, |
| "learning_rate": 7.357633410617324e-05, |
| "loss": 0.0265625, |
| "memory(GiB)": 43.05, |
| "step": 5810, |
| "train_speed(iter/s)": 0.65388 |
| }, |
| { |
| "epoch": 3.7686325340246274, |
| "grad_norm": 1.7734375, |
| "learning_rate": 7.352906960356122e-05, |
| "loss": 0.027734375, |
| "memory(GiB)": 43.05, |
| "step": 5815, |
| "train_speed(iter/s)": 0.653861 |
| }, |
| { |
| "epoch": 3.7718729747245625, |
| "grad_norm": 0.5859375, |
| "learning_rate": 7.348177808034646e-05, |
| "loss": 0.038671875, |
| "memory(GiB)": 43.05, |
| "step": 5820, |
| "train_speed(iter/s)": 0.654005 |
| }, |
| { |
| "epoch": 3.7751134154244976, |
| "grad_norm": 3.96875, |
| "learning_rate": 7.34344595908382e-05, |
| "loss": 0.039453125, |
| "memory(GiB)": 43.05, |
| "step": 5825, |
| "train_speed(iter/s)": 0.654134 |
| }, |
| { |
| "epoch": 3.7783538561244328, |
| "grad_norm": 2.03125, |
| "learning_rate": 7.338711418937663e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 5830, |
| "train_speed(iter/s)": 0.654145 |
| }, |
| { |
| "epoch": 3.781594296824368, |
| "grad_norm": 11.3125, |
| "learning_rate": 7.333974193033281e-05, |
| "loss": 0.05390625, |
| "memory(GiB)": 43.05, |
| "step": 5835, |
| "train_speed(iter/s)": 0.654288 |
| }, |
| { |
| "epoch": 3.7848347375243034, |
| "grad_norm": 5.15625, |
| "learning_rate": 7.329234286810876e-05, |
| "loss": 0.0458984375, |
| "memory(GiB)": 43.05, |
| "step": 5840, |
| "train_speed(iter/s)": 0.654426 |
| }, |
| { |
| "epoch": 3.7880751782242386, |
| "grad_norm": 13.125, |
| "learning_rate": 7.324491705713712e-05, |
| "loss": 0.030078125, |
| "memory(GiB)": 43.05, |
| "step": 5845, |
| "train_speed(iter/s)": 0.654567 |
| }, |
| { |
| "epoch": 3.7913156189241737, |
| "grad_norm": 10.6875, |
| "learning_rate": 7.319746455188135e-05, |
| "loss": 0.025, |
| "memory(GiB)": 43.05, |
| "step": 5850, |
| "train_speed(iter/s)": 0.654819 |
| }, |
| { |
| "epoch": 3.794556059624109, |
| "grad_norm": 16.5, |
| "learning_rate": 7.314998540683556e-05, |
| "loss": 0.041796875, |
| "memory(GiB)": 43.05, |
| "step": 5855, |
| "train_speed(iter/s)": 0.654923 |
| }, |
| { |
| "epoch": 3.797796500324044, |
| "grad_norm": 11.5, |
| "learning_rate": 7.310247967652442e-05, |
| "loss": 0.0333984375, |
| "memory(GiB)": 43.05, |
| "step": 5860, |
| "train_speed(iter/s)": 0.654971 |
| }, |
| { |
| "epoch": 3.8010369410239795, |
| "grad_norm": 11.0, |
| "learning_rate": 7.305494741550313e-05, |
| "loss": 0.0619140625, |
| "memory(GiB)": 43.05, |
| "step": 5865, |
| "train_speed(iter/s)": 0.654998 |
| }, |
| { |
| "epoch": 3.8042773817239146, |
| "grad_norm": 0.60546875, |
| "learning_rate": 7.30073886783574e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 5870, |
| "train_speed(iter/s)": 0.655134 |
| }, |
| { |
| "epoch": 3.8075178224238497, |
| "grad_norm": 12.1875, |
| "learning_rate": 7.29598035197033e-05, |
| "loss": 0.02578125, |
| "memory(GiB)": 43.05, |
| "step": 5875, |
| "train_speed(iter/s)": 0.654989 |
| }, |
| { |
| "epoch": 3.810758263123785, |
| "grad_norm": 5.6875, |
| "learning_rate": 7.29121919941873e-05, |
| "loss": 0.024609375, |
| "memory(GiB)": 43.05, |
| "step": 5880, |
| "train_speed(iter/s)": 0.655133 |
| }, |
| { |
| "epoch": 3.81399870382372, |
| "grad_norm": 0.78125, |
| "learning_rate": 7.286455415648607e-05, |
| "loss": 0.0404296875, |
| "memory(GiB)": 43.05, |
| "step": 5885, |
| "train_speed(iter/s)": 0.655036 |
| }, |
| { |
| "epoch": 3.817239144523655, |
| "grad_norm": 1.9765625, |
| "learning_rate": 7.281689006130653e-05, |
| "loss": 0.0310546875, |
| "memory(GiB)": 43.05, |
| "step": 5890, |
| "train_speed(iter/s)": 0.655153 |
| }, |
| { |
| "epoch": 3.82047958522359, |
| "grad_norm": 2.4375, |
| "learning_rate": 7.276919976338579e-05, |
| "loss": 0.0392578125, |
| "memory(GiB)": 43.05, |
| "step": 5895, |
| "train_speed(iter/s)": 0.655211 |
| }, |
| { |
| "epoch": 3.8237200259235253, |
| "grad_norm": 11.625, |
| "learning_rate": 7.2721483317491e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 5900, |
| "train_speed(iter/s)": 0.655411 |
| }, |
| { |
| "epoch": 3.826960466623461, |
| "grad_norm": 2.484375, |
| "learning_rate": 7.267374077841937e-05, |
| "loss": 0.0521484375, |
| "memory(GiB)": 43.05, |
| "step": 5905, |
| "train_speed(iter/s)": 0.65545 |
| }, |
| { |
| "epoch": 3.830200907323396, |
| "grad_norm": 15.25, |
| "learning_rate": 7.262597220099807e-05, |
| "loss": 0.03828125, |
| "memory(GiB)": 43.05, |
| "step": 5910, |
| "train_speed(iter/s)": 0.655409 |
| }, |
| { |
| "epoch": 3.833441348023331, |
| "grad_norm": 12.875, |
| "learning_rate": 7.257817764008417e-05, |
| "loss": 0.0267578125, |
| "memory(GiB)": 43.05, |
| "step": 5915, |
| "train_speed(iter/s)": 0.655582 |
| }, |
| { |
| "epoch": 3.8366817887232663, |
| "grad_norm": 11.4375, |
| "learning_rate": 7.253035715056456e-05, |
| "loss": 0.02421875, |
| "memory(GiB)": 43.05, |
| "step": 5920, |
| "train_speed(iter/s)": 0.655653 |
| }, |
| { |
| "epoch": 3.839922229423202, |
| "grad_norm": 1.6953125, |
| "learning_rate": 7.248251078735592e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 5925, |
| "train_speed(iter/s)": 0.655897 |
| }, |
| { |
| "epoch": 3.843162670123137, |
| "grad_norm": 15.125, |
| "learning_rate": 7.243463860540467e-05, |
| "loss": 0.0408203125, |
| "memory(GiB)": 43.05, |
| "step": 5930, |
| "train_speed(iter/s)": 0.655988 |
| }, |
| { |
| "epoch": 3.846403110823072, |
| "grad_norm": 17.0, |
| "learning_rate": 7.238674065968683e-05, |
| "loss": 0.021875, |
| "memory(GiB)": 43.05, |
| "step": 5935, |
| "train_speed(iter/s)": 0.656155 |
| }, |
| { |
| "epoch": 3.849643551523007, |
| "grad_norm": 13.625, |
| "learning_rate": 7.233881700520805e-05, |
| "loss": 0.0185546875, |
| "memory(GiB)": 43.05, |
| "step": 5940, |
| "train_speed(iter/s)": 0.656297 |
| }, |
| { |
| "epoch": 3.8528839922229423, |
| "grad_norm": 13.8125, |
| "learning_rate": 7.229086769700348e-05, |
| "loss": 0.033984375, |
| "memory(GiB)": 43.05, |
| "step": 5945, |
| "train_speed(iter/s)": 0.656436 |
| }, |
| { |
| "epoch": 3.8561244329228774, |
| "grad_norm": 10.0, |
| "learning_rate": 7.224289279013773e-05, |
| "loss": 0.018359375, |
| "memory(GiB)": 43.05, |
| "step": 5950, |
| "train_speed(iter/s)": 0.656551 |
| }, |
| { |
| "epoch": 3.8593648736228126, |
| "grad_norm": 0.61328125, |
| "learning_rate": 7.219489233970485e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 5955, |
| "train_speed(iter/s)": 0.656645 |
| }, |
| { |
| "epoch": 3.8626053143227477, |
| "grad_norm": 3.53125, |
| "learning_rate": 7.214686640082815e-05, |
| "loss": 0.0296875, |
| "memory(GiB)": 43.05, |
| "step": 5960, |
| "train_speed(iter/s)": 0.656492 |
| }, |
| { |
| "epoch": 3.8658457550226832, |
| "grad_norm": 10.5, |
| "learning_rate": 7.209881502866024e-05, |
| "loss": 0.0294921875, |
| "memory(GiB)": 43.05, |
| "step": 5965, |
| "train_speed(iter/s)": 0.65655 |
| }, |
| { |
| "epoch": 3.8690861957226184, |
| "grad_norm": 11.375, |
| "learning_rate": 7.205073827838298e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 5970, |
| "train_speed(iter/s)": 0.656575 |
| }, |
| { |
| "epoch": 3.8723266364225535, |
| "grad_norm": 12.4375, |
| "learning_rate": 7.200263620520732e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 5975, |
| "train_speed(iter/s)": 0.656699 |
| }, |
| { |
| "epoch": 3.8755670771224886, |
| "grad_norm": 14.875, |
| "learning_rate": 7.195450886437334e-05, |
| "loss": 0.03671875, |
| "memory(GiB)": 43.05, |
| "step": 5980, |
| "train_speed(iter/s)": 0.65679 |
| }, |
| { |
| "epoch": 3.8788075178224237, |
| "grad_norm": 10.5625, |
| "learning_rate": 7.190635631115007e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 5985, |
| "train_speed(iter/s)": 0.656961 |
| }, |
| { |
| "epoch": 3.8820479585223593, |
| "grad_norm": 4.21875, |
| "learning_rate": 7.185817860083555e-05, |
| "loss": 0.0390625, |
| "memory(GiB)": 43.05, |
| "step": 5990, |
| "train_speed(iter/s)": 0.657 |
| }, |
| { |
| "epoch": 3.8852883992222944, |
| "grad_norm": 4.90625, |
| "learning_rate": 7.18099757887567e-05, |
| "loss": 0.0306640625, |
| "memory(GiB)": 43.05, |
| "step": 5995, |
| "train_speed(iter/s)": 0.657169 |
| }, |
| { |
| "epoch": 3.8885288399222295, |
| "grad_norm": 10.375, |
| "learning_rate": 7.176174793026924e-05, |
| "loss": 0.0435546875, |
| "memory(GiB)": 43.05, |
| "step": 6000, |
| "train_speed(iter/s)": 0.65731 |
| }, |
| { |
| "epoch": 3.8917692806221647, |
| "grad_norm": 8.6875, |
| "learning_rate": 7.171349508075768e-05, |
| "loss": 0.028515625, |
| "memory(GiB)": 43.05, |
| "step": 6005, |
| "train_speed(iter/s)": 0.61235 |
| }, |
| { |
| "epoch": 3.8950097213220998, |
| "grad_norm": 15.0, |
| "learning_rate": 7.166521729563523e-05, |
| "loss": 0.039453125, |
| "memory(GiB)": 43.05, |
| "step": 6010, |
| "train_speed(iter/s)": 0.612313 |
| }, |
| { |
| "epoch": 3.898250162022035, |
| "grad_norm": 9.375, |
| "learning_rate": 7.161691463034374e-05, |
| "loss": 0.03515625, |
| "memory(GiB)": 43.05, |
| "step": 6015, |
| "train_speed(iter/s)": 0.612442 |
| }, |
| { |
| "epoch": 3.90149060272197, |
| "grad_norm": 2.015625, |
| "learning_rate": 7.156858714035356e-05, |
| "loss": 0.0435546875, |
| "memory(GiB)": 43.05, |
| "step": 6020, |
| "train_speed(iter/s)": 0.612558 |
| }, |
| { |
| "epoch": 3.904731043421905, |
| "grad_norm": 2.421875, |
| "learning_rate": 7.152023488116368e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 6025, |
| "train_speed(iter/s)": 0.61267 |
| }, |
| { |
| "epoch": 3.9079714841218407, |
| "grad_norm": 4.0625, |
| "learning_rate": 7.147185790830144e-05, |
| "loss": 0.030078125, |
| "memory(GiB)": 43.05, |
| "step": 6030, |
| "train_speed(iter/s)": 0.612772 |
| }, |
| { |
| "epoch": 3.911211924821776, |
| "grad_norm": 1.7109375, |
| "learning_rate": 7.142345627732255e-05, |
| "loss": 0.0447265625, |
| "memory(GiB)": 43.05, |
| "step": 6035, |
| "train_speed(iter/s)": 0.612813 |
| }, |
| { |
| "epoch": 3.914452365521711, |
| "grad_norm": 14.75, |
| "learning_rate": 7.137503004381111e-05, |
| "loss": 0.0228515625, |
| "memory(GiB)": 43.05, |
| "step": 6040, |
| "train_speed(iter/s)": 0.612797 |
| }, |
| { |
| "epoch": 3.917692806221646, |
| "grad_norm": 13.0625, |
| "learning_rate": 7.132657926337942e-05, |
| "loss": 0.0373046875, |
| "memory(GiB)": 43.05, |
| "step": 6045, |
| "train_speed(iter/s)": 0.612879 |
| }, |
| { |
| "epoch": 3.920933246921581, |
| "grad_norm": 11.375, |
| "learning_rate": 7.127810399166798e-05, |
| "loss": 0.04609375, |
| "memory(GiB)": 43.05, |
| "step": 6050, |
| "train_speed(iter/s)": 0.613019 |
| }, |
| { |
| "epoch": 3.9241736876215167, |
| "grad_norm": 3.046875, |
| "learning_rate": 7.122960428434544e-05, |
| "loss": 0.0408203125, |
| "memory(GiB)": 43.05, |
| "step": 6055, |
| "train_speed(iter/s)": 0.610811 |
| }, |
| { |
| "epoch": 3.927414128321452, |
| "grad_norm": 13.6875, |
| "learning_rate": 7.118108019710847e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 6060, |
| "train_speed(iter/s)": 0.610559 |
| }, |
| { |
| "epoch": 3.930654569021387, |
| "grad_norm": 8.3125, |
| "learning_rate": 7.113253178568176e-05, |
| "loss": 0.0484375, |
| "memory(GiB)": 43.05, |
| "step": 6065, |
| "train_speed(iter/s)": 0.6108 |
| }, |
| { |
| "epoch": 3.933895009721322, |
| "grad_norm": 13.875, |
| "learning_rate": 7.108395910581793e-05, |
| "loss": 0.039453125, |
| "memory(GiB)": 43.05, |
| "step": 6070, |
| "train_speed(iter/s)": 0.610517 |
| }, |
| { |
| "epoch": 3.9371354504212572, |
| "grad_norm": 4.96875, |
| "learning_rate": 7.10353622132975e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 6075, |
| "train_speed(iter/s)": 0.610727 |
| }, |
| { |
| "epoch": 3.9403758911211924, |
| "grad_norm": 1.4609375, |
| "learning_rate": 7.098674116392873e-05, |
| "loss": 0.0255859375, |
| "memory(GiB)": 43.05, |
| "step": 6080, |
| "train_speed(iter/s)": 0.610834 |
| }, |
| { |
| "epoch": 3.9436163318211275, |
| "grad_norm": 2.078125, |
| "learning_rate": 7.093809601354769e-05, |
| "loss": 0.02734375, |
| "memory(GiB)": 43.05, |
| "step": 6085, |
| "train_speed(iter/s)": 0.610838 |
| }, |
| { |
| "epoch": 3.9468567725210626, |
| "grad_norm": 0.63671875, |
| "learning_rate": 7.08894268180181e-05, |
| "loss": 0.048828125, |
| "memory(GiB)": 43.05, |
| "step": 6090, |
| "train_speed(iter/s)": 0.610448 |
| }, |
| { |
| "epoch": 3.950097213220998, |
| "grad_norm": 10.5625, |
| "learning_rate": 7.084073363323124e-05, |
| "loss": 0.0283203125, |
| "memory(GiB)": 43.05, |
| "step": 6095, |
| "train_speed(iter/s)": 0.610269 |
| }, |
| { |
| "epoch": 3.9533376539209333, |
| "grad_norm": 11.0625, |
| "learning_rate": 7.079201651510602e-05, |
| "loss": 0.037890625, |
| "memory(GiB)": 43.05, |
| "step": 6100, |
| "train_speed(iter/s)": 0.610368 |
| }, |
| { |
| "epoch": 3.9565780946208684, |
| "grad_norm": 12.0625, |
| "learning_rate": 7.074327551958883e-05, |
| "loss": 0.04453125, |
| "memory(GiB)": 43.05, |
| "step": 6105, |
| "train_speed(iter/s)": 0.610338 |
| }, |
| { |
| "epoch": 3.9598185353208035, |
| "grad_norm": 6.9375, |
| "learning_rate": 7.069451070265342e-05, |
| "loss": 0.02578125, |
| "memory(GiB)": 43.05, |
| "step": 6110, |
| "train_speed(iter/s)": 0.610249 |
| }, |
| { |
| "epoch": 3.963058976020739, |
| "grad_norm": 1.390625, |
| "learning_rate": 7.064572212030097e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 6115, |
| "train_speed(iter/s)": 0.610338 |
| }, |
| { |
| "epoch": 3.966299416720674, |
| "grad_norm": 12.625, |
| "learning_rate": 7.059690982855988e-05, |
| "loss": 0.028515625, |
| "memory(GiB)": 43.05, |
| "step": 6120, |
| "train_speed(iter/s)": 0.610286 |
| }, |
| { |
| "epoch": 3.9695398574206093, |
| "grad_norm": 4.4375, |
| "learning_rate": 7.054807388348579e-05, |
| "loss": 0.019140625, |
| "memory(GiB)": 43.05, |
| "step": 6125, |
| "train_speed(iter/s)": 0.610403 |
| }, |
| { |
| "epoch": 3.9727802981205445, |
| "grad_norm": 5.65625, |
| "learning_rate": 7.049921434116158e-05, |
| "loss": 0.0181640625, |
| "memory(GiB)": 43.05, |
| "step": 6130, |
| "train_speed(iter/s)": 0.610555 |
| }, |
| { |
| "epoch": 3.9760207388204796, |
| "grad_norm": 2.734375, |
| "learning_rate": 7.045033125769713e-05, |
| "loss": 0.0212890625, |
| "memory(GiB)": 43.05, |
| "step": 6135, |
| "train_speed(iter/s)": 0.610652 |
| }, |
| { |
| "epoch": 3.9792611795204147, |
| "grad_norm": 2.28125, |
| "learning_rate": 7.04014246892294e-05, |
| "loss": 0.0404296875, |
| "memory(GiB)": 43.05, |
| "step": 6140, |
| "train_speed(iter/s)": 0.610823 |
| }, |
| { |
| "epoch": 3.98250162022035, |
| "grad_norm": 2.390625, |
| "learning_rate": 7.035249469192236e-05, |
| "loss": 0.0384765625, |
| "memory(GiB)": 43.05, |
| "step": 6145, |
| "train_speed(iter/s)": 0.610933 |
| }, |
| { |
| "epoch": 3.985742060920285, |
| "grad_norm": 11.375, |
| "learning_rate": 7.030354132196678e-05, |
| "loss": 0.021484375, |
| "memory(GiB)": 43.05, |
| "step": 6150, |
| "train_speed(iter/s)": 0.611014 |
| }, |
| { |
| "epoch": 3.9889825016202205, |
| "grad_norm": 0.76171875, |
| "learning_rate": 7.025456463558039e-05, |
| "loss": 0.0302734375, |
| "memory(GiB)": 43.05, |
| "step": 6155, |
| "train_speed(iter/s)": 0.610982 |
| }, |
| { |
| "epoch": 3.9922229423201556, |
| "grad_norm": 2.390625, |
| "learning_rate": 7.020556468900761e-05, |
| "loss": 0.0421875, |
| "memory(GiB)": 43.05, |
| "step": 6160, |
| "train_speed(iter/s)": 0.610946 |
| }, |
| { |
| "epoch": 3.9954633830200907, |
| "grad_norm": 1.1015625, |
| "learning_rate": 7.01565415385196e-05, |
| "loss": 0.021875, |
| "memory(GiB)": 43.05, |
| "step": 6165, |
| "train_speed(iter/s)": 0.611032 |
| }, |
| { |
| "epoch": 3.998703823720026, |
| "grad_norm": 6.84375, |
| "learning_rate": 7.010749524041417e-05, |
| "loss": 0.0248046875, |
| "memory(GiB)": 43.05, |
| "step": 6170, |
| "train_speed(iter/s)": 0.611242 |
| }, |
| { |
| "epoch": 4.001944264419961, |
| "grad_norm": 6.84375, |
| "learning_rate": 7.005842585101575e-05, |
| "loss": 0.0287109375, |
| "memory(GiB)": 43.05, |
| "step": 6175, |
| "train_speed(iter/s)": 0.611346 |
| }, |
| { |
| "epoch": 4.0051847051198965, |
| "grad_norm": 13.5625, |
| "learning_rate": 7.00093334266752e-05, |
| "loss": 0.04453125, |
| "memory(GiB)": 43.05, |
| "step": 6180, |
| "train_speed(iter/s)": 0.61137 |
| }, |
| { |
| "epoch": 4.008425145819832, |
| "grad_norm": 2.265625, |
| "learning_rate": 6.996021802376991e-05, |
| "loss": 0.0287109375, |
| "memory(GiB)": 43.05, |
| "step": 6185, |
| "train_speed(iter/s)": 0.611606 |
| }, |
| { |
| "epoch": 4.011665586519767, |
| "grad_norm": 0.98046875, |
| "learning_rate": 6.991107969870363e-05, |
| "loss": 0.034375, |
| "memory(GiB)": 43.05, |
| "step": 6190, |
| "train_speed(iter/s)": 0.611784 |
| }, |
| { |
| "epoch": 4.014906027219702, |
| "grad_norm": 14.3125, |
| "learning_rate": 6.986191850790641e-05, |
| "loss": 0.038671875, |
| "memory(GiB)": 43.05, |
| "step": 6195, |
| "train_speed(iter/s)": 0.611802 |
| }, |
| { |
| "epoch": 4.018146467919637, |
| "grad_norm": 1.4375, |
| "learning_rate": 6.981273450783462e-05, |
| "loss": 0.0341796875, |
| "memory(GiB)": 43.05, |
| "step": 6200, |
| "train_speed(iter/s)": 0.611746 |
| }, |
| { |
| "epoch": 4.021386908619572, |
| "grad_norm": 0.99609375, |
| "learning_rate": 6.976352775497075e-05, |
| "loss": 0.03515625, |
| "memory(GiB)": 43.05, |
| "step": 6205, |
| "train_speed(iter/s)": 0.611922 |
| }, |
| { |
| "epoch": 4.024627349319507, |
| "grad_norm": 14.4375, |
| "learning_rate": 6.971429830582347e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 6210, |
| "train_speed(iter/s)": 0.611948 |
| }, |
| { |
| "epoch": 4.027867790019442, |
| "grad_norm": 3.328125, |
| "learning_rate": 6.966504621692753e-05, |
| "loss": 0.0228515625, |
| "memory(GiB)": 43.05, |
| "step": 6215, |
| "train_speed(iter/s)": 0.612064 |
| }, |
| { |
| "epoch": 4.0311082307193775, |
| "grad_norm": 2.78125, |
| "learning_rate": 6.961577154484363e-05, |
| "loss": 0.0474609375, |
| "memory(GiB)": 43.05, |
| "step": 6220, |
| "train_speed(iter/s)": 0.61229 |
| }, |
| { |
| "epoch": 4.034348671419313, |
| "grad_norm": 10.75, |
| "learning_rate": 6.956647434615841e-05, |
| "loss": 0.0251953125, |
| "memory(GiB)": 43.05, |
| "step": 6225, |
| "train_speed(iter/s)": 0.612516 |
| }, |
| { |
| "epoch": 4.037589112119249, |
| "grad_norm": 3.078125, |
| "learning_rate": 6.951715467748442e-05, |
| "loss": 0.023828125, |
| "memory(GiB)": 43.05, |
| "step": 6230, |
| "train_speed(iter/s)": 0.612694 |
| }, |
| { |
| "epoch": 4.040829552819184, |
| "grad_norm": 14.5, |
| "learning_rate": 6.946781259545996e-05, |
| "loss": 0.025, |
| "memory(GiB)": 43.05, |
| "step": 6235, |
| "train_speed(iter/s)": 0.612841 |
| }, |
| { |
| "epoch": 4.044069993519119, |
| "grad_norm": 2.671875, |
| "learning_rate": 6.941844815674912e-05, |
| "loss": 0.0462890625, |
| "memory(GiB)": 43.05, |
| "step": 6240, |
| "train_speed(iter/s)": 0.612874 |
| }, |
| { |
| "epoch": 4.047310434219054, |
| "grad_norm": 3.640625, |
| "learning_rate": 6.936906141804164e-05, |
| "loss": 0.03515625, |
| "memory(GiB)": 43.05, |
| "step": 6245, |
| "train_speed(iter/s)": 0.612924 |
| }, |
| { |
| "epoch": 4.050550874918989, |
| "grad_norm": 9.9375, |
| "learning_rate": 6.931965243605286e-05, |
| "loss": 0.01796875, |
| "memory(GiB)": 43.05, |
| "step": 6250, |
| "train_speed(iter/s)": 0.613039 |
| }, |
| { |
| "epoch": 4.053791315618924, |
| "grad_norm": 13.125, |
| "learning_rate": 6.927022126752368e-05, |
| "loss": 0.03046875, |
| "memory(GiB)": 43.05, |
| "step": 6255, |
| "train_speed(iter/s)": 0.613154 |
| }, |
| { |
| "epoch": 4.057031756318859, |
| "grad_norm": 8.5625, |
| "learning_rate": 6.922076796922049e-05, |
| "loss": 0.0171875, |
| "memory(GiB)": 43.05, |
| "step": 6260, |
| "train_speed(iter/s)": 0.613357 |
| }, |
| { |
| "epoch": 4.0602721970187945, |
| "grad_norm": 9.4375, |
| "learning_rate": 6.917129259793506e-05, |
| "loss": 0.02265625, |
| "memory(GiB)": 43.05, |
| "step": 6265, |
| "train_speed(iter/s)": 0.613563 |
| }, |
| { |
| "epoch": 4.06351263771873, |
| "grad_norm": 14.0, |
| "learning_rate": 6.912179521048452e-05, |
| "loss": 0.0455078125, |
| "memory(GiB)": 43.05, |
| "step": 6270, |
| "train_speed(iter/s)": 0.613557 |
| }, |
| { |
| "epoch": 4.066753078418665, |
| "grad_norm": 13.375, |
| "learning_rate": 6.90722758637113e-05, |
| "loss": 0.022265625, |
| "memory(GiB)": 43.05, |
| "step": 6275, |
| "train_speed(iter/s)": 0.613633 |
| }, |
| { |
| "epoch": 4.0699935191186, |
| "grad_norm": 3.484375, |
| "learning_rate": 6.902273461448305e-05, |
| "loss": 0.0474609375, |
| "memory(GiB)": 43.05, |
| "step": 6280, |
| "train_speed(iter/s)": 0.613476 |
| }, |
| { |
| "epoch": 4.073233959818535, |
| "grad_norm": 8.625, |
| "learning_rate": 6.897317151969254e-05, |
| "loss": 0.0416015625, |
| "memory(GiB)": 43.05, |
| "step": 6285, |
| "train_speed(iter/s)": 0.61347 |
| }, |
| { |
| "epoch": 4.07647440051847, |
| "grad_norm": 1.171875, |
| "learning_rate": 6.892358663625766e-05, |
| "loss": 0.0294921875, |
| "memory(GiB)": 43.05, |
| "step": 6290, |
| "train_speed(iter/s)": 0.613416 |
| }, |
| { |
| "epoch": 4.079714841218406, |
| "grad_norm": 16.375, |
| "learning_rate": 6.887398002112129e-05, |
| "loss": 0.036328125, |
| "memory(GiB)": 43.05, |
| "step": 6295, |
| "train_speed(iter/s)": 0.613491 |
| }, |
| { |
| "epoch": 4.082955281918341, |
| "grad_norm": 9.1875, |
| "learning_rate": 6.88243517312513e-05, |
| "loss": 0.0330078125, |
| "memory(GiB)": 43.05, |
| "step": 6300, |
| "train_speed(iter/s)": 0.613596 |
| }, |
| { |
| "epoch": 4.086195722618276, |
| "grad_norm": 10.0625, |
| "learning_rate": 6.877470182364042e-05, |
| "loss": 0.0255859375, |
| "memory(GiB)": 43.05, |
| "step": 6305, |
| "train_speed(iter/s)": 0.613591 |
| }, |
| { |
| "epoch": 4.0894361633182115, |
| "grad_norm": 9.25, |
| "learning_rate": 6.872503035530626e-05, |
| "loss": 0.0349609375, |
| "memory(GiB)": 43.05, |
| "step": 6310, |
| "train_speed(iter/s)": 0.613738 |
| }, |
| { |
| "epoch": 4.092676604018147, |
| "grad_norm": 1.9921875, |
| "learning_rate": 6.867533738329113e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 6315, |
| "train_speed(iter/s)": 0.613884 |
| }, |
| { |
| "epoch": 4.095917044718082, |
| "grad_norm": 14.75, |
| "learning_rate": 6.862562296466208e-05, |
| "loss": 0.03203125, |
| "memory(GiB)": 43.05, |
| "step": 6320, |
| "train_speed(iter/s)": 0.613993 |
| }, |
| { |
| "epoch": 4.099157485418017, |
| "grad_norm": 9.875, |
| "learning_rate": 6.857588715651072e-05, |
| "loss": 0.033203125, |
| "memory(GiB)": 43.05, |
| "step": 6325, |
| "train_speed(iter/s)": 0.614134 |
| }, |
| { |
| "epoch": 4.102397926117952, |
| "grad_norm": 0.51953125, |
| "learning_rate": 6.852613001595329e-05, |
| "loss": 0.0244140625, |
| "memory(GiB)": 43.05, |
| "step": 6330, |
| "train_speed(iter/s)": 0.614223 |
| }, |
| { |
| "epoch": 4.105638366817887, |
| "grad_norm": 2.359375, |
| "learning_rate": 6.847635160013051e-05, |
| "loss": 0.030078125, |
| "memory(GiB)": 43.05, |
| "step": 6335, |
| "train_speed(iter/s)": 0.614396 |
| }, |
| { |
| "epoch": 4.108878807517822, |
| "grad_norm": 4.59375, |
| "learning_rate": 6.842655196620753e-05, |
| "loss": 0.0181640625, |
| "memory(GiB)": 43.05, |
| "step": 6340, |
| "train_speed(iter/s)": 0.614538 |
| }, |
| { |
| "epoch": 4.112119248217757, |
| "grad_norm": 11.3125, |
| "learning_rate": 6.837673117137388e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 6345, |
| "train_speed(iter/s)": 0.614612 |
| }, |
| { |
| "epoch": 4.115359688917692, |
| "grad_norm": 10.25, |
| "learning_rate": 6.832688927284336e-05, |
| "loss": 0.021484375, |
| "memory(GiB)": 43.05, |
| "step": 6350, |
| "train_speed(iter/s)": 0.6147 |
| }, |
| { |
| "epoch": 4.118600129617628, |
| "grad_norm": 1.0703125, |
| "learning_rate": 6.827702632785402e-05, |
| "loss": 0.0279296875, |
| "memory(GiB)": 43.05, |
| "step": 6355, |
| "train_speed(iter/s)": 0.614879 |
| }, |
| { |
| "epoch": 4.121840570317564, |
| "grad_norm": 7.96875, |
| "learning_rate": 6.822714239366811e-05, |
| "loss": 0.0302734375, |
| "memory(GiB)": 43.05, |
| "step": 6360, |
| "train_speed(iter/s)": 0.615015 |
| }, |
| { |
| "epoch": 4.125081011017499, |
| "grad_norm": 14.0, |
| "learning_rate": 6.817723752757195e-05, |
| "loss": 0.033984375, |
| "memory(GiB)": 43.05, |
| "step": 6365, |
| "train_speed(iter/s)": 0.615194 |
| }, |
| { |
| "epoch": 4.128321451717434, |
| "grad_norm": 13.4375, |
| "learning_rate": 6.812731178687587e-05, |
| "loss": 0.0330078125, |
| "memory(GiB)": 43.05, |
| "step": 6370, |
| "train_speed(iter/s)": 0.61525 |
| }, |
| { |
| "epoch": 4.131561892417369, |
| "grad_norm": 10.9375, |
| "learning_rate": 6.807736522891424e-05, |
| "loss": 0.048828125, |
| "memory(GiB)": 43.05, |
| "step": 6375, |
| "train_speed(iter/s)": 0.61541 |
| }, |
| { |
| "epoch": 4.134802333117304, |
| "grad_norm": 8.5, |
| "learning_rate": 6.802739791104529e-05, |
| "loss": 0.037890625, |
| "memory(GiB)": 43.05, |
| "step": 6380, |
| "train_speed(iter/s)": 0.615546 |
| }, |
| { |
| "epoch": 4.138042773817239, |
| "grad_norm": 14.8125, |
| "learning_rate": 6.79774098906511e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 6385, |
| "train_speed(iter/s)": 0.615397 |
| }, |
| { |
| "epoch": 4.141283214517174, |
| "grad_norm": 0.88671875, |
| "learning_rate": 6.792740122513755e-05, |
| "loss": 0.035546875, |
| "memory(GiB)": 43.05, |
| "step": 6390, |
| "train_speed(iter/s)": 0.615513 |
| }, |
| { |
| "epoch": 4.144523655217109, |
| "grad_norm": 0.56640625, |
| "learning_rate": 6.78773719719342e-05, |
| "loss": 0.0458984375, |
| "memory(GiB)": 43.05, |
| "step": 6395, |
| "train_speed(iter/s)": 0.615534 |
| }, |
| { |
| "epoch": 4.1477640959170445, |
| "grad_norm": 10.5, |
| "learning_rate": 6.782732218849424e-05, |
| "loss": 0.0482421875, |
| "memory(GiB)": 43.05, |
| "step": 6400, |
| "train_speed(iter/s)": 0.615526 |
| }, |
| { |
| "epoch": 4.15100453661698, |
| "grad_norm": 14.0625, |
| "learning_rate": 6.777725193229448e-05, |
| "loss": 0.02275390625, |
| "memory(GiB)": 43.05, |
| "step": 6405, |
| "train_speed(iter/s)": 0.615642 |
| }, |
| { |
| "epoch": 4.154244977316915, |
| "grad_norm": 13.0625, |
| "learning_rate": 6.772716126083521e-05, |
| "loss": 0.0296875, |
| "memory(GiB)": 43.05, |
| "step": 6410, |
| "train_speed(iter/s)": 0.615752 |
| }, |
| { |
| "epoch": 4.15748541801685, |
| "grad_norm": 13.0625, |
| "learning_rate": 6.767705023164016e-05, |
| "loss": 0.033984375, |
| "memory(GiB)": 43.05, |
| "step": 6415, |
| "train_speed(iter/s)": 0.615863 |
| }, |
| { |
| "epoch": 4.160725858716786, |
| "grad_norm": 0.625, |
| "learning_rate": 6.762691890225647e-05, |
| "loss": 0.029296875, |
| "memory(GiB)": 43.05, |
| "step": 6420, |
| "train_speed(iter/s)": 0.616061 |
| }, |
| { |
| "epoch": 4.163966299416721, |
| "grad_norm": 1.7265625, |
| "learning_rate": 6.757676733025456e-05, |
| "loss": 0.0314453125, |
| "memory(GiB)": 43.05, |
| "step": 6425, |
| "train_speed(iter/s)": 0.616173 |
| }, |
| { |
| "epoch": 4.167206740116656, |
| "grad_norm": 6.375, |
| "learning_rate": 6.752659557322812e-05, |
| "loss": 0.035546875, |
| "memory(GiB)": 43.05, |
| "step": 6430, |
| "train_speed(iter/s)": 0.616291 |
| }, |
| { |
| "epoch": 4.170447180816591, |
| "grad_norm": 4.1875, |
| "learning_rate": 6.747640368879401e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 6435, |
| "train_speed(iter/s)": 0.616452 |
| }, |
| { |
| "epoch": 4.173687621516526, |
| "grad_norm": 2.171875, |
| "learning_rate": 6.742619173459218e-05, |
| "loss": 0.0275390625, |
| "memory(GiB)": 43.05, |
| "step": 6440, |
| "train_speed(iter/s)": 0.61667 |
| }, |
| { |
| "epoch": 4.1769280622164615, |
| "grad_norm": 0.87890625, |
| "learning_rate": 6.737595976828568e-05, |
| "loss": 0.0306640625, |
| "memory(GiB)": 43.05, |
| "step": 6445, |
| "train_speed(iter/s)": 0.616679 |
| }, |
| { |
| "epoch": 4.180168502916397, |
| "grad_norm": 16.25, |
| "learning_rate": 6.732570784756051e-05, |
| "loss": 0.0279296875, |
| "memory(GiB)": 43.05, |
| "step": 6450, |
| "train_speed(iter/s)": 0.616704 |
| }, |
| { |
| "epoch": 4.183408943616332, |
| "grad_norm": 1.1171875, |
| "learning_rate": 6.727543603012559e-05, |
| "loss": 0.0328125, |
| "memory(GiB)": 43.05, |
| "step": 6455, |
| "train_speed(iter/s)": 0.616801 |
| }, |
| { |
| "epoch": 4.186649384316267, |
| "grad_norm": 10.5, |
| "learning_rate": 6.722514437371267e-05, |
| "loss": 0.017578125, |
| "memory(GiB)": 43.05, |
| "step": 6460, |
| "train_speed(iter/s)": 0.616851 |
| }, |
| { |
| "epoch": 4.189889825016202, |
| "grad_norm": 3.765625, |
| "learning_rate": 6.717483293607633e-05, |
| "loss": 0.032421875, |
| "memory(GiB)": 43.05, |
| "step": 6465, |
| "train_speed(iter/s)": 0.616955 |
| }, |
| { |
| "epoch": 4.193130265716137, |
| "grad_norm": 12.0, |
| "learning_rate": 6.71245017749938e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 6470, |
| "train_speed(iter/s)": 0.616829 |
| }, |
| { |
| "epoch": 4.196370706416072, |
| "grad_norm": 4.71875, |
| "learning_rate": 6.707415094826505e-05, |
| "loss": 0.037109375, |
| "memory(GiB)": 43.05, |
| "step": 6475, |
| "train_speed(iter/s)": 0.61682 |
| }, |
| { |
| "epoch": 4.199611147116007, |
| "grad_norm": 14.5, |
| "learning_rate": 6.702378051371254e-05, |
| "loss": 0.0185546875, |
| "memory(GiB)": 43.05, |
| "step": 6480, |
| "train_speed(iter/s)": 0.616836 |
| }, |
| { |
| "epoch": 4.202851587815943, |
| "grad_norm": 2.828125, |
| "learning_rate": 6.697339052918131e-05, |
| "loss": 0.033203125, |
| "memory(GiB)": 43.05, |
| "step": 6485, |
| "train_speed(iter/s)": 0.61684 |
| }, |
| { |
| "epoch": 4.2060920285158785, |
| "grad_norm": 2.3125, |
| "learning_rate": 6.692298105253883e-05, |
| "loss": 0.0447265625, |
| "memory(GiB)": 43.05, |
| "step": 6490, |
| "train_speed(iter/s)": 0.616881 |
| }, |
| { |
| "epoch": 4.209332469215814, |
| "grad_norm": 13.75, |
| "learning_rate": 6.687255214167496e-05, |
| "loss": 0.0390625, |
| "memory(GiB)": 43.05, |
| "step": 6495, |
| "train_speed(iter/s)": 0.616836 |
| }, |
| { |
| "epoch": 4.212572909915749, |
| "grad_norm": 1.6328125, |
| "learning_rate": 6.682210385450185e-05, |
| "loss": 0.0279296875, |
| "memory(GiB)": 43.05, |
| "step": 6500, |
| "train_speed(iter/s)": 0.617034 |
| }, |
| { |
| "epoch": 4.215813350615684, |
| "grad_norm": 3.109375, |
| "learning_rate": 6.677163624895393e-05, |
| "loss": 0.0240234375, |
| "memory(GiB)": 43.05, |
| "step": 6505, |
| "train_speed(iter/s)": 0.6172 |
| }, |
| { |
| "epoch": 4.219053791315619, |
| "grad_norm": 16.0, |
| "learning_rate": 6.672114938298785e-05, |
| "loss": 0.040625, |
| "memory(GiB)": 43.05, |
| "step": 6510, |
| "train_speed(iter/s)": 0.617218 |
| }, |
| { |
| "epoch": 4.222294232015554, |
| "grad_norm": 1.6875, |
| "learning_rate": 6.667064331458228e-05, |
| "loss": 0.0439453125, |
| "memory(GiB)": 43.05, |
| "step": 6515, |
| "train_speed(iter/s)": 0.61736 |
| }, |
| { |
| "epoch": 4.225534672715489, |
| "grad_norm": 15.875, |
| "learning_rate": 6.662011810173806e-05, |
| "loss": 0.0361328125, |
| "memory(GiB)": 43.05, |
| "step": 6520, |
| "train_speed(iter/s)": 0.617442 |
| }, |
| { |
| "epoch": 4.228775113415424, |
| "grad_norm": 6.90625, |
| "learning_rate": 6.656957380247792e-05, |
| "loss": 0.0234375, |
| "memory(GiB)": 43.05, |
| "step": 6525, |
| "train_speed(iter/s)": 0.617564 |
| }, |
| { |
| "epoch": 4.2320155541153595, |
| "grad_norm": 11.375, |
| "learning_rate": 6.651901047484654e-05, |
| "loss": 0.0296875, |
| "memory(GiB)": 43.05, |
| "step": 6530, |
| "train_speed(iter/s)": 0.61773 |
| }, |
| { |
| "epoch": 4.235255994815295, |
| "grad_norm": 3.796875, |
| "learning_rate": 6.646842817691047e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 6535, |
| "train_speed(iter/s)": 0.617777 |
| }, |
| { |
| "epoch": 4.23849643551523, |
| "grad_norm": 3.515625, |
| "learning_rate": 6.641782696675805e-05, |
| "loss": 0.0412109375, |
| "memory(GiB)": 43.05, |
| "step": 6540, |
| "train_speed(iter/s)": 0.617863 |
| }, |
| { |
| "epoch": 4.241736876215166, |
| "grad_norm": 2.21875, |
| "learning_rate": 6.636720690249928e-05, |
| "loss": 0.0314453125, |
| "memory(GiB)": 43.05, |
| "step": 6545, |
| "train_speed(iter/s)": 0.617916 |
| }, |
| { |
| "epoch": 4.244977316915101, |
| "grad_norm": 10.0, |
| "learning_rate": 6.631656804226589e-05, |
| "loss": 0.0263671875, |
| "memory(GiB)": 43.05, |
| "step": 6550, |
| "train_speed(iter/s)": 0.617946 |
| }, |
| { |
| "epoch": 4.248217757615036, |
| "grad_norm": 1.046875, |
| "learning_rate": 6.626591044421113e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 6555, |
| "train_speed(iter/s)": 0.618107 |
| }, |
| { |
| "epoch": 4.251458198314971, |
| "grad_norm": 0.6328125, |
| "learning_rate": 6.621523416650983e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 6560, |
| "train_speed(iter/s)": 0.618259 |
| }, |
| { |
| "epoch": 4.254698639014906, |
| "grad_norm": 13.125, |
| "learning_rate": 6.616453926735821e-05, |
| "loss": 0.0169921875, |
| "memory(GiB)": 43.05, |
| "step": 6565, |
| "train_speed(iter/s)": 0.618288 |
| }, |
| { |
| "epoch": 4.257939079714841, |
| "grad_norm": 2.796875, |
| "learning_rate": 6.611382580497389e-05, |
| "loss": 0.018359375, |
| "memory(GiB)": 43.05, |
| "step": 6570, |
| "train_speed(iter/s)": 0.618287 |
| }, |
| { |
| "epoch": 4.261179520414776, |
| "grad_norm": 9.125, |
| "learning_rate": 6.606309383759586e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 6575, |
| "train_speed(iter/s)": 0.618423 |
| }, |
| { |
| "epoch": 4.2644199611147116, |
| "grad_norm": 0.48828125, |
| "learning_rate": 6.60123434234843e-05, |
| "loss": 0.02734375, |
| "memory(GiB)": 43.05, |
| "step": 6580, |
| "train_speed(iter/s)": 0.618644 |
| }, |
| { |
| "epoch": 4.267660401814647, |
| "grad_norm": 1.0390625, |
| "learning_rate": 6.596157462092059e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 6585, |
| "train_speed(iter/s)": 0.618697 |
| }, |
| { |
| "epoch": 4.270900842514582, |
| "grad_norm": 3.984375, |
| "learning_rate": 6.591078748820725e-05, |
| "loss": 0.030859375, |
| "memory(GiB)": 43.05, |
| "step": 6590, |
| "train_speed(iter/s)": 0.618772 |
| }, |
| { |
| "epoch": 4.274141283214517, |
| "grad_norm": 18.125, |
| "learning_rate": 6.585998208366781e-05, |
| "loss": 0.03671875, |
| "memory(GiB)": 43.05, |
| "step": 6595, |
| "train_speed(iter/s)": 0.618836 |
| }, |
| { |
| "epoch": 4.277381723914452, |
| "grad_norm": 16.25, |
| "learning_rate": 6.580915846564683e-05, |
| "loss": 0.0462890625, |
| "memory(GiB)": 43.05, |
| "step": 6600, |
| "train_speed(iter/s)": 0.618855 |
| }, |
| { |
| "epoch": 4.280622164614387, |
| "grad_norm": 2.515625, |
| "learning_rate": 6.575831669250976e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 6605, |
| "train_speed(iter/s)": 0.618769 |
| }, |
| { |
| "epoch": 4.283862605314322, |
| "grad_norm": 11.3125, |
| "learning_rate": 6.570745682264288e-05, |
| "loss": 0.0205078125, |
| "memory(GiB)": 43.05, |
| "step": 6610, |
| "train_speed(iter/s)": 0.618837 |
| }, |
| { |
| "epoch": 4.287103046014258, |
| "grad_norm": 5.0, |
| "learning_rate": 6.565657891445326e-05, |
| "loss": 0.04375, |
| "memory(GiB)": 43.05, |
| "step": 6615, |
| "train_speed(iter/s)": 0.618891 |
| }, |
| { |
| "epoch": 4.290343486714193, |
| "grad_norm": 11.125, |
| "learning_rate": 6.560568302636877e-05, |
| "loss": 0.02109375, |
| "memory(GiB)": 43.05, |
| "step": 6620, |
| "train_speed(iter/s)": 0.618961 |
| }, |
| { |
| "epoch": 4.2935839274141285, |
| "grad_norm": 0.515625, |
| "learning_rate": 6.555476921683781e-05, |
| "loss": 0.0408203125, |
| "memory(GiB)": 43.05, |
| "step": 6625, |
| "train_speed(iter/s)": 0.619123 |
| }, |
| { |
| "epoch": 4.296824368114064, |
| "grad_norm": 14.0625, |
| "learning_rate": 6.55038375443294e-05, |
| "loss": 0.030859375, |
| "memory(GiB)": 43.05, |
| "step": 6630, |
| "train_speed(iter/s)": 0.619174 |
| }, |
| { |
| "epoch": 4.300064808813999, |
| "grad_norm": 10.5, |
| "learning_rate": 6.545288806733309e-05, |
| "loss": 0.036328125, |
| "memory(GiB)": 43.05, |
| "step": 6635, |
| "train_speed(iter/s)": 0.619255 |
| }, |
| { |
| "epoch": 4.303305249513934, |
| "grad_norm": 15.3125, |
| "learning_rate": 6.540192084435886e-05, |
| "loss": 0.0330078125, |
| "memory(GiB)": 43.05, |
| "step": 6640, |
| "train_speed(iter/s)": 0.619438 |
| }, |
| { |
| "epoch": 4.306545690213869, |
| "grad_norm": 8.9375, |
| "learning_rate": 6.535093593393708e-05, |
| "loss": 0.0228515625, |
| "memory(GiB)": 43.05, |
| "step": 6645, |
| "train_speed(iter/s)": 0.619513 |
| }, |
| { |
| "epoch": 4.309786130913804, |
| "grad_norm": 1.0546875, |
| "learning_rate": 6.529993339461846e-05, |
| "loss": 0.0435546875, |
| "memory(GiB)": 43.05, |
| "step": 6650, |
| "train_speed(iter/s)": 0.619554 |
| }, |
| { |
| "epoch": 4.313026571613739, |
| "grad_norm": 9.625, |
| "learning_rate": 6.52489132849739e-05, |
| "loss": 0.0208984375, |
| "memory(GiB)": 43.05, |
| "step": 6655, |
| "train_speed(iter/s)": 0.619772 |
| }, |
| { |
| "epoch": 4.316267012313674, |
| "grad_norm": 6.25, |
| "learning_rate": 6.519787566359448e-05, |
| "loss": 0.023828125, |
| "memory(GiB)": 43.05, |
| "step": 6660, |
| "train_speed(iter/s)": 0.619866 |
| }, |
| { |
| "epoch": 4.3195074530136095, |
| "grad_norm": 8.125, |
| "learning_rate": 6.514682058909146e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 6665, |
| "train_speed(iter/s)": 0.620005 |
| }, |
| { |
| "epoch": 4.3227478937135455, |
| "grad_norm": 0.8046875, |
| "learning_rate": 6.509574812009606e-05, |
| "loss": 0.0634765625, |
| "memory(GiB)": 43.05, |
| "step": 6670, |
| "train_speed(iter/s)": 0.620046 |
| }, |
| { |
| "epoch": 4.325988334413481, |
| "grad_norm": 11.125, |
| "learning_rate": 6.504465831525949e-05, |
| "loss": 0.0134765625, |
| "memory(GiB)": 43.05, |
| "step": 6675, |
| "train_speed(iter/s)": 0.620261 |
| }, |
| { |
| "epoch": 4.329228775113416, |
| "grad_norm": 12.5, |
| "learning_rate": 6.499355123325296e-05, |
| "loss": 0.0427734375, |
| "memory(GiB)": 43.05, |
| "step": 6680, |
| "train_speed(iter/s)": 0.620359 |
| }, |
| { |
| "epoch": 4.332469215813351, |
| "grad_norm": 0.7578125, |
| "learning_rate": 6.49424269327674e-05, |
| "loss": 0.048828125, |
| "memory(GiB)": 43.05, |
| "step": 6685, |
| "train_speed(iter/s)": 0.620398 |
| }, |
| { |
| "epoch": 4.335709656513286, |
| "grad_norm": 3.53125, |
| "learning_rate": 6.489128547251357e-05, |
| "loss": 0.027734375, |
| "memory(GiB)": 43.05, |
| "step": 6690, |
| "train_speed(iter/s)": 0.620478 |
| }, |
| { |
| "epoch": 4.338950097213221, |
| "grad_norm": 2.171875, |
| "learning_rate": 6.484012691122194e-05, |
| "loss": 0.016015625, |
| "memory(GiB)": 43.05, |
| "step": 6695, |
| "train_speed(iter/s)": 0.620475 |
| }, |
| { |
| "epoch": 4.342190537913156, |
| "grad_norm": 10.1875, |
| "learning_rate": 6.47889513076426e-05, |
| "loss": 0.0240234375, |
| "memory(GiB)": 43.05, |
| "step": 6700, |
| "train_speed(iter/s)": 0.620606 |
| }, |
| { |
| "epoch": 4.345430978613091, |
| "grad_norm": 0.94921875, |
| "learning_rate": 6.473775872054521e-05, |
| "loss": 0.030078125, |
| "memory(GiB)": 43.05, |
| "step": 6705, |
| "train_speed(iter/s)": 0.620706 |
| }, |
| { |
| "epoch": 4.3486714193130265, |
| "grad_norm": 11.5625, |
| "learning_rate": 6.468654920871897e-05, |
| "loss": 0.04296875, |
| "memory(GiB)": 43.05, |
| "step": 6710, |
| "train_speed(iter/s)": 0.62086 |
| }, |
| { |
| "epoch": 4.351911860012962, |
| "grad_norm": 1.140625, |
| "learning_rate": 6.463532283097247e-05, |
| "loss": 0.0203125, |
| "memory(GiB)": 43.05, |
| "step": 6715, |
| "train_speed(iter/s)": 0.620984 |
| }, |
| { |
| "epoch": 4.355152300712897, |
| "grad_norm": 6.75, |
| "learning_rate": 6.458407964613369e-05, |
| "loss": 0.0154296875, |
| "memory(GiB)": 43.05, |
| "step": 6720, |
| "train_speed(iter/s)": 0.620961 |
| }, |
| { |
| "epoch": 4.358392741412832, |
| "grad_norm": 0.9140625, |
| "learning_rate": 6.453281971304993e-05, |
| "loss": 0.030078125, |
| "memory(GiB)": 43.05, |
| "step": 6725, |
| "train_speed(iter/s)": 0.621143 |
| }, |
| { |
| "epoch": 4.361633182112767, |
| "grad_norm": 3.90625, |
| "learning_rate": 6.448154309058767e-05, |
| "loss": 0.0236328125, |
| "memory(GiB)": 43.05, |
| "step": 6730, |
| "train_speed(iter/s)": 0.621279 |
| }, |
| { |
| "epoch": 4.364873622812702, |
| "grad_norm": 0.5703125, |
| "learning_rate": 6.443024983763262e-05, |
| "loss": 0.037109375, |
| "memory(GiB)": 43.05, |
| "step": 6735, |
| "train_speed(iter/s)": 0.621353 |
| }, |
| { |
| "epoch": 4.368114063512638, |
| "grad_norm": 3.625, |
| "learning_rate": 6.437894001308953e-05, |
| "loss": 0.0373046875, |
| "memory(GiB)": 43.05, |
| "step": 6740, |
| "train_speed(iter/s)": 0.621461 |
| }, |
| { |
| "epoch": 4.371354504212573, |
| "grad_norm": 1.5078125, |
| "learning_rate": 6.432761367588223e-05, |
| "loss": 0.037109375, |
| "memory(GiB)": 43.05, |
| "step": 6745, |
| "train_speed(iter/s)": 0.621395 |
| }, |
| { |
| "epoch": 4.374594944912508, |
| "grad_norm": 17.875, |
| "learning_rate": 6.427627088495349e-05, |
| "loss": 0.04765625, |
| "memory(GiB)": 43.05, |
| "step": 6750, |
| "train_speed(iter/s)": 0.621476 |
| }, |
| { |
| "epoch": 4.3778353856124435, |
| "grad_norm": 0.515625, |
| "learning_rate": 6.422491169926495e-05, |
| "loss": 0.0384765625, |
| "memory(GiB)": 43.05, |
| "step": 6755, |
| "train_speed(iter/s)": 0.621498 |
| }, |
| { |
| "epoch": 4.381075826312379, |
| "grad_norm": 14.8125, |
| "learning_rate": 6.417353617779715e-05, |
| "loss": 0.0451171875, |
| "memory(GiB)": 43.05, |
| "step": 6760, |
| "train_speed(iter/s)": 0.621524 |
| }, |
| { |
| "epoch": 4.384316267012314, |
| "grad_norm": 5.15625, |
| "learning_rate": 6.41221443795493e-05, |
| "loss": 0.0400390625, |
| "memory(GiB)": 43.05, |
| "step": 6765, |
| "train_speed(iter/s)": 0.621575 |
| }, |
| { |
| "epoch": 4.387556707712249, |
| "grad_norm": 4.75, |
| "learning_rate": 6.407073636353937e-05, |
| "loss": 0.0232421875, |
| "memory(GiB)": 43.05, |
| "step": 6770, |
| "train_speed(iter/s)": 0.621704 |
| }, |
| { |
| "epoch": 4.390797148412184, |
| "grad_norm": 14.6875, |
| "learning_rate": 6.401931218880393e-05, |
| "loss": 0.046875, |
| "memory(GiB)": 43.05, |
| "step": 6775, |
| "train_speed(iter/s)": 0.621833 |
| }, |
| { |
| "epoch": 4.394037589112119, |
| "grad_norm": 15.3125, |
| "learning_rate": 6.396787191439808e-05, |
| "loss": 0.04140625, |
| "memory(GiB)": 43.05, |
| "step": 6780, |
| "train_speed(iter/s)": 0.621942 |
| }, |
| { |
| "epoch": 4.397278029812054, |
| "grad_norm": 17.0, |
| "learning_rate": 6.391641559939549e-05, |
| "loss": 0.046484375, |
| "memory(GiB)": 43.05, |
| "step": 6785, |
| "train_speed(iter/s)": 0.622018 |
| }, |
| { |
| "epoch": 4.400518470511989, |
| "grad_norm": 15.25, |
| "learning_rate": 6.386494330288815e-05, |
| "loss": 0.04375, |
| "memory(GiB)": 43.05, |
| "step": 6790, |
| "train_speed(iter/s)": 0.622035 |
| }, |
| { |
| "epoch": 4.403758911211924, |
| "grad_norm": 5.5, |
| "learning_rate": 6.381345508398647e-05, |
| "loss": 0.033203125, |
| "memory(GiB)": 43.05, |
| "step": 6795, |
| "train_speed(iter/s)": 0.6221 |
| }, |
| { |
| "epoch": 4.40699935191186, |
| "grad_norm": 0.80859375, |
| "learning_rate": 6.376195100181911e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 6800, |
| "train_speed(iter/s)": 0.622253 |
| }, |
| { |
| "epoch": 4.4102397926117956, |
| "grad_norm": 8.875, |
| "learning_rate": 6.371043111553296e-05, |
| "loss": 0.0361328125, |
| "memory(GiB)": 43.05, |
| "step": 6805, |
| "train_speed(iter/s)": 0.622384 |
| }, |
| { |
| "epoch": 4.413480233311731, |
| "grad_norm": 7.9375, |
| "learning_rate": 6.365889548429309e-05, |
| "loss": 0.023046875, |
| "memory(GiB)": 43.05, |
| "step": 6810, |
| "train_speed(iter/s)": 0.622483 |
| }, |
| { |
| "epoch": 4.416720674011666, |
| "grad_norm": 11.4375, |
| "learning_rate": 6.360734416728261e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 6815, |
| "train_speed(iter/s)": 0.622554 |
| }, |
| { |
| "epoch": 4.419961114711601, |
| "grad_norm": 13.75, |
| "learning_rate": 6.355577722370264e-05, |
| "loss": 0.0263671875, |
| "memory(GiB)": 43.05, |
| "step": 6820, |
| "train_speed(iter/s)": 0.622643 |
| }, |
| { |
| "epoch": 4.423201555411536, |
| "grad_norm": 12.0625, |
| "learning_rate": 6.35041947127723e-05, |
| "loss": 0.0294921875, |
| "memory(GiB)": 43.05, |
| "step": 6825, |
| "train_speed(iter/s)": 0.622739 |
| }, |
| { |
| "epoch": 4.426441996111471, |
| "grad_norm": 18.125, |
| "learning_rate": 6.345259669372849e-05, |
| "loss": 0.0328125, |
| "memory(GiB)": 43.05, |
| "step": 6830, |
| "train_speed(iter/s)": 0.62278 |
| }, |
| { |
| "epoch": 4.429682436811406, |
| "grad_norm": 6.53125, |
| "learning_rate": 6.340098322582603e-05, |
| "loss": 0.03046875, |
| "memory(GiB)": 43.05, |
| "step": 6835, |
| "train_speed(iter/s)": 0.622944 |
| }, |
| { |
| "epoch": 4.432922877511341, |
| "grad_norm": 3.578125, |
| "learning_rate": 6.334935436833741e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 6840, |
| "train_speed(iter/s)": 0.623049 |
| }, |
| { |
| "epoch": 4.4361633182112765, |
| "grad_norm": 12.0, |
| "learning_rate": 6.329771018055281e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 6845, |
| "train_speed(iter/s)": 0.623179 |
| }, |
| { |
| "epoch": 4.439403758911212, |
| "grad_norm": 17.0, |
| "learning_rate": 6.324605072178002e-05, |
| "loss": 0.0263671875, |
| "memory(GiB)": 43.05, |
| "step": 6850, |
| "train_speed(iter/s)": 0.623055 |
| }, |
| { |
| "epoch": 4.442644199611147, |
| "grad_norm": 10.8125, |
| "learning_rate": 6.319437605134437e-05, |
| "loss": 0.025390625, |
| "memory(GiB)": 43.05, |
| "step": 6855, |
| "train_speed(iter/s)": 0.62318 |
| }, |
| { |
| "epoch": 4.445884640311082, |
| "grad_norm": 1.6953125, |
| "learning_rate": 6.314268622858866e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 6860, |
| "train_speed(iter/s)": 0.623258 |
| }, |
| { |
| "epoch": 4.449125081011018, |
| "grad_norm": 13.8125, |
| "learning_rate": 6.309098131287308e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 6865, |
| "train_speed(iter/s)": 0.623343 |
| }, |
| { |
| "epoch": 4.452365521710953, |
| "grad_norm": 8.375, |
| "learning_rate": 6.303926136357517e-05, |
| "loss": 0.0255859375, |
| "memory(GiB)": 43.05, |
| "step": 6870, |
| "train_speed(iter/s)": 0.623556 |
| }, |
| { |
| "epoch": 4.455605962410888, |
| "grad_norm": 4.25, |
| "learning_rate": 6.298752644008967e-05, |
| "loss": 0.0216796875, |
| "memory(GiB)": 43.05, |
| "step": 6875, |
| "train_speed(iter/s)": 0.623629 |
| }, |
| { |
| "epoch": 4.458846403110823, |
| "grad_norm": 0.470703125, |
| "learning_rate": 6.293577660182863e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 6880, |
| "train_speed(iter/s)": 0.623641 |
| }, |
| { |
| "epoch": 4.462086843810758, |
| "grad_norm": 13.6875, |
| "learning_rate": 6.288401190822116e-05, |
| "loss": 0.0345703125, |
| "memory(GiB)": 43.05, |
| "step": 6885, |
| "train_speed(iter/s)": 0.623644 |
| }, |
| { |
| "epoch": 4.4653272845106935, |
| "grad_norm": 0.890625, |
| "learning_rate": 6.283223241871338e-05, |
| "loss": 0.0376953125, |
| "memory(GiB)": 43.05, |
| "step": 6890, |
| "train_speed(iter/s)": 0.623819 |
| }, |
| { |
| "epoch": 4.468567725210629, |
| "grad_norm": 5.625, |
| "learning_rate": 6.278043819276853e-05, |
| "loss": 0.0328125, |
| "memory(GiB)": 43.05, |
| "step": 6895, |
| "train_speed(iter/s)": 0.623808 |
| }, |
| { |
| "epoch": 4.471808165910564, |
| "grad_norm": 10.25, |
| "learning_rate": 6.272862928986668e-05, |
| "loss": 0.01796875, |
| "memory(GiB)": 43.05, |
| "step": 6900, |
| "train_speed(iter/s)": 0.623992 |
| }, |
| { |
| "epoch": 4.475048606610499, |
| "grad_norm": 10.8125, |
| "learning_rate": 6.267680576950473e-05, |
| "loss": 0.028515625, |
| "memory(GiB)": 43.05, |
| "step": 6905, |
| "train_speed(iter/s)": 0.62414 |
| }, |
| { |
| "epoch": 4.478289047310434, |
| "grad_norm": 13.1875, |
| "learning_rate": 6.262496769119646e-05, |
| "loss": 0.02578125, |
| "memory(GiB)": 43.05, |
| "step": 6910, |
| "train_speed(iter/s)": 0.624303 |
| }, |
| { |
| "epoch": 4.481529488010369, |
| "grad_norm": 0.84765625, |
| "learning_rate": 6.257311511447232e-05, |
| "loss": 0.019921875, |
| "memory(GiB)": 43.05, |
| "step": 6915, |
| "train_speed(iter/s)": 0.62434 |
| }, |
| { |
| "epoch": 4.484769928710304, |
| "grad_norm": 15.6875, |
| "learning_rate": 6.252124809887938e-05, |
| "loss": 0.0318359375, |
| "memory(GiB)": 43.05, |
| "step": 6920, |
| "train_speed(iter/s)": 0.624398 |
| }, |
| { |
| "epoch": 4.48801036941024, |
| "grad_norm": 9.1875, |
| "learning_rate": 6.246936670398136e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 6925, |
| "train_speed(iter/s)": 0.624461 |
| }, |
| { |
| "epoch": 4.491250810110175, |
| "grad_norm": 0.6796875, |
| "learning_rate": 6.241747098935843e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 6930, |
| "train_speed(iter/s)": 0.624671 |
| }, |
| { |
| "epoch": 4.4944912508101105, |
| "grad_norm": 8.4375, |
| "learning_rate": 6.236556101460724e-05, |
| "loss": 0.033984375, |
| "memory(GiB)": 43.05, |
| "step": 6935, |
| "train_speed(iter/s)": 0.624793 |
| }, |
| { |
| "epoch": 4.497731691510046, |
| "grad_norm": 16.25, |
| "learning_rate": 6.23136368393408e-05, |
| "loss": 0.03984375, |
| "memory(GiB)": 43.05, |
| "step": 6940, |
| "train_speed(iter/s)": 0.62492 |
| }, |
| { |
| "epoch": 4.500972132209981, |
| "grad_norm": 2.015625, |
| "learning_rate": 6.226169852318842e-05, |
| "loss": 0.0333984375, |
| "memory(GiB)": 43.05, |
| "step": 6945, |
| "train_speed(iter/s)": 0.625014 |
| }, |
| { |
| "epoch": 4.504212572909916, |
| "grad_norm": 9.625, |
| "learning_rate": 6.22097461257957e-05, |
| "loss": 0.0330078125, |
| "memory(GiB)": 43.05, |
| "step": 6950, |
| "train_speed(iter/s)": 0.625087 |
| }, |
| { |
| "epoch": 4.507453013609851, |
| "grad_norm": 10.875, |
| "learning_rate": 6.215777970682435e-05, |
| "loss": 0.0365234375, |
| "memory(GiB)": 43.05, |
| "step": 6955, |
| "train_speed(iter/s)": 0.625181 |
| }, |
| { |
| "epoch": 4.510693454309786, |
| "grad_norm": 2.890625, |
| "learning_rate": 6.210579932595219e-05, |
| "loss": 0.028515625, |
| "memory(GiB)": 43.05, |
| "step": 6960, |
| "train_speed(iter/s)": 0.625165 |
| }, |
| { |
| "epoch": 4.513933895009721, |
| "grad_norm": 16.25, |
| "learning_rate": 6.205380504287314e-05, |
| "loss": 0.041796875, |
| "memory(GiB)": 43.05, |
| "step": 6965, |
| "train_speed(iter/s)": 0.625297 |
| }, |
| { |
| "epoch": 4.517174335709656, |
| "grad_norm": 5.375, |
| "learning_rate": 6.2001796917297e-05, |
| "loss": 0.0404296875, |
| "memory(GiB)": 43.05, |
| "step": 6970, |
| "train_speed(iter/s)": 0.625372 |
| }, |
| { |
| "epoch": 4.520414776409591, |
| "grad_norm": 1.375, |
| "learning_rate": 6.19497750089495e-05, |
| "loss": 0.024609375, |
| "memory(GiB)": 43.05, |
| "step": 6975, |
| "train_speed(iter/s)": 0.625461 |
| }, |
| { |
| "epoch": 4.523655217109527, |
| "grad_norm": 8.5625, |
| "learning_rate": 6.18977393775722e-05, |
| "loss": 0.038671875, |
| "memory(GiB)": 43.05, |
| "step": 6980, |
| "train_speed(iter/s)": 0.625638 |
| }, |
| { |
| "epoch": 4.526895657809462, |
| "grad_norm": 1.953125, |
| "learning_rate": 6.184569008292242e-05, |
| "loss": 0.01875, |
| "memory(GiB)": 43.05, |
| "step": 6985, |
| "train_speed(iter/s)": 0.625769 |
| }, |
| { |
| "epoch": 4.530136098509397, |
| "grad_norm": 5.3125, |
| "learning_rate": 6.179362718477319e-05, |
| "loss": 0.0279296875, |
| "memory(GiB)": 43.05, |
| "step": 6990, |
| "train_speed(iter/s)": 0.625847 |
| }, |
| { |
| "epoch": 4.533376539209333, |
| "grad_norm": 1.8125, |
| "learning_rate": 6.174155074291312e-05, |
| "loss": 0.0201171875, |
| "memory(GiB)": 43.05, |
| "step": 6995, |
| "train_speed(iter/s)": 0.626021 |
| }, |
| { |
| "epoch": 4.536616979909268, |
| "grad_norm": 2.03125, |
| "learning_rate": 6.168946081714642e-05, |
| "loss": 0.021875, |
| "memory(GiB)": 43.05, |
| "step": 7000, |
| "train_speed(iter/s)": 0.626232 |
| }, |
| { |
| "epoch": 4.539857420609203, |
| "grad_norm": 1.0234375, |
| "learning_rate": 6.163735746729272e-05, |
| "loss": 0.0318359375, |
| "memory(GiB)": 43.05, |
| "step": 7005, |
| "train_speed(iter/s)": 0.626324 |
| }, |
| { |
| "epoch": 4.543097861309138, |
| "grad_norm": 8.0625, |
| "learning_rate": 6.158524075318715e-05, |
| "loss": 0.0349609375, |
| "memory(GiB)": 43.05, |
| "step": 7010, |
| "train_speed(iter/s)": 0.626345 |
| }, |
| { |
| "epoch": 4.546338302009073, |
| "grad_norm": 14.8125, |
| "learning_rate": 6.153311073468011e-05, |
| "loss": 0.0279296875, |
| "memory(GiB)": 43.05, |
| "step": 7015, |
| "train_speed(iter/s)": 0.626445 |
| }, |
| { |
| "epoch": 4.549578742709008, |
| "grad_norm": 14.3125, |
| "learning_rate": 6.148096747163734e-05, |
| "loss": 0.029296875, |
| "memory(GiB)": 43.05, |
| "step": 7020, |
| "train_speed(iter/s)": 0.626427 |
| }, |
| { |
| "epoch": 4.5528191834089435, |
| "grad_norm": 11.625, |
| "learning_rate": 6.142881102393973e-05, |
| "loss": 0.0345703125, |
| "memory(GiB)": 43.05, |
| "step": 7025, |
| "train_speed(iter/s)": 0.62628 |
| }, |
| { |
| "epoch": 4.556059624108879, |
| "grad_norm": 2.84375, |
| "learning_rate": 6.137664145148339e-05, |
| "loss": 0.02626953125, |
| "memory(GiB)": 43.05, |
| "step": 7030, |
| "train_speed(iter/s)": 0.62635 |
| }, |
| { |
| "epoch": 4.559300064808814, |
| "grad_norm": 6.875, |
| "learning_rate": 6.132445881417941e-05, |
| "loss": 0.0146484375, |
| "memory(GiB)": 43.05, |
| "step": 7035, |
| "train_speed(iter/s)": 0.626468 |
| }, |
| { |
| "epoch": 4.562540505508749, |
| "grad_norm": 14.4375, |
| "learning_rate": 6.127226317195396e-05, |
| "loss": 0.0490234375, |
| "memory(GiB)": 43.05, |
| "step": 7040, |
| "train_speed(iter/s)": 0.626465 |
| }, |
| { |
| "epoch": 4.565780946208684, |
| "grad_norm": 4.15625, |
| "learning_rate": 6.122005458474808e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 7045, |
| "train_speed(iter/s)": 0.626539 |
| }, |
| { |
| "epoch": 4.56902138690862, |
| "grad_norm": 7.125, |
| "learning_rate": 6.116783311251775e-05, |
| "loss": 0.027734375, |
| "memory(GiB)": 43.05, |
| "step": 7050, |
| "train_speed(iter/s)": 0.626607 |
| }, |
| { |
| "epoch": 4.572261827608555, |
| "grad_norm": 5.8125, |
| "learning_rate": 6.111559881523371e-05, |
| "loss": 0.028125, |
| "memory(GiB)": 43.05, |
| "step": 7055, |
| "train_speed(iter/s)": 0.626759 |
| }, |
| { |
| "epoch": 4.57550226830849, |
| "grad_norm": 10.3125, |
| "learning_rate": 6.106335175288139e-05, |
| "loss": 0.0408203125, |
| "memory(GiB)": 43.05, |
| "step": 7060, |
| "train_speed(iter/s)": 0.626856 |
| }, |
| { |
| "epoch": 4.578742709008425, |
| "grad_norm": 13.5, |
| "learning_rate": 6.101109198546093e-05, |
| "loss": 0.0265625, |
| "memory(GiB)": 43.05, |
| "step": 7065, |
| "train_speed(iter/s)": 0.627055 |
| }, |
| { |
| "epoch": 4.5819831497083605, |
| "grad_norm": 3.609375, |
| "learning_rate": 6.095881957298706e-05, |
| "loss": 0.0318359375, |
| "memory(GiB)": 43.05, |
| "step": 7070, |
| "train_speed(iter/s)": 0.627056 |
| }, |
| { |
| "epoch": 4.585223590408296, |
| "grad_norm": 7.6875, |
| "learning_rate": 6.0906534575488994e-05, |
| "loss": 0.0314453125, |
| "memory(GiB)": 43.05, |
| "step": 7075, |
| "train_speed(iter/s)": 0.627143 |
| }, |
| { |
| "epoch": 4.588464031108231, |
| "grad_norm": 13.25, |
| "learning_rate": 6.0854237053010424e-05, |
| "loss": 0.024609375, |
| "memory(GiB)": 43.05, |
| "step": 7080, |
| "train_speed(iter/s)": 0.627199 |
| }, |
| { |
| "epoch": 4.591704471808166, |
| "grad_norm": 0.6015625, |
| "learning_rate": 6.080192706560944e-05, |
| "loss": 0.0228515625, |
| "memory(GiB)": 43.05, |
| "step": 7085, |
| "train_speed(iter/s)": 0.627417 |
| }, |
| { |
| "epoch": 4.594944912508101, |
| "grad_norm": 16.375, |
| "learning_rate": 6.074960467335842e-05, |
| "loss": 0.0494140625, |
| "memory(GiB)": 43.05, |
| "step": 7090, |
| "train_speed(iter/s)": 0.627538 |
| }, |
| { |
| "epoch": 4.598185353208036, |
| "grad_norm": 13.5, |
| "learning_rate": 6.0697269936343994e-05, |
| "loss": 0.0310546875, |
| "memory(GiB)": 43.05, |
| "step": 7095, |
| "train_speed(iter/s)": 0.627544 |
| }, |
| { |
| "epoch": 4.601425793907971, |
| "grad_norm": 2.109375, |
| "learning_rate": 6.064492291466698e-05, |
| "loss": 0.0251953125, |
| "memory(GiB)": 43.05, |
| "step": 7100, |
| "train_speed(iter/s)": 0.627636 |
| }, |
| { |
| "epoch": 4.604666234607906, |
| "grad_norm": 14.6875, |
| "learning_rate": 6.059256366844228e-05, |
| "loss": 0.0271484375, |
| "memory(GiB)": 43.05, |
| "step": 7105, |
| "train_speed(iter/s)": 0.627726 |
| }, |
| { |
| "epoch": 4.6079066753078415, |
| "grad_norm": 10.0625, |
| "learning_rate": 6.054019225779888e-05, |
| "loss": 0.0333984375, |
| "memory(GiB)": 43.05, |
| "step": 7110, |
| "train_speed(iter/s)": 0.627781 |
| }, |
| { |
| "epoch": 4.611147116007777, |
| "grad_norm": 13.4375, |
| "learning_rate": 6.048780874287967e-05, |
| "loss": 0.023828125, |
| "memory(GiB)": 43.05, |
| "step": 7115, |
| "train_speed(iter/s)": 0.62782 |
| }, |
| { |
| "epoch": 4.614387556707713, |
| "grad_norm": 15.9375, |
| "learning_rate": 6.0435413183841484e-05, |
| "loss": 0.03515625, |
| "memory(GiB)": 43.05, |
| "step": 7120, |
| "train_speed(iter/s)": 0.627882 |
| }, |
| { |
| "epoch": 4.617627997407648, |
| "grad_norm": 2.703125, |
| "learning_rate": 6.0383005640855006e-05, |
| "loss": 0.027734375, |
| "memory(GiB)": 43.05, |
| "step": 7125, |
| "train_speed(iter/s)": 0.627954 |
| }, |
| { |
| "epoch": 4.620868438107583, |
| "grad_norm": 0.7890625, |
| "learning_rate": 6.0330586174104644e-05, |
| "loss": 0.041796875, |
| "memory(GiB)": 43.05, |
| "step": 7130, |
| "train_speed(iter/s)": 0.628013 |
| }, |
| { |
| "epoch": 4.624108878807518, |
| "grad_norm": 11.4375, |
| "learning_rate": 6.027815484378848e-05, |
| "loss": 0.0224609375, |
| "memory(GiB)": 43.05, |
| "step": 7135, |
| "train_speed(iter/s)": 0.628186 |
| }, |
| { |
| "epoch": 4.627349319507453, |
| "grad_norm": 2.34375, |
| "learning_rate": 6.0225711710118296e-05, |
| "loss": 0.0380859375, |
| "memory(GiB)": 43.05, |
| "step": 7140, |
| "train_speed(iter/s)": 0.628313 |
| }, |
| { |
| "epoch": 4.630589760207388, |
| "grad_norm": 0.75, |
| "learning_rate": 6.0173256833319336e-05, |
| "loss": 0.029296875, |
| "memory(GiB)": 43.05, |
| "step": 7145, |
| "train_speed(iter/s)": 0.628297 |
| }, |
| { |
| "epoch": 4.633830200907323, |
| "grad_norm": 4.90625, |
| "learning_rate": 6.012079027363041e-05, |
| "loss": 0.0333984375, |
| "memory(GiB)": 43.05, |
| "step": 7150, |
| "train_speed(iter/s)": 0.628393 |
| }, |
| { |
| "epoch": 4.6370706416072585, |
| "grad_norm": 12.0625, |
| "learning_rate": 6.006831209130372e-05, |
| "loss": 0.02421875, |
| "memory(GiB)": 43.05, |
| "step": 7155, |
| "train_speed(iter/s)": 0.628479 |
| }, |
| { |
| "epoch": 4.640311082307194, |
| "grad_norm": 5.21875, |
| "learning_rate": 6.00158223466048e-05, |
| "loss": 0.0314453125, |
| "memory(GiB)": 43.05, |
| "step": 7160, |
| "train_speed(iter/s)": 0.628405 |
| }, |
| { |
| "epoch": 4.643551523007129, |
| "grad_norm": 7.5, |
| "learning_rate": 5.9963321099812445e-05, |
| "loss": 0.0251953125, |
| "memory(GiB)": 43.05, |
| "step": 7165, |
| "train_speed(iter/s)": 0.628501 |
| }, |
| { |
| "epoch": 4.646791963707064, |
| "grad_norm": 9.8125, |
| "learning_rate": 5.991080841121871e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 7170, |
| "train_speed(iter/s)": 0.628517 |
| }, |
| { |
| "epoch": 4.650032404407, |
| "grad_norm": 11.6875, |
| "learning_rate": 5.9858284341128756e-05, |
| "loss": 0.0232421875, |
| "memory(GiB)": 43.05, |
| "step": 7175, |
| "train_speed(iter/s)": 0.628643 |
| }, |
| { |
| "epoch": 4.653272845106935, |
| "grad_norm": 1.03125, |
| "learning_rate": 5.980574894986081e-05, |
| "loss": 0.041015625, |
| "memory(GiB)": 43.05, |
| "step": 7180, |
| "train_speed(iter/s)": 0.628817 |
| }, |
| { |
| "epoch": 4.65651328580687, |
| "grad_norm": 1.4921875, |
| "learning_rate": 5.975320229774612e-05, |
| "loss": 0.0423828125, |
| "memory(GiB)": 43.05, |
| "step": 7185, |
| "train_speed(iter/s)": 0.628729 |
| }, |
| { |
| "epoch": 4.659753726506805, |
| "grad_norm": 2.765625, |
| "learning_rate": 5.9700644445128874e-05, |
| "loss": 0.0416015625, |
| "memory(GiB)": 43.05, |
| "step": 7190, |
| "train_speed(iter/s)": 0.628877 |
| }, |
| { |
| "epoch": 4.66299416720674, |
| "grad_norm": 3.03125, |
| "learning_rate": 5.964807545236607e-05, |
| "loss": 0.03046875, |
| "memory(GiB)": 43.05, |
| "step": 7195, |
| "train_speed(iter/s)": 0.628933 |
| }, |
| { |
| "epoch": 4.666234607906675, |
| "grad_norm": 1.8046875, |
| "learning_rate": 5.959549537982757e-05, |
| "loss": 0.02734375, |
| "memory(GiB)": 43.05, |
| "step": 7200, |
| "train_speed(iter/s)": 0.628988 |
| }, |
| { |
| "epoch": 4.669475048606611, |
| "grad_norm": 1.28125, |
| "learning_rate": 5.954290428789592e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 7205, |
| "train_speed(iter/s)": 0.629186 |
| }, |
| { |
| "epoch": 4.672715489306546, |
| "grad_norm": 4.03125, |
| "learning_rate": 5.94903022369663e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 7210, |
| "train_speed(iter/s)": 0.629256 |
| }, |
| { |
| "epoch": 4.675955930006481, |
| "grad_norm": 12.8125, |
| "learning_rate": 5.943768928744651e-05, |
| "loss": 0.033984375, |
| "memory(GiB)": 43.05, |
| "step": 7215, |
| "train_speed(iter/s)": 0.629209 |
| }, |
| { |
| "epoch": 4.679196370706416, |
| "grad_norm": 11.625, |
| "learning_rate": 5.938506549975688e-05, |
| "loss": 0.044140625, |
| "memory(GiB)": 43.05, |
| "step": 7220, |
| "train_speed(iter/s)": 0.629147 |
| }, |
| { |
| "epoch": 4.682436811406351, |
| "grad_norm": 1.2421875, |
| "learning_rate": 5.933243093433015e-05, |
| "loss": 0.0314453125, |
| "memory(GiB)": 43.05, |
| "step": 7225, |
| "train_speed(iter/s)": 0.629296 |
| }, |
| { |
| "epoch": 4.685677252106286, |
| "grad_norm": 9.875, |
| "learning_rate": 5.9279785651611455e-05, |
| "loss": 0.0244140625, |
| "memory(GiB)": 43.05, |
| "step": 7230, |
| "train_speed(iter/s)": 0.629301 |
| }, |
| { |
| "epoch": 4.688917692806221, |
| "grad_norm": 17.5, |
| "learning_rate": 5.9227129712058207e-05, |
| "loss": 0.0376953125, |
| "memory(GiB)": 43.05, |
| "step": 7235, |
| "train_speed(iter/s)": 0.629337 |
| }, |
| { |
| "epoch": 4.692158133506156, |
| "grad_norm": 0.734375, |
| "learning_rate": 5.9174463176140115e-05, |
| "loss": 0.034765625, |
| "memory(GiB)": 43.05, |
| "step": 7240, |
| "train_speed(iter/s)": 0.629481 |
| }, |
| { |
| "epoch": 4.695398574206092, |
| "grad_norm": 8.625, |
| "learning_rate": 5.912178610433902e-05, |
| "loss": 0.0431640625, |
| "memory(GiB)": 43.05, |
| "step": 7245, |
| "train_speed(iter/s)": 0.629593 |
| }, |
| { |
| "epoch": 4.6986390149060275, |
| "grad_norm": 2.75, |
| "learning_rate": 5.906909855714884e-05, |
| "loss": 0.0396484375, |
| "memory(GiB)": 43.05, |
| "step": 7250, |
| "train_speed(iter/s)": 0.629641 |
| }, |
| { |
| "epoch": 4.701879455605963, |
| "grad_norm": 12.6875, |
| "learning_rate": 5.901640059507557e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 7255, |
| "train_speed(iter/s)": 0.629732 |
| }, |
| { |
| "epoch": 4.705119896305898, |
| "grad_norm": 2.96875, |
| "learning_rate": 5.896369227863715e-05, |
| "loss": 0.04921875, |
| "memory(GiB)": 43.05, |
| "step": 7260, |
| "train_speed(iter/s)": 0.629872 |
| }, |
| { |
| "epoch": 4.708360337005833, |
| "grad_norm": 3.640625, |
| "learning_rate": 5.891097366836339e-05, |
| "loss": 0.0271484375, |
| "memory(GiB)": 43.05, |
| "step": 7265, |
| "train_speed(iter/s)": 0.628331 |
| }, |
| { |
| "epoch": 4.711600777705768, |
| "grad_norm": 12.75, |
| "learning_rate": 5.885824482479596e-05, |
| "loss": 0.0353515625, |
| "memory(GiB)": 43.05, |
| "step": 7270, |
| "train_speed(iter/s)": 0.62829 |
| }, |
| { |
| "epoch": 4.714841218405703, |
| "grad_norm": 8.1875, |
| "learning_rate": 5.880550580848824e-05, |
| "loss": 0.0375, |
| "memory(GiB)": 43.05, |
| "step": 7275, |
| "train_speed(iter/s)": 0.628428 |
| }, |
| { |
| "epoch": 4.718081659105638, |
| "grad_norm": 3.15625, |
| "learning_rate": 5.875275668000529e-05, |
| "loss": 0.034765625, |
| "memory(GiB)": 43.05, |
| "step": 7280, |
| "train_speed(iter/s)": 0.628185 |
| }, |
| { |
| "epoch": 4.721322099805573, |
| "grad_norm": 10.8125, |
| "learning_rate": 5.8699997499923855e-05, |
| "loss": 0.043359375, |
| "memory(GiB)": 43.05, |
| "step": 7285, |
| "train_speed(iter/s)": 0.628275 |
| }, |
| { |
| "epoch": 4.7245625405055085, |
| "grad_norm": 4.78125, |
| "learning_rate": 5.8647228328832135e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 7290, |
| "train_speed(iter/s)": 0.628342 |
| }, |
| { |
| "epoch": 4.727802981205444, |
| "grad_norm": 15.875, |
| "learning_rate": 5.859444922732985e-05, |
| "loss": 0.040234375, |
| "memory(GiB)": 43.05, |
| "step": 7295, |
| "train_speed(iter/s)": 0.628399 |
| }, |
| { |
| "epoch": 4.731043421905379, |
| "grad_norm": 0.8515625, |
| "learning_rate": 5.854166025602812e-05, |
| "loss": 0.02392578125, |
| "memory(GiB)": 43.05, |
| "step": 7300, |
| "train_speed(iter/s)": 0.628513 |
| }, |
| { |
| "epoch": 4.734283862605315, |
| "grad_norm": 0.953125, |
| "learning_rate": 5.84888614755494e-05, |
| "loss": 0.0353515625, |
| "memory(GiB)": 43.05, |
| "step": 7305, |
| "train_speed(iter/s)": 0.628291 |
| }, |
| { |
| "epoch": 4.73752430330525, |
| "grad_norm": 10.4375, |
| "learning_rate": 5.8436052946527365e-05, |
| "loss": 0.01865234375, |
| "memory(GiB)": 43.05, |
| "step": 7310, |
| "train_speed(iter/s)": 0.628451 |
| }, |
| { |
| "epoch": 4.740764744005185, |
| "grad_norm": 0.921875, |
| "learning_rate": 5.838323472960696e-05, |
| "loss": 0.0236328125, |
| "memory(GiB)": 43.05, |
| "step": 7315, |
| "train_speed(iter/s)": 0.628483 |
| }, |
| { |
| "epoch": 4.74400518470512, |
| "grad_norm": 2.84375, |
| "learning_rate": 5.833040688544422e-05, |
| "loss": 0.025390625, |
| "memory(GiB)": 43.05, |
| "step": 7320, |
| "train_speed(iter/s)": 0.628453 |
| }, |
| { |
| "epoch": 4.747245625405055, |
| "grad_norm": 7.09375, |
| "learning_rate": 5.827756947470622e-05, |
| "loss": 0.0337890625, |
| "memory(GiB)": 43.05, |
| "step": 7325, |
| "train_speed(iter/s)": 0.628404 |
| }, |
| { |
| "epoch": 4.75048606610499, |
| "grad_norm": 11.625, |
| "learning_rate": 5.822472255807106e-05, |
| "loss": 0.0328125, |
| "memory(GiB)": 43.05, |
| "step": 7330, |
| "train_speed(iter/s)": 0.628528 |
| }, |
| { |
| "epoch": 4.7537265068049255, |
| "grad_norm": 0.484375, |
| "learning_rate": 5.817186619622771e-05, |
| "loss": 0.0478515625, |
| "memory(GiB)": 43.05, |
| "step": 7335, |
| "train_speed(iter/s)": 0.628664 |
| }, |
| { |
| "epoch": 4.756966947504861, |
| "grad_norm": 13.6875, |
| "learning_rate": 5.811900044987601e-05, |
| "loss": 0.0240234375, |
| "memory(GiB)": 43.05, |
| "step": 7340, |
| "train_speed(iter/s)": 0.628769 |
| }, |
| { |
| "epoch": 4.760207388204796, |
| "grad_norm": 0.58984375, |
| "learning_rate": 5.8066125379726576e-05, |
| "loss": 0.0365234375, |
| "memory(GiB)": 43.05, |
| "step": 7345, |
| "train_speed(iter/s)": 0.62888 |
| }, |
| { |
| "epoch": 4.763447828904731, |
| "grad_norm": 10.4375, |
| "learning_rate": 5.801324104650074e-05, |
| "loss": 0.040234375, |
| "memory(GiB)": 43.05, |
| "step": 7350, |
| "train_speed(iter/s)": 0.62902 |
| }, |
| { |
| "epoch": 4.766688269604666, |
| "grad_norm": 1.5078125, |
| "learning_rate": 5.796034751093047e-05, |
| "loss": 0.0384765625, |
| "memory(GiB)": 43.05, |
| "step": 7355, |
| "train_speed(iter/s)": 0.629189 |
| }, |
| { |
| "epoch": 4.769928710304601, |
| "grad_norm": 10.0625, |
| "learning_rate": 5.7907444833758295e-05, |
| "loss": 0.039453125, |
| "memory(GiB)": 43.05, |
| "step": 7360, |
| "train_speed(iter/s)": 0.629383 |
| }, |
| { |
| "epoch": 4.773169151004536, |
| "grad_norm": 9.9375, |
| "learning_rate": 5.7854533075737224e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 7365, |
| "train_speed(iter/s)": 0.629448 |
| }, |
| { |
| "epoch": 4.776409591704471, |
| "grad_norm": 2.890625, |
| "learning_rate": 5.7801612297630734e-05, |
| "loss": 0.0333984375, |
| "memory(GiB)": 43.05, |
| "step": 7370, |
| "train_speed(iter/s)": 0.629513 |
| }, |
| { |
| "epoch": 4.779650032404407, |
| "grad_norm": 2.046875, |
| "learning_rate": 5.774868256021264e-05, |
| "loss": 0.035546875, |
| "memory(GiB)": 43.05, |
| "step": 7375, |
| "train_speed(iter/s)": 0.629642 |
| }, |
| { |
| "epoch": 4.7828904731043425, |
| "grad_norm": 8.5625, |
| "learning_rate": 5.769574392426702e-05, |
| "loss": 0.0314453125, |
| "memory(GiB)": 43.05, |
| "step": 7380, |
| "train_speed(iter/s)": 0.629671 |
| }, |
| { |
| "epoch": 4.786130913804278, |
| "grad_norm": 4.71875, |
| "learning_rate": 5.764279645058822e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 7385, |
| "train_speed(iter/s)": 0.629703 |
| }, |
| { |
| "epoch": 4.789371354504213, |
| "grad_norm": 4.25, |
| "learning_rate": 5.75898401999807e-05, |
| "loss": 0.03671875, |
| "memory(GiB)": 43.05, |
| "step": 7390, |
| "train_speed(iter/s)": 0.629715 |
| }, |
| { |
| "epoch": 4.792611795204148, |
| "grad_norm": 2.375, |
| "learning_rate": 5.7536875233259036e-05, |
| "loss": 0.041015625, |
| "memory(GiB)": 43.05, |
| "step": 7395, |
| "train_speed(iter/s)": 0.629774 |
| }, |
| { |
| "epoch": 4.795852235904083, |
| "grad_norm": 12.4375, |
| "learning_rate": 5.748390161124776e-05, |
| "loss": 0.0509765625, |
| "memory(GiB)": 43.05, |
| "step": 7400, |
| "train_speed(iter/s)": 0.62985 |
| }, |
| { |
| "epoch": 4.799092676604018, |
| "grad_norm": 6.40625, |
| "learning_rate": 5.7430919394781394e-05, |
| "loss": 0.0251953125, |
| "memory(GiB)": 43.05, |
| "step": 7405, |
| "train_speed(iter/s)": 0.629995 |
| }, |
| { |
| "epoch": 4.802333117303953, |
| "grad_norm": 5.0, |
| "learning_rate": 5.737792864470428e-05, |
| "loss": 0.0263671875, |
| "memory(GiB)": 43.05, |
| "step": 7410, |
| "train_speed(iter/s)": 0.630111 |
| }, |
| { |
| "epoch": 4.805573558003888, |
| "grad_norm": 11.5, |
| "learning_rate": 5.732492942187061e-05, |
| "loss": 0.0173828125, |
| "memory(GiB)": 43.05, |
| "step": 7415, |
| "train_speed(iter/s)": 0.630205 |
| }, |
| { |
| "epoch": 4.808813998703823, |
| "grad_norm": 3.40625, |
| "learning_rate": 5.7271921787144276e-05, |
| "loss": 0.0265625, |
| "memory(GiB)": 43.05, |
| "step": 7420, |
| "train_speed(iter/s)": 0.630346 |
| }, |
| { |
| "epoch": 4.8120544394037585, |
| "grad_norm": 1.6953125, |
| "learning_rate": 5.7218905801398846e-05, |
| "loss": 0.0306640625, |
| "memory(GiB)": 43.05, |
| "step": 7425, |
| "train_speed(iter/s)": 0.630399 |
| }, |
| { |
| "epoch": 4.8152948801036946, |
| "grad_norm": 9.875, |
| "learning_rate": 5.716588152551747e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 7430, |
| "train_speed(iter/s)": 0.63043 |
| }, |
| { |
| "epoch": 4.81853532080363, |
| "grad_norm": 13.8125, |
| "learning_rate": 5.711284902039282e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 7435, |
| "train_speed(iter/s)": 0.630488 |
| }, |
| { |
| "epoch": 4.821775761503565, |
| "grad_norm": 8.75, |
| "learning_rate": 5.7059808346927e-05, |
| "loss": 0.02421875, |
| "memory(GiB)": 43.05, |
| "step": 7440, |
| "train_speed(iter/s)": 0.630683 |
| }, |
| { |
| "epoch": 4.8250162022035, |
| "grad_norm": 15.25, |
| "learning_rate": 5.7006759566031535e-05, |
| "loss": 0.0333984375, |
| "memory(GiB)": 43.05, |
| "step": 7445, |
| "train_speed(iter/s)": 0.630663 |
| }, |
| { |
| "epoch": 4.828256642903435, |
| "grad_norm": 0.490234375, |
| "learning_rate": 5.695370273862721e-05, |
| "loss": 0.04140625, |
| "memory(GiB)": 43.05, |
| "step": 7450, |
| "train_speed(iter/s)": 0.63078 |
| }, |
| { |
| "epoch": 4.83149708360337, |
| "grad_norm": 4.40625, |
| "learning_rate": 5.6900637925644106e-05, |
| "loss": 0.019921875, |
| "memory(GiB)": 43.05, |
| "step": 7455, |
| "train_speed(iter/s)": 0.630894 |
| }, |
| { |
| "epoch": 4.834737524303305, |
| "grad_norm": 12.0, |
| "learning_rate": 5.6847565188021445e-05, |
| "loss": 0.0310546875, |
| "memory(GiB)": 43.05, |
| "step": 7460, |
| "train_speed(iter/s)": 0.631061 |
| }, |
| { |
| "epoch": 4.83797796500324, |
| "grad_norm": 3.0625, |
| "learning_rate": 5.6794484586707545e-05, |
| "loss": 0.04921875, |
| "memory(GiB)": 43.05, |
| "step": 7465, |
| "train_speed(iter/s)": 0.631133 |
| }, |
| { |
| "epoch": 4.8412184057031755, |
| "grad_norm": 16.25, |
| "learning_rate": 5.6741396182659735e-05, |
| "loss": 0.0232421875, |
| "memory(GiB)": 43.05, |
| "step": 7470, |
| "train_speed(iter/s)": 0.63119 |
| }, |
| { |
| "epoch": 4.844458846403111, |
| "grad_norm": 3.9375, |
| "learning_rate": 5.6688300036844365e-05, |
| "loss": 0.025390625, |
| "memory(GiB)": 43.05, |
| "step": 7475, |
| "train_speed(iter/s)": 0.631266 |
| }, |
| { |
| "epoch": 4.847699287103046, |
| "grad_norm": 0.5546875, |
| "learning_rate": 5.663519621023663e-05, |
| "loss": 0.0234375, |
| "memory(GiB)": 43.05, |
| "step": 7480, |
| "train_speed(iter/s)": 0.631362 |
| }, |
| { |
| "epoch": 4.850939727802981, |
| "grad_norm": 1.9609375, |
| "learning_rate": 5.658208476382053e-05, |
| "loss": 0.021484375, |
| "memory(GiB)": 43.05, |
| "step": 7485, |
| "train_speed(iter/s)": 0.63139 |
| }, |
| { |
| "epoch": 4.854180168502916, |
| "grad_norm": 3.1875, |
| "learning_rate": 5.65289657585889e-05, |
| "loss": 0.0185546875, |
| "memory(GiB)": 43.05, |
| "step": 7490, |
| "train_speed(iter/s)": 0.631378 |
| }, |
| { |
| "epoch": 4.857420609202851, |
| "grad_norm": 1.9453125, |
| "learning_rate": 5.647583925554314e-05, |
| "loss": 0.02578125, |
| "memory(GiB)": 43.05, |
| "step": 7495, |
| "train_speed(iter/s)": 0.631432 |
| }, |
| { |
| "epoch": 4.860661049902787, |
| "grad_norm": 4.21875, |
| "learning_rate": 5.642270531569336e-05, |
| "loss": 0.0404296875, |
| "memory(GiB)": 43.05, |
| "step": 7500, |
| "train_speed(iter/s)": 0.631482 |
| }, |
| { |
| "epoch": 4.863901490602722, |
| "grad_norm": 1.09375, |
| "learning_rate": 5.636956400005815e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 7505, |
| "train_speed(iter/s)": 0.631579 |
| }, |
| { |
| "epoch": 4.867141931302657, |
| "grad_norm": 11.5625, |
| "learning_rate": 5.6316415369664575e-05, |
| "loss": 0.0328125, |
| "memory(GiB)": 43.05, |
| "step": 7510, |
| "train_speed(iter/s)": 0.631606 |
| }, |
| { |
| "epoch": 4.8703823720025925, |
| "grad_norm": 13.5, |
| "learning_rate": 5.6263259485548134e-05, |
| "loss": 0.028515625, |
| "memory(GiB)": 43.05, |
| "step": 7515, |
| "train_speed(iter/s)": 0.631747 |
| }, |
| { |
| "epoch": 4.873622812702528, |
| "grad_norm": 3.375, |
| "learning_rate": 5.621009640875262e-05, |
| "loss": 0.0185546875, |
| "memory(GiB)": 43.05, |
| "step": 7520, |
| "train_speed(iter/s)": 0.631913 |
| }, |
| { |
| "epoch": 4.876863253402463, |
| "grad_norm": 5.1875, |
| "learning_rate": 5.615692620033012e-05, |
| "loss": 0.0302734375, |
| "memory(GiB)": 43.05, |
| "step": 7525, |
| "train_speed(iter/s)": 0.631943 |
| }, |
| { |
| "epoch": 4.880103694102398, |
| "grad_norm": 12.8125, |
| "learning_rate": 5.610374892134088e-05, |
| "loss": 0.0390625, |
| "memory(GiB)": 43.05, |
| "step": 7530, |
| "train_speed(iter/s)": 0.631878 |
| }, |
| { |
| "epoch": 4.883344134802333, |
| "grad_norm": 5.125, |
| "learning_rate": 5.60505646328533e-05, |
| "loss": 0.0255859375, |
| "memory(GiB)": 43.05, |
| "step": 7535, |
| "train_speed(iter/s)": 0.631941 |
| }, |
| { |
| "epoch": 4.886584575502268, |
| "grad_norm": 10.25, |
| "learning_rate": 5.599737339594376e-05, |
| "loss": 0.03046875, |
| "memory(GiB)": 43.05, |
| "step": 7540, |
| "train_speed(iter/s)": 0.632037 |
| }, |
| { |
| "epoch": 4.889825016202203, |
| "grad_norm": 17.75, |
| "learning_rate": 5.594417527169673e-05, |
| "loss": 0.0486328125, |
| "memory(GiB)": 43.05, |
| "step": 7545, |
| "train_speed(iter/s)": 0.632007 |
| }, |
| { |
| "epoch": 4.893065456902138, |
| "grad_norm": 10.3125, |
| "learning_rate": 5.589097032120447e-05, |
| "loss": 0.0482421875, |
| "memory(GiB)": 43.05, |
| "step": 7550, |
| "train_speed(iter/s)": 0.632156 |
| }, |
| { |
| "epoch": 4.896305897602074, |
| "grad_norm": 11.625, |
| "learning_rate": 5.583775860556717e-05, |
| "loss": 0.03671875, |
| "memory(GiB)": 43.05, |
| "step": 7555, |
| "train_speed(iter/s)": 0.632135 |
| }, |
| { |
| "epoch": 4.8995463383020095, |
| "grad_norm": 15.5, |
| "learning_rate": 5.578454018589274e-05, |
| "loss": 0.015625, |
| "memory(GiB)": 43.05, |
| "step": 7560, |
| "train_speed(iter/s)": 0.632263 |
| }, |
| { |
| "epoch": 4.902786779001945, |
| "grad_norm": 18.0, |
| "learning_rate": 5.5731315123296834e-05, |
| "loss": 0.0294921875, |
| "memory(GiB)": 43.05, |
| "step": 7565, |
| "train_speed(iter/s)": 0.632362 |
| }, |
| { |
| "epoch": 4.90602721970188, |
| "grad_norm": 3.671875, |
| "learning_rate": 5.5678083478902655e-05, |
| "loss": 0.028515625, |
| "memory(GiB)": 43.05, |
| "step": 7570, |
| "train_speed(iter/s)": 0.632384 |
| }, |
| { |
| "epoch": 4.909267660401815, |
| "grad_norm": 8.9375, |
| "learning_rate": 5.562484531384107e-05, |
| "loss": 0.04375, |
| "memory(GiB)": 43.05, |
| "step": 7575, |
| "train_speed(iter/s)": 0.632468 |
| }, |
| { |
| "epoch": 4.91250810110175, |
| "grad_norm": 11.5, |
| "learning_rate": 5.5571600689250335e-05, |
| "loss": 0.0271484375, |
| "memory(GiB)": 43.05, |
| "step": 7580, |
| "train_speed(iter/s)": 0.632557 |
| }, |
| { |
| "epoch": 4.915748541801685, |
| "grad_norm": 2.828125, |
| "learning_rate": 5.551834966627617e-05, |
| "loss": 0.0388671875, |
| "memory(GiB)": 43.05, |
| "step": 7585, |
| "train_speed(iter/s)": 0.632647 |
| }, |
| { |
| "epoch": 4.91898898250162, |
| "grad_norm": 1.1484375, |
| "learning_rate": 5.5465092306071666e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 7590, |
| "train_speed(iter/s)": 0.632737 |
| }, |
| { |
| "epoch": 4.922229423201555, |
| "grad_norm": 1.6953125, |
| "learning_rate": 5.541182866979715e-05, |
| "loss": 0.026953125, |
| "memory(GiB)": 43.05, |
| "step": 7595, |
| "train_speed(iter/s)": 0.632828 |
| }, |
| { |
| "epoch": 4.92546986390149, |
| "grad_norm": 2.578125, |
| "learning_rate": 5.5358558818620176e-05, |
| "loss": 0.0244140625, |
| "memory(GiB)": 43.05, |
| "step": 7600, |
| "train_speed(iter/s)": 0.633014 |
| }, |
| { |
| "epoch": 4.928710304601426, |
| "grad_norm": 3.75, |
| "learning_rate": 5.530528281371544e-05, |
| "loss": 0.0365234375, |
| "memory(GiB)": 43.05, |
| "step": 7605, |
| "train_speed(iter/s)": 0.633075 |
| }, |
| { |
| "epoch": 4.931950745301361, |
| "grad_norm": 1.8828125, |
| "learning_rate": 5.5252000716264686e-05, |
| "loss": 0.0177734375, |
| "memory(GiB)": 43.05, |
| "step": 7610, |
| "train_speed(iter/s)": 0.63301 |
| }, |
| { |
| "epoch": 4.935191186001296, |
| "grad_norm": 11.375, |
| "learning_rate": 5.5198712587456655e-05, |
| "loss": 0.0380859375, |
| "memory(GiB)": 43.05, |
| "step": 7615, |
| "train_speed(iter/s)": 0.633044 |
| }, |
| { |
| "epoch": 4.938431626701231, |
| "grad_norm": 14.0, |
| "learning_rate": 5.514541848848704e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 7620, |
| "train_speed(iter/s)": 0.633114 |
| }, |
| { |
| "epoch": 4.941672067401167, |
| "grad_norm": 6.71875, |
| "learning_rate": 5.5092118480558386e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 7625, |
| "train_speed(iter/s)": 0.633226 |
| }, |
| { |
| "epoch": 4.944912508101102, |
| "grad_norm": 12.0, |
| "learning_rate": 5.503881262487999e-05, |
| "loss": 0.0181640625, |
| "memory(GiB)": 43.05, |
| "step": 7630, |
| "train_speed(iter/s)": 0.633247 |
| }, |
| { |
| "epoch": 4.948152948801037, |
| "grad_norm": 2.28125, |
| "learning_rate": 5.4985500982667903e-05, |
| "loss": 0.0236328125, |
| "memory(GiB)": 43.05, |
| "step": 7635, |
| "train_speed(iter/s)": 0.633361 |
| }, |
| { |
| "epoch": 4.951393389500972, |
| "grad_norm": 14.25, |
| "learning_rate": 5.4932183615144785e-05, |
| "loss": 0.0421875, |
| "memory(GiB)": 43.05, |
| "step": 7640, |
| "train_speed(iter/s)": 0.633469 |
| }, |
| { |
| "epoch": 4.954633830200907, |
| "grad_norm": 9.625, |
| "learning_rate": 5.4878860583539915e-05, |
| "loss": 0.0265625, |
| "memory(GiB)": 43.05, |
| "step": 7645, |
| "train_speed(iter/s)": 0.633526 |
| }, |
| { |
| "epoch": 4.9578742709008425, |
| "grad_norm": 10.1875, |
| "learning_rate": 5.482553194908905e-05, |
| "loss": 0.027734375, |
| "memory(GiB)": 43.05, |
| "step": 7650, |
| "train_speed(iter/s)": 0.633593 |
| }, |
| { |
| "epoch": 4.961114711600778, |
| "grad_norm": 3.84375, |
| "learning_rate": 5.477219777303435e-05, |
| "loss": 0.0388671875, |
| "memory(GiB)": 43.05, |
| "step": 7655, |
| "train_speed(iter/s)": 0.633688 |
| }, |
| { |
| "epoch": 4.964355152300713, |
| "grad_norm": 4.84375, |
| "learning_rate": 5.4718858116624416e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 7660, |
| "train_speed(iter/s)": 0.633655 |
| }, |
| { |
| "epoch": 4.967595593000648, |
| "grad_norm": 1.9296875, |
| "learning_rate": 5.466551304111408e-05, |
| "loss": 0.0173828125, |
| "memory(GiB)": 43.05, |
| "step": 7665, |
| "train_speed(iter/s)": 0.633793 |
| }, |
| { |
| "epoch": 4.970836033700583, |
| "grad_norm": 0.458984375, |
| "learning_rate": 5.461216260776442e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 7670, |
| "train_speed(iter/s)": 0.633856 |
| }, |
| { |
| "epoch": 4.974076474400518, |
| "grad_norm": 9.5625, |
| "learning_rate": 5.455880687784266e-05, |
| "loss": 0.028515625, |
| "memory(GiB)": 43.05, |
| "step": 7675, |
| "train_speed(iter/s)": 0.633919 |
| }, |
| { |
| "epoch": 4.977316915100454, |
| "grad_norm": 10.9375, |
| "learning_rate": 5.450544591262212e-05, |
| "loss": 0.037890625, |
| "memory(GiB)": 43.05, |
| "step": 7680, |
| "train_speed(iter/s)": 0.633972 |
| }, |
| { |
| "epoch": 4.980557355800389, |
| "grad_norm": 0.74609375, |
| "learning_rate": 5.44520797733821e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 7685, |
| "train_speed(iter/s)": 0.633993 |
| }, |
| { |
| "epoch": 4.983797796500324, |
| "grad_norm": 10.6875, |
| "learning_rate": 5.4398708521407895e-05, |
| "loss": 0.019140625, |
| "memory(GiB)": 43.05, |
| "step": 7690, |
| "train_speed(iter/s)": 0.634048 |
| }, |
| { |
| "epoch": 4.9870382372002595, |
| "grad_norm": 3.5, |
| "learning_rate": 5.434533221799062e-05, |
| "loss": 0.04453125, |
| "memory(GiB)": 43.05, |
| "step": 7695, |
| "train_speed(iter/s)": 0.634109 |
| }, |
| { |
| "epoch": 4.990278677900195, |
| "grad_norm": 3.3125, |
| "learning_rate": 5.429195092442721e-05, |
| "loss": 0.019140625, |
| "memory(GiB)": 43.05, |
| "step": 7700, |
| "train_speed(iter/s)": 0.634221 |
| }, |
| { |
| "epoch": 4.99351911860013, |
| "grad_norm": 16.0, |
| "learning_rate": 5.423856470202036e-05, |
| "loss": 0.0333984375, |
| "memory(GiB)": 43.05, |
| "step": 7705, |
| "train_speed(iter/s)": 0.634178 |
| }, |
| { |
| "epoch": 4.996759559300065, |
| "grad_norm": 14.4375, |
| "learning_rate": 5.4185173612078365e-05, |
| "loss": 0.0275390625, |
| "memory(GiB)": 43.05, |
| "step": 7710, |
| "train_speed(iter/s)": 0.634191 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 14.0625, |
| "learning_rate": 5.413177771591515e-05, |
| "loss": 0.0271484375, |
| "memory(GiB)": 43.05, |
| "step": 7715, |
| "train_speed(iter/s)": 0.634304 |
| }, |
| { |
| "epoch": 5.003240440699935, |
| "grad_norm": 5.71875, |
| "learning_rate": 5.407837707485015e-05, |
| "loss": 0.0359375, |
| "memory(GiB)": 43.05, |
| "step": 7720, |
| "train_speed(iter/s)": 0.634305 |
| }, |
| { |
| "epoch": 5.00648088139987, |
| "grad_norm": 10.3125, |
| "learning_rate": 5.402497175020828e-05, |
| "loss": 0.0263671875, |
| "memory(GiB)": 43.05, |
| "step": 7725, |
| "train_speed(iter/s)": 0.634454 |
| }, |
| { |
| "epoch": 5.009721322099805, |
| "grad_norm": 12.125, |
| "learning_rate": 5.397156180331976e-05, |
| "loss": 0.0263671875, |
| "memory(GiB)": 43.05, |
| "step": 7730, |
| "train_speed(iter/s)": 0.634515 |
| }, |
| { |
| "epoch": 5.0129617627997405, |
| "grad_norm": 1.6875, |
| "learning_rate": 5.39181472955202e-05, |
| "loss": 0.0529296875, |
| "memory(GiB)": 43.05, |
| "step": 7735, |
| "train_speed(iter/s)": 0.63457 |
| }, |
| { |
| "epoch": 5.016202203499676, |
| "grad_norm": 11.4375, |
| "learning_rate": 5.386472828815039e-05, |
| "loss": 0.0578125, |
| "memory(GiB)": 43.05, |
| "step": 7740, |
| "train_speed(iter/s)": 0.634627 |
| }, |
| { |
| "epoch": 5.019442644199611, |
| "grad_norm": 10.5, |
| "learning_rate": 5.38113048425563e-05, |
| "loss": 0.0404296875, |
| "memory(GiB)": 43.05, |
| "step": 7745, |
| "train_speed(iter/s)": 0.634651 |
| }, |
| { |
| "epoch": 5.022683084899547, |
| "grad_norm": 0.64453125, |
| "learning_rate": 5.375787702008903e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 7750, |
| "train_speed(iter/s)": 0.63465 |
| }, |
| { |
| "epoch": 5.025923525599482, |
| "grad_norm": 13.8125, |
| "learning_rate": 5.370444488210465e-05, |
| "loss": 0.0427734375, |
| "memory(GiB)": 43.05, |
| "step": 7755, |
| "train_speed(iter/s)": 0.633812 |
| }, |
| { |
| "epoch": 5.029163966299417, |
| "grad_norm": 10.4375, |
| "learning_rate": 5.365100848996425e-05, |
| "loss": 0.0173828125, |
| "memory(GiB)": 43.05, |
| "step": 7760, |
| "train_speed(iter/s)": 0.633824 |
| }, |
| { |
| "epoch": 5.032404406999352, |
| "grad_norm": 1.1953125, |
| "learning_rate": 5.359756790503375e-05, |
| "loss": 0.03359375, |
| "memory(GiB)": 43.05, |
| "step": 7765, |
| "train_speed(iter/s)": 0.633735 |
| }, |
| { |
| "epoch": 5.035644847699287, |
| "grad_norm": 4.84375, |
| "learning_rate": 5.354412318868391e-05, |
| "loss": 0.01875, |
| "memory(GiB)": 43.05, |
| "step": 7770, |
| "train_speed(iter/s)": 0.633869 |
| }, |
| { |
| "epoch": 5.038885288399222, |
| "grad_norm": 3.65625, |
| "learning_rate": 5.349067440229024e-05, |
| "loss": 0.0171875, |
| "memory(GiB)": 43.05, |
| "step": 7775, |
| "train_speed(iter/s)": 0.633922 |
| }, |
| { |
| "epoch": 5.0421257290991575, |
| "grad_norm": 14.5, |
| "learning_rate": 5.343722160723292e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 7780, |
| "train_speed(iter/s)": 0.634081 |
| }, |
| { |
| "epoch": 5.045366169799093, |
| "grad_norm": 10.0, |
| "learning_rate": 5.33837648648967e-05, |
| "loss": 0.01640625, |
| "memory(GiB)": 43.05, |
| "step": 7785, |
| "train_speed(iter/s)": 0.634081 |
| }, |
| { |
| "epoch": 5.048606610499028, |
| "grad_norm": 1.3984375, |
| "learning_rate": 5.333030423667092e-05, |
| "loss": 0.0158203125, |
| "memory(GiB)": 43.05, |
| "step": 7790, |
| "train_speed(iter/s)": 0.634146 |
| }, |
| { |
| "epoch": 5.051847051198963, |
| "grad_norm": 16.25, |
| "learning_rate": 5.327683978394935e-05, |
| "loss": 0.0267578125, |
| "memory(GiB)": 43.05, |
| "step": 7795, |
| "train_speed(iter/s)": 0.63421 |
| }, |
| { |
| "epoch": 5.055087491898898, |
| "grad_norm": 3.328125, |
| "learning_rate": 5.322337156813014e-05, |
| "loss": 0.0232421875, |
| "memory(GiB)": 43.05, |
| "step": 7800, |
| "train_speed(iter/s)": 0.634236 |
| }, |
| { |
| "epoch": 5.058327932598833, |
| "grad_norm": 12.0, |
| "learning_rate": 5.31698996506158e-05, |
| "loss": 0.0236328125, |
| "memory(GiB)": 43.05, |
| "step": 7805, |
| "train_speed(iter/s)": 0.634233 |
| }, |
| { |
| "epoch": 5.061568373298769, |
| "grad_norm": 13.625, |
| "learning_rate": 5.3116424092813063e-05, |
| "loss": 0.0185546875, |
| "memory(GiB)": 43.05, |
| "step": 7810, |
| "train_speed(iter/s)": 0.634187 |
| }, |
| { |
| "epoch": 5.064808813998704, |
| "grad_norm": 1.9765625, |
| "learning_rate": 5.306294495613284e-05, |
| "loss": 0.0318359375, |
| "memory(GiB)": 43.05, |
| "step": 7815, |
| "train_speed(iter/s)": 0.634316 |
| }, |
| { |
| "epoch": 5.068049254698639, |
| "grad_norm": 2.015625, |
| "learning_rate": 5.3009462301990174e-05, |
| "loss": 0.0412109375, |
| "memory(GiB)": 43.05, |
| "step": 7820, |
| "train_speed(iter/s)": 0.634324 |
| }, |
| { |
| "epoch": 5.071289695398574, |
| "grad_norm": 14.375, |
| "learning_rate": 5.295597619180411e-05, |
| "loss": 0.0138671875, |
| "memory(GiB)": 43.05, |
| "step": 7825, |
| "train_speed(iter/s)": 0.63442 |
| }, |
| { |
| "epoch": 5.07453013609851, |
| "grad_norm": 2.453125, |
| "learning_rate": 5.290248668699771e-05, |
| "loss": 0.0236328125, |
| "memory(GiB)": 43.05, |
| "step": 7830, |
| "train_speed(iter/s)": 0.634437 |
| }, |
| { |
| "epoch": 5.077770576798445, |
| "grad_norm": 10.9375, |
| "learning_rate": 5.284899384899791e-05, |
| "loss": 0.028125, |
| "memory(GiB)": 43.05, |
| "step": 7835, |
| "train_speed(iter/s)": 0.634457 |
| }, |
| { |
| "epoch": 5.08101101749838, |
| "grad_norm": 12.9375, |
| "learning_rate": 5.279549773923547e-05, |
| "loss": 0.0240234375, |
| "memory(GiB)": 43.05, |
| "step": 7840, |
| "train_speed(iter/s)": 0.63448 |
| }, |
| { |
| "epoch": 5.084251458198315, |
| "grad_norm": 10.5, |
| "learning_rate": 5.274199841914489e-05, |
| "loss": 0.02421875, |
| "memory(GiB)": 43.05, |
| "step": 7845, |
| "train_speed(iter/s)": 0.634588 |
| }, |
| { |
| "epoch": 5.08749189889825, |
| "grad_norm": 0.41015625, |
| "learning_rate": 5.268849595016441e-05, |
| "loss": 0.0267578125, |
| "memory(GiB)": 43.05, |
| "step": 7850, |
| "train_speed(iter/s)": 0.634681 |
| }, |
| { |
| "epoch": 5.090732339598185, |
| "grad_norm": 12.875, |
| "learning_rate": 5.263499039373583e-05, |
| "loss": 0.033203125, |
| "memory(GiB)": 43.05, |
| "step": 7855, |
| "train_speed(iter/s)": 0.63476 |
| }, |
| { |
| "epoch": 5.09397278029812, |
| "grad_norm": 12.25, |
| "learning_rate": 5.2581481811304534e-05, |
| "loss": 0.0267578125, |
| "memory(GiB)": 43.05, |
| "step": 7860, |
| "train_speed(iter/s)": 0.63484 |
| }, |
| { |
| "epoch": 5.097213220998055, |
| "grad_norm": 4.34375, |
| "learning_rate": 5.252797026431937e-05, |
| "loss": 0.0162109375, |
| "memory(GiB)": 43.05, |
| "step": 7865, |
| "train_speed(iter/s)": 0.634867 |
| }, |
| { |
| "epoch": 5.1004536616979905, |
| "grad_norm": 6.84375, |
| "learning_rate": 5.247445581423257e-05, |
| "loss": 0.0330078125, |
| "memory(GiB)": 43.05, |
| "step": 7870, |
| "train_speed(iter/s)": 0.634923 |
| }, |
| { |
| "epoch": 5.1036941023979265, |
| "grad_norm": 2.59375, |
| "learning_rate": 5.242093852249973e-05, |
| "loss": 0.0330078125, |
| "memory(GiB)": 43.05, |
| "step": 7875, |
| "train_speed(iter/s)": 0.635026 |
| }, |
| { |
| "epoch": 5.106934543097862, |
| "grad_norm": 2.046875, |
| "learning_rate": 5.236741845057971e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 7880, |
| "train_speed(iter/s)": 0.635112 |
| }, |
| { |
| "epoch": 5.110174983797797, |
| "grad_norm": 15.6875, |
| "learning_rate": 5.2313895659934516e-05, |
| "loss": 0.030078125, |
| "memory(GiB)": 43.05, |
| "step": 7885, |
| "train_speed(iter/s)": 0.635123 |
| }, |
| { |
| "epoch": 5.113415424497732, |
| "grad_norm": 0.60546875, |
| "learning_rate": 5.226037021202932e-05, |
| "loss": 0.0388671875, |
| "memory(GiB)": 43.05, |
| "step": 7890, |
| "train_speed(iter/s)": 0.635182 |
| }, |
| { |
| "epoch": 5.116655865197667, |
| "grad_norm": 7.0, |
| "learning_rate": 5.220684216833236e-05, |
| "loss": 0.0224609375, |
| "memory(GiB)": 43.05, |
| "step": 7895, |
| "train_speed(iter/s)": 0.635271 |
| }, |
| { |
| "epoch": 5.119896305897602, |
| "grad_norm": 2.109375, |
| "learning_rate": 5.215331159031479e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 7900, |
| "train_speed(iter/s)": 0.635228 |
| }, |
| { |
| "epoch": 5.123136746597537, |
| "grad_norm": 8.625, |
| "learning_rate": 5.209977853945076e-05, |
| "loss": 0.0373046875, |
| "memory(GiB)": 43.05, |
| "step": 7905, |
| "train_speed(iter/s)": 0.635387 |
| }, |
| { |
| "epoch": 5.126377187297472, |
| "grad_norm": 12.75, |
| "learning_rate": 5.204624307721719e-05, |
| "loss": 0.041015625, |
| "memory(GiB)": 43.05, |
| "step": 7910, |
| "train_speed(iter/s)": 0.635298 |
| }, |
| { |
| "epoch": 5.1296176279974075, |
| "grad_norm": 11.0625, |
| "learning_rate": 5.1992705265093775e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 7915, |
| "train_speed(iter/s)": 0.635456 |
| }, |
| { |
| "epoch": 5.132858068697343, |
| "grad_norm": 0.6875, |
| "learning_rate": 5.1939165164562974e-05, |
| "loss": 0.0267578125, |
| "memory(GiB)": 43.05, |
| "step": 7920, |
| "train_speed(iter/s)": 0.635471 |
| }, |
| { |
| "epoch": 5.136098509397278, |
| "grad_norm": 12.625, |
| "learning_rate": 5.188562283710977e-05, |
| "loss": 0.024609375, |
| "memory(GiB)": 43.05, |
| "step": 7925, |
| "train_speed(iter/s)": 0.635584 |
| }, |
| { |
| "epoch": 5.139338950097213, |
| "grad_norm": 9.9375, |
| "learning_rate": 5.1832078344221804e-05, |
| "loss": 0.027734375, |
| "memory(GiB)": 43.05, |
| "step": 7930, |
| "train_speed(iter/s)": 0.635608 |
| }, |
| { |
| "epoch": 5.142579390797148, |
| "grad_norm": 1.3125, |
| "learning_rate": 5.177853174738915e-05, |
| "loss": 0.016796875, |
| "memory(GiB)": 43.05, |
| "step": 7935, |
| "train_speed(iter/s)": 0.635558 |
| }, |
| { |
| "epoch": 5.145819831497084, |
| "grad_norm": 0.6328125, |
| "learning_rate": 5.1724983108104305e-05, |
| "loss": 0.0517578125, |
| "memory(GiB)": 43.05, |
| "step": 7940, |
| "train_speed(iter/s)": 0.63554 |
| }, |
| { |
| "epoch": 5.149060272197019, |
| "grad_norm": 0.734375, |
| "learning_rate": 5.1671432487862106e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 7945, |
| "train_speed(iter/s)": 0.635652 |
| }, |
| { |
| "epoch": 5.152300712896954, |
| "grad_norm": 0.953125, |
| "learning_rate": 5.1617879948159684e-05, |
| "loss": 0.0203125, |
| "memory(GiB)": 43.05, |
| "step": 7950, |
| "train_speed(iter/s)": 0.635712 |
| }, |
| { |
| "epoch": 5.155541153596889, |
| "grad_norm": 3.40625, |
| "learning_rate": 5.156432555049636e-05, |
| "loss": 0.0244140625, |
| "memory(GiB)": 43.05, |
| "step": 7955, |
| "train_speed(iter/s)": 0.635774 |
| }, |
| { |
| "epoch": 5.1587815942968245, |
| "grad_norm": 13.5625, |
| "learning_rate": 5.151076935637359e-05, |
| "loss": 0.0244140625, |
| "memory(GiB)": 43.05, |
| "step": 7960, |
| "train_speed(iter/s)": 0.635854 |
| }, |
| { |
| "epoch": 5.16202203499676, |
| "grad_norm": 6.59375, |
| "learning_rate": 5.1457211427294914e-05, |
| "loss": 0.0220703125, |
| "memory(GiB)": 43.05, |
| "step": 7965, |
| "train_speed(iter/s)": 0.635986 |
| }, |
| { |
| "epoch": 5.165262475696695, |
| "grad_norm": 11.5625, |
| "learning_rate": 5.140365182476583e-05, |
| "loss": 0.0232421875, |
| "memory(GiB)": 43.05, |
| "step": 7970, |
| "train_speed(iter/s)": 0.635978 |
| }, |
| { |
| "epoch": 5.16850291639663, |
| "grad_norm": 1.3671875, |
| "learning_rate": 5.1350090610293765e-05, |
| "loss": 0.0359375, |
| "memory(GiB)": 43.05, |
| "step": 7975, |
| "train_speed(iter/s)": 0.636059 |
| }, |
| { |
| "epoch": 5.171743357096565, |
| "grad_norm": 11.625, |
| "learning_rate": 5.129652784538803e-05, |
| "loss": 0.020703125, |
| "memory(GiB)": 43.05, |
| "step": 7980, |
| "train_speed(iter/s)": 0.636064 |
| }, |
| { |
| "epoch": 5.1749837977965, |
| "grad_norm": 7.6875, |
| "learning_rate": 5.124296359155968e-05, |
| "loss": 0.0140625, |
| "memory(GiB)": 43.05, |
| "step": 7985, |
| "train_speed(iter/s)": 0.636167 |
| }, |
| { |
| "epoch": 5.178224238496435, |
| "grad_norm": 14.0625, |
| "learning_rate": 5.118939791032148e-05, |
| "loss": 0.0130859375, |
| "memory(GiB)": 43.05, |
| "step": 7990, |
| "train_speed(iter/s)": 0.636255 |
| }, |
| { |
| "epoch": 5.18146467919637, |
| "grad_norm": 4.90625, |
| "learning_rate": 5.113583086318786e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 7995, |
| "train_speed(iter/s)": 0.636389 |
| }, |
| { |
| "epoch": 5.1847051198963054, |
| "grad_norm": 13.0, |
| "learning_rate": 5.108226251167483e-05, |
| "loss": 0.03359375, |
| "memory(GiB)": 43.05, |
| "step": 8000, |
| "train_speed(iter/s)": 0.6365 |
| }, |
| { |
| "epoch": 5.1879455605962415, |
| "grad_norm": 11.0, |
| "learning_rate": 5.1028692917299825e-05, |
| "loss": 0.054296875, |
| "memory(GiB)": 43.05, |
| "step": 8005, |
| "train_speed(iter/s)": 0.604173 |
| }, |
| { |
| "epoch": 5.191186001296177, |
| "grad_norm": 2.125, |
| "learning_rate": 5.097512214158179e-05, |
| "loss": 0.05, |
| "memory(GiB)": 43.05, |
| "step": 8010, |
| "train_speed(iter/s)": 0.604167 |
| }, |
| { |
| "epoch": 5.194426441996112, |
| "grad_norm": 2.71875, |
| "learning_rate": 5.0921550246040974e-05, |
| "loss": 0.0228515625, |
| "memory(GiB)": 43.05, |
| "step": 8015, |
| "train_speed(iter/s)": 0.604346 |
| }, |
| { |
| "epoch": 5.197666882696047, |
| "grad_norm": 4.8125, |
| "learning_rate": 5.0867977292198935e-05, |
| "loss": 0.029296875, |
| "memory(GiB)": 43.05, |
| "step": 8020, |
| "train_speed(iter/s)": 0.604425 |
| }, |
| { |
| "epoch": 5.200907323395982, |
| "grad_norm": 13.5625, |
| "learning_rate": 5.0814403341578444e-05, |
| "loss": 0.03359375, |
| "memory(GiB)": 43.05, |
| "step": 8025, |
| "train_speed(iter/s)": 0.604499 |
| }, |
| { |
| "epoch": 5.204147764095917, |
| "grad_norm": 14.8125, |
| "learning_rate": 5.076082845570342e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 8030, |
| "train_speed(iter/s)": 0.604584 |
| }, |
| { |
| "epoch": 5.207388204795852, |
| "grad_norm": 15.25, |
| "learning_rate": 5.070725269609884e-05, |
| "loss": 0.02265625, |
| "memory(GiB)": 43.05, |
| "step": 8035, |
| "train_speed(iter/s)": 0.604694 |
| }, |
| { |
| "epoch": 5.210628645495787, |
| "grad_norm": 11.875, |
| "learning_rate": 5.065367612429071e-05, |
| "loss": 0.022265625, |
| "memory(GiB)": 43.05, |
| "step": 8040, |
| "train_speed(iter/s)": 0.604773 |
| }, |
| { |
| "epoch": 5.213869086195722, |
| "grad_norm": 1.140625, |
| "learning_rate": 5.060009880180592e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 8045, |
| "train_speed(iter/s)": 0.604909 |
| }, |
| { |
| "epoch": 5.2171095268956575, |
| "grad_norm": 13.5, |
| "learning_rate": 5.054652079017229e-05, |
| "loss": 0.041796875, |
| "memory(GiB)": 43.05, |
| "step": 8050, |
| "train_speed(iter/s)": 0.604926 |
| }, |
| { |
| "epoch": 5.220349967595593, |
| "grad_norm": 0.59765625, |
| "learning_rate": 5.049294215091839e-05, |
| "loss": 0.0349609375, |
| "memory(GiB)": 43.05, |
| "step": 8055, |
| "train_speed(iter/s)": 0.605073 |
| }, |
| { |
| "epoch": 5.223590408295528, |
| "grad_norm": 4.96875, |
| "learning_rate": 5.04393629455735e-05, |
| "loss": 0.016796875, |
| "memory(GiB)": 43.05, |
| "step": 8060, |
| "train_speed(iter/s)": 0.605121 |
| }, |
| { |
| "epoch": 5.226830848995464, |
| "grad_norm": 10.75, |
| "learning_rate": 5.038578323566757e-05, |
| "loss": 0.0408203125, |
| "memory(GiB)": 43.05, |
| "step": 8065, |
| "train_speed(iter/s)": 0.605194 |
| }, |
| { |
| "epoch": 5.230071289695399, |
| "grad_norm": 2.265625, |
| "learning_rate": 5.0332203082731165e-05, |
| "loss": 0.0162109375, |
| "memory(GiB)": 43.05, |
| "step": 8070, |
| "train_speed(iter/s)": 0.605244 |
| }, |
| { |
| "epoch": 5.233311730395334, |
| "grad_norm": 0.6171875, |
| "learning_rate": 5.027862254829527e-05, |
| "loss": 0.0330078125, |
| "memory(GiB)": 43.05, |
| "step": 8075, |
| "train_speed(iter/s)": 0.605317 |
| }, |
| { |
| "epoch": 5.236552171095269, |
| "grad_norm": 1.3359375, |
| "learning_rate": 5.0225041693891396e-05, |
| "loss": 0.0201171875, |
| "memory(GiB)": 43.05, |
| "step": 8080, |
| "train_speed(iter/s)": 0.604723 |
| }, |
| { |
| "epoch": 5.239792611795204, |
| "grad_norm": 1.421875, |
| "learning_rate": 5.0171460581051364e-05, |
| "loss": 0.018359375, |
| "memory(GiB)": 43.05, |
| "step": 8085, |
| "train_speed(iter/s)": 0.60472 |
| }, |
| { |
| "epoch": 5.243033052495139, |
| "grad_norm": 3.890625, |
| "learning_rate": 5.011787927130732e-05, |
| "loss": 0.0240234375, |
| "memory(GiB)": 43.05, |
| "step": 8090, |
| "train_speed(iter/s)": 0.604836 |
| }, |
| { |
| "epoch": 5.2462734931950745, |
| "grad_norm": 12.0, |
| "learning_rate": 5.006429782619162e-05, |
| "loss": 0.0244140625, |
| "memory(GiB)": 43.05, |
| "step": 8095, |
| "train_speed(iter/s)": 0.604937 |
| }, |
| { |
| "epoch": 5.24951393389501, |
| "grad_norm": 1.03125, |
| "learning_rate": 5.0010716307236796e-05, |
| "loss": 0.042578125, |
| "memory(GiB)": 43.05, |
| "step": 8100, |
| "train_speed(iter/s)": 0.604993 |
| }, |
| { |
| "epoch": 5.252754374594945, |
| "grad_norm": 8.25, |
| "learning_rate": 4.995713477597546e-05, |
| "loss": 0.0296875, |
| "memory(GiB)": 43.05, |
| "step": 8105, |
| "train_speed(iter/s)": 0.605031 |
| }, |
| { |
| "epoch": 5.25599481529488, |
| "grad_norm": 1.46875, |
| "learning_rate": 4.990355329394019e-05, |
| "loss": 0.0310546875, |
| "memory(GiB)": 43.05, |
| "step": 8110, |
| "train_speed(iter/s)": 0.60515 |
| }, |
| { |
| "epoch": 5.259235255994815, |
| "grad_norm": 7.1875, |
| "learning_rate": 4.984997192266359e-05, |
| "loss": 0.0318359375, |
| "memory(GiB)": 43.05, |
| "step": 8115, |
| "train_speed(iter/s)": 0.605281 |
| }, |
| { |
| "epoch": 5.26247569669475, |
| "grad_norm": 7.4375, |
| "learning_rate": 4.9796390723678085e-05, |
| "loss": 0.0224609375, |
| "memory(GiB)": 43.05, |
| "step": 8120, |
| "train_speed(iter/s)": 0.605418 |
| }, |
| { |
| "epoch": 5.265716137394685, |
| "grad_norm": 1.8984375, |
| "learning_rate": 4.97428097585159e-05, |
| "loss": 0.016796875, |
| "memory(GiB)": 43.05, |
| "step": 8125, |
| "train_speed(iter/s)": 0.605487 |
| }, |
| { |
| "epoch": 5.268956578094621, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.968922908870901e-05, |
| "loss": 0.01875, |
| "memory(GiB)": 43.05, |
| "step": 8130, |
| "train_speed(iter/s)": 0.605636 |
| }, |
| { |
| "epoch": 5.272197018794556, |
| "grad_norm": 1.4921875, |
| "learning_rate": 4.9635648775789075e-05, |
| "loss": 0.0244140625, |
| "memory(GiB)": 43.05, |
| "step": 8135, |
| "train_speed(iter/s)": 0.605722 |
| }, |
| { |
| "epoch": 5.2754374594944915, |
| "grad_norm": 17.0, |
| "learning_rate": 4.958206888128726e-05, |
| "loss": 0.0482421875, |
| "memory(GiB)": 43.05, |
| "step": 8140, |
| "train_speed(iter/s)": 0.605755 |
| }, |
| { |
| "epoch": 5.278677900194427, |
| "grad_norm": 10.125, |
| "learning_rate": 4.9528489466734326e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 8145, |
| "train_speed(iter/s)": 0.605841 |
| }, |
| { |
| "epoch": 5.281918340894362, |
| "grad_norm": 9.25, |
| "learning_rate": 4.947491059366049e-05, |
| "loss": 0.0255859375, |
| "memory(GiB)": 43.05, |
| "step": 8150, |
| "train_speed(iter/s)": 0.606016 |
| }, |
| { |
| "epoch": 5.285158781594297, |
| "grad_norm": 2.703125, |
| "learning_rate": 4.942133232359527e-05, |
| "loss": 0.0390625, |
| "memory(GiB)": 43.05, |
| "step": 8155, |
| "train_speed(iter/s)": 0.60617 |
| }, |
| { |
| "epoch": 5.288399222294232, |
| "grad_norm": 0.81640625, |
| "learning_rate": 4.9367754718067566e-05, |
| "loss": 0.0251953125, |
| "memory(GiB)": 43.05, |
| "step": 8160, |
| "train_speed(iter/s)": 0.606285 |
| }, |
| { |
| "epoch": 5.291639662994167, |
| "grad_norm": 3.765625, |
| "learning_rate": 4.93141778386055e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 8165, |
| "train_speed(iter/s)": 0.60636 |
| }, |
| { |
| "epoch": 5.294880103694102, |
| "grad_norm": 10.1875, |
| "learning_rate": 4.9260601746736315e-05, |
| "loss": 0.0349609375, |
| "memory(GiB)": 43.05, |
| "step": 8170, |
| "train_speed(iter/s)": 0.606432 |
| }, |
| { |
| "epoch": 5.298120544394037, |
| "grad_norm": 1.453125, |
| "learning_rate": 4.92070265039864e-05, |
| "loss": 0.0314453125, |
| "memory(GiB)": 43.05, |
| "step": 8175, |
| "train_speed(iter/s)": 0.606522 |
| }, |
| { |
| "epoch": 5.3013609850939725, |
| "grad_norm": 1.5546875, |
| "learning_rate": 4.9153452171881184e-05, |
| "loss": 0.0458984375, |
| "memory(GiB)": 43.05, |
| "step": 8180, |
| "train_speed(iter/s)": 0.606545 |
| }, |
| { |
| "epoch": 5.304601425793908, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.909987881194497e-05, |
| "loss": 0.03359375, |
| "memory(GiB)": 43.05, |
| "step": 8185, |
| "train_speed(iter/s)": 0.606695 |
| }, |
| { |
| "epoch": 5.307841866493844, |
| "grad_norm": 0.65625, |
| "learning_rate": 4.9046306485701e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 8190, |
| "train_speed(iter/s)": 0.606869 |
| }, |
| { |
| "epoch": 5.311082307193779, |
| "grad_norm": 10.875, |
| "learning_rate": 4.899273525467135e-05, |
| "loss": 0.0248046875, |
| "memory(GiB)": 43.05, |
| "step": 8195, |
| "train_speed(iter/s)": 0.606882 |
| }, |
| { |
| "epoch": 5.314322747893714, |
| "grad_norm": 4.65625, |
| "learning_rate": 4.893916518037678e-05, |
| "loss": 0.03046875, |
| "memory(GiB)": 43.05, |
| "step": 8200, |
| "train_speed(iter/s)": 0.606937 |
| }, |
| { |
| "epoch": 5.317563188593649, |
| "grad_norm": 8.9375, |
| "learning_rate": 4.888559632433677e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 8205, |
| "train_speed(iter/s)": 0.607074 |
| }, |
| { |
| "epoch": 5.320803629293584, |
| "grad_norm": 16.0, |
| "learning_rate": 4.88320287480694e-05, |
| "loss": 0.02109375, |
| "memory(GiB)": 43.05, |
| "step": 8210, |
| "train_speed(iter/s)": 0.607103 |
| }, |
| { |
| "epoch": 5.324044069993519, |
| "grad_norm": 3.890625, |
| "learning_rate": 4.8778462513091214e-05, |
| "loss": 0.0421875, |
| "memory(GiB)": 43.05, |
| "step": 8215, |
| "train_speed(iter/s)": 0.607146 |
| }, |
| { |
| "epoch": 5.327284510693454, |
| "grad_norm": 3.984375, |
| "learning_rate": 4.872489768091729e-05, |
| "loss": 0.028515625, |
| "memory(GiB)": 43.05, |
| "step": 8220, |
| "train_speed(iter/s)": 0.607161 |
| }, |
| { |
| "epoch": 5.330524951393389, |
| "grad_norm": 2.1875, |
| "learning_rate": 4.867133431306108e-05, |
| "loss": 0.027734375, |
| "memory(GiB)": 43.05, |
| "step": 8225, |
| "train_speed(iter/s)": 0.607205 |
| }, |
| { |
| "epoch": 5.333765392093325, |
| "grad_norm": 12.25, |
| "learning_rate": 4.8617772471034335e-05, |
| "loss": 0.0294921875, |
| "memory(GiB)": 43.05, |
| "step": 8230, |
| "train_speed(iter/s)": 0.607379 |
| }, |
| { |
| "epoch": 5.33700583279326, |
| "grad_norm": 1.640625, |
| "learning_rate": 4.856421221634705e-05, |
| "loss": 0.0220703125, |
| "memory(GiB)": 43.05, |
| "step": 8235, |
| "train_speed(iter/s)": 0.607384 |
| }, |
| { |
| "epoch": 5.340246273493195, |
| "grad_norm": 1.1953125, |
| "learning_rate": 4.851065361050743e-05, |
| "loss": 0.0146484375, |
| "memory(GiB)": 43.05, |
| "step": 8240, |
| "train_speed(iter/s)": 0.607513 |
| }, |
| { |
| "epoch": 5.34348671419313, |
| "grad_norm": 1.4296875, |
| "learning_rate": 4.845709671502178e-05, |
| "loss": 0.032421875, |
| "memory(GiB)": 43.05, |
| "step": 8245, |
| "train_speed(iter/s)": 0.607557 |
| }, |
| { |
| "epoch": 5.346727154893065, |
| "grad_norm": 11.0625, |
| "learning_rate": 4.840354159139438e-05, |
| "loss": 0.0556640625, |
| "memory(GiB)": 43.05, |
| "step": 8250, |
| "train_speed(iter/s)": 0.607691 |
| }, |
| { |
| "epoch": 5.349967595593001, |
| "grad_norm": 2.375, |
| "learning_rate": 4.8349988301127555e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 8255, |
| "train_speed(iter/s)": 0.607817 |
| }, |
| { |
| "epoch": 5.353208036292936, |
| "grad_norm": 1.3828125, |
| "learning_rate": 4.82964369057215e-05, |
| "loss": 0.0388671875, |
| "memory(GiB)": 43.05, |
| "step": 8260, |
| "train_speed(iter/s)": 0.607898 |
| }, |
| { |
| "epoch": 5.356448476992871, |
| "grad_norm": 3.578125, |
| "learning_rate": 4.8242887466674194e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 8265, |
| "train_speed(iter/s)": 0.608052 |
| }, |
| { |
| "epoch": 5.359688917692806, |
| "grad_norm": 6.9375, |
| "learning_rate": 4.818934004548142e-05, |
| "loss": 0.0294921875, |
| "memory(GiB)": 43.05, |
| "step": 8270, |
| "train_speed(iter/s)": 0.608099 |
| }, |
| { |
| "epoch": 5.3629293583927415, |
| "grad_norm": 1.7578125, |
| "learning_rate": 4.8135794703636643e-05, |
| "loss": 0.034765625, |
| "memory(GiB)": 43.05, |
| "step": 8275, |
| "train_speed(iter/s)": 0.608232 |
| }, |
| { |
| "epoch": 5.366169799092677, |
| "grad_norm": 2.21875, |
| "learning_rate": 4.808225150263088e-05, |
| "loss": 0.034375, |
| "memory(GiB)": 43.05, |
| "step": 8280, |
| "train_speed(iter/s)": 0.60837 |
| }, |
| { |
| "epoch": 5.369410239792612, |
| "grad_norm": 8.0, |
| "learning_rate": 4.802871050395276e-05, |
| "loss": 0.015234375, |
| "memory(GiB)": 43.05, |
| "step": 8285, |
| "train_speed(iter/s)": 0.608458 |
| }, |
| { |
| "epoch": 5.372650680492547, |
| "grad_norm": 15.25, |
| "learning_rate": 4.797517176908836e-05, |
| "loss": 0.0275390625, |
| "memory(GiB)": 43.05, |
| "step": 8290, |
| "train_speed(iter/s)": 0.60859 |
| }, |
| { |
| "epoch": 5.375891121192482, |
| "grad_norm": 0.66015625, |
| "learning_rate": 4.792163535952113e-05, |
| "loss": 0.016796875, |
| "memory(GiB)": 43.05, |
| "step": 8295, |
| "train_speed(iter/s)": 0.608648 |
| }, |
| { |
| "epoch": 5.379131561892417, |
| "grad_norm": 6.3125, |
| "learning_rate": 4.786810133673188e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 8300, |
| "train_speed(iter/s)": 0.608736 |
| }, |
| { |
| "epoch": 5.382372002592352, |
| "grad_norm": 0.84375, |
| "learning_rate": 4.781456976219869e-05, |
| "loss": 0.021875, |
| "memory(GiB)": 43.05, |
| "step": 8305, |
| "train_speed(iter/s)": 0.608824 |
| }, |
| { |
| "epoch": 5.385612443292287, |
| "grad_norm": 2.484375, |
| "learning_rate": 4.776104069739677e-05, |
| "loss": 0.0388671875, |
| "memory(GiB)": 43.05, |
| "step": 8310, |
| "train_speed(iter/s)": 0.608935 |
| }, |
| { |
| "epoch": 5.388852883992223, |
| "grad_norm": 15.125, |
| "learning_rate": 4.770751420379852e-05, |
| "loss": 0.0490234375, |
| "memory(GiB)": 43.05, |
| "step": 8315, |
| "train_speed(iter/s)": 0.608995 |
| }, |
| { |
| "epoch": 5.3920933246921585, |
| "grad_norm": 2.15625, |
| "learning_rate": 4.7653990342873354e-05, |
| "loss": 0.0373046875, |
| "memory(GiB)": 43.05, |
| "step": 8320, |
| "train_speed(iter/s)": 0.609089 |
| }, |
| { |
| "epoch": 5.395333765392094, |
| "grad_norm": 0.578125, |
| "learning_rate": 4.7600469176087634e-05, |
| "loss": 0.03359375, |
| "memory(GiB)": 43.05, |
| "step": 8325, |
| "train_speed(iter/s)": 0.609144 |
| }, |
| { |
| "epoch": 5.398574206092029, |
| "grad_norm": 12.375, |
| "learning_rate": 4.754695076490467e-05, |
| "loss": 0.02578125, |
| "memory(GiB)": 43.05, |
| "step": 8330, |
| "train_speed(iter/s)": 0.609278 |
| }, |
| { |
| "epoch": 5.401814646791964, |
| "grad_norm": 8.5, |
| "learning_rate": 4.7493435170784615e-05, |
| "loss": 0.0353515625, |
| "memory(GiB)": 43.05, |
| "step": 8335, |
| "train_speed(iter/s)": 0.609252 |
| }, |
| { |
| "epoch": 5.405055087491899, |
| "grad_norm": 5.125, |
| "learning_rate": 4.7439922455184325e-05, |
| "loss": 0.0337890625, |
| "memory(GiB)": 43.05, |
| "step": 8340, |
| "train_speed(iter/s)": 0.60928 |
| }, |
| { |
| "epoch": 5.408295528191834, |
| "grad_norm": 3.859375, |
| "learning_rate": 4.738641267955742e-05, |
| "loss": 0.0220703125, |
| "memory(GiB)": 43.05, |
| "step": 8345, |
| "train_speed(iter/s)": 0.609361 |
| }, |
| { |
| "epoch": 5.411535968891769, |
| "grad_norm": 12.1875, |
| "learning_rate": 4.7332905905354136e-05, |
| "loss": 0.0232421875, |
| "memory(GiB)": 43.05, |
| "step": 8350, |
| "train_speed(iter/s)": 0.609534 |
| }, |
| { |
| "epoch": 5.414776409591704, |
| "grad_norm": 2.0625, |
| "learning_rate": 4.727940219402119e-05, |
| "loss": 0.0193359375, |
| "memory(GiB)": 43.05, |
| "step": 8355, |
| "train_speed(iter/s)": 0.609561 |
| }, |
| { |
| "epoch": 5.4180168502916395, |
| "grad_norm": 12.875, |
| "learning_rate": 4.722590160700186e-05, |
| "loss": 0.050390625, |
| "memory(GiB)": 43.05, |
| "step": 8360, |
| "train_speed(iter/s)": 0.609583 |
| }, |
| { |
| "epoch": 5.421257290991575, |
| "grad_norm": 3.375, |
| "learning_rate": 4.717240420573581e-05, |
| "loss": 0.0208984375, |
| "memory(GiB)": 43.05, |
| "step": 8365, |
| "train_speed(iter/s)": 0.609734 |
| }, |
| { |
| "epoch": 5.42449773169151, |
| "grad_norm": 11.375, |
| "learning_rate": 4.711891005165904e-05, |
| "loss": 0.0361328125, |
| "memory(GiB)": 43.05, |
| "step": 8370, |
| "train_speed(iter/s)": 0.609765 |
| }, |
| { |
| "epoch": 5.427738172391445, |
| "grad_norm": 11.5625, |
| "learning_rate": 4.706541920620383e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 8375, |
| "train_speed(iter/s)": 0.609934 |
| }, |
| { |
| "epoch": 5.43097861309138, |
| "grad_norm": 13.25, |
| "learning_rate": 4.701193173079867e-05, |
| "loss": 0.0310546875, |
| "memory(GiB)": 43.05, |
| "step": 8380, |
| "train_speed(iter/s)": 0.610021 |
| }, |
| { |
| "epoch": 5.434219053791316, |
| "grad_norm": 13.375, |
| "learning_rate": 4.695844768686812e-05, |
| "loss": 0.02265625, |
| "memory(GiB)": 43.05, |
| "step": 8385, |
| "train_speed(iter/s)": 0.610153 |
| }, |
| { |
| "epoch": 5.437459494491251, |
| "grad_norm": 4.71875, |
| "learning_rate": 4.690496713583289e-05, |
| "loss": 0.0294921875, |
| "memory(GiB)": 43.05, |
| "step": 8390, |
| "train_speed(iter/s)": 0.610216 |
| }, |
| { |
| "epoch": 5.440699935191186, |
| "grad_norm": 16.375, |
| "learning_rate": 4.685149013910961e-05, |
| "loss": 0.028515625, |
| "memory(GiB)": 43.05, |
| "step": 8395, |
| "train_speed(iter/s)": 0.610328 |
| }, |
| { |
| "epoch": 5.443940375891121, |
| "grad_norm": 14.625, |
| "learning_rate": 4.6798016758110865e-05, |
| "loss": 0.0201171875, |
| "memory(GiB)": 43.05, |
| "step": 8400, |
| "train_speed(iter/s)": 0.610501 |
| }, |
| { |
| "epoch": 5.4471808165910565, |
| "grad_norm": 10.0, |
| "learning_rate": 4.674454705424506e-05, |
| "loss": 0.05, |
| "memory(GiB)": 43.05, |
| "step": 8405, |
| "train_speed(iter/s)": 0.610602 |
| }, |
| { |
| "epoch": 5.450421257290992, |
| "grad_norm": 13.1875, |
| "learning_rate": 4.6691081088916436e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 8410, |
| "train_speed(iter/s)": 0.610583 |
| }, |
| { |
| "epoch": 5.453661697990927, |
| "grad_norm": 12.125, |
| "learning_rate": 4.663761892352483e-05, |
| "loss": 0.0208984375, |
| "memory(GiB)": 43.05, |
| "step": 8415, |
| "train_speed(iter/s)": 0.610661 |
| }, |
| { |
| "epoch": 5.456902138690862, |
| "grad_norm": 10.3125, |
| "learning_rate": 4.6584160619465814e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 8420, |
| "train_speed(iter/s)": 0.610726 |
| }, |
| { |
| "epoch": 5.460142579390797, |
| "grad_norm": 15.0625, |
| "learning_rate": 4.653070623813051e-05, |
| "loss": 0.0310546875, |
| "memory(GiB)": 43.05, |
| "step": 8425, |
| "train_speed(iter/s)": 0.610783 |
| }, |
| { |
| "epoch": 5.463383020090732, |
| "grad_norm": 3.796875, |
| "learning_rate": 4.6477255840905484e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 8430, |
| "train_speed(iter/s)": 0.610706 |
| }, |
| { |
| "epoch": 5.466623460790667, |
| "grad_norm": 11.5625, |
| "learning_rate": 4.642380948917279e-05, |
| "loss": 0.0154296875, |
| "memory(GiB)": 43.05, |
| "step": 8435, |
| "train_speed(iter/s)": 0.610804 |
| }, |
| { |
| "epoch": 5.469863901490602, |
| "grad_norm": 6.71875, |
| "learning_rate": 4.637036724430981e-05, |
| "loss": 0.028125, |
| "memory(GiB)": 43.05, |
| "step": 8440, |
| "train_speed(iter/s)": 0.610933 |
| }, |
| { |
| "epoch": 5.473104342190538, |
| "grad_norm": 6.5, |
| "learning_rate": 4.6316929167689176e-05, |
| "loss": 0.027734375, |
| "memory(GiB)": 43.05, |
| "step": 8445, |
| "train_speed(iter/s)": 0.611038 |
| }, |
| { |
| "epoch": 5.476344782890473, |
| "grad_norm": 1.4375, |
| "learning_rate": 4.626349532067879e-05, |
| "loss": 0.0197265625, |
| "memory(GiB)": 43.05, |
| "step": 8450, |
| "train_speed(iter/s)": 0.611129 |
| }, |
| { |
| "epoch": 5.479585223590409, |
| "grad_norm": 3.0625, |
| "learning_rate": 4.621006576464168e-05, |
| "loss": 0.026953125, |
| "memory(GiB)": 43.05, |
| "step": 8455, |
| "train_speed(iter/s)": 0.61121 |
| }, |
| { |
| "epoch": 5.482825664290344, |
| "grad_norm": 8.6875, |
| "learning_rate": 4.61566405609359e-05, |
| "loss": 0.025390625, |
| "memory(GiB)": 43.05, |
| "step": 8460, |
| "train_speed(iter/s)": 0.611307 |
| }, |
| { |
| "epoch": 5.486066104990279, |
| "grad_norm": 1.4921875, |
| "learning_rate": 4.610321977091458e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 8465, |
| "train_speed(iter/s)": 0.61141 |
| }, |
| { |
| "epoch": 5.489306545690214, |
| "grad_norm": 11.25, |
| "learning_rate": 4.6049803455925725e-05, |
| "loss": 0.0193359375, |
| "memory(GiB)": 43.05, |
| "step": 8470, |
| "train_speed(iter/s)": 0.61143 |
| }, |
| { |
| "epoch": 5.492546986390149, |
| "grad_norm": 2.328125, |
| "learning_rate": 4.5996391677312225e-05, |
| "loss": 0.0279296875, |
| "memory(GiB)": 43.05, |
| "step": 8475, |
| "train_speed(iter/s)": 0.611595 |
| }, |
| { |
| "epoch": 5.495787427090084, |
| "grad_norm": 12.9375, |
| "learning_rate": 4.594298449641175e-05, |
| "loss": 0.01875, |
| "memory(GiB)": 43.05, |
| "step": 8480, |
| "train_speed(iter/s)": 0.611601 |
| }, |
| { |
| "epoch": 5.499027867790019, |
| "grad_norm": 8.75, |
| "learning_rate": 4.588958197455673e-05, |
| "loss": 0.0294921875, |
| "memory(GiB)": 43.05, |
| "step": 8485, |
| "train_speed(iter/s)": 0.61175 |
| }, |
| { |
| "epoch": 5.502268308489954, |
| "grad_norm": 13.5, |
| "learning_rate": 4.583618417307416e-05, |
| "loss": 0.0390625, |
| "memory(GiB)": 43.05, |
| "step": 8490, |
| "train_speed(iter/s)": 0.611858 |
| }, |
| { |
| "epoch": 5.5055087491898895, |
| "grad_norm": 17.625, |
| "learning_rate": 4.578279115328569e-05, |
| "loss": 0.037109375, |
| "memory(GiB)": 43.05, |
| "step": 8495, |
| "train_speed(iter/s)": 0.611954 |
| }, |
| { |
| "epoch": 5.508749189889825, |
| "grad_norm": 13.4375, |
| "learning_rate": 4.572940297650747e-05, |
| "loss": 0.037109375, |
| "memory(GiB)": 43.05, |
| "step": 8500, |
| "train_speed(iter/s)": 0.612014 |
| }, |
| { |
| "epoch": 5.51198963058976, |
| "grad_norm": 1.2109375, |
| "learning_rate": 4.567601970405004e-05, |
| "loss": 0.0345703125, |
| "memory(GiB)": 43.05, |
| "step": 8505, |
| "train_speed(iter/s)": 0.612165 |
| }, |
| { |
| "epoch": 5.515230071289696, |
| "grad_norm": 3.984375, |
| "learning_rate": 4.5622641397218355e-05, |
| "loss": 0.020703125, |
| "memory(GiB)": 43.05, |
| "step": 8510, |
| "train_speed(iter/s)": 0.612217 |
| }, |
| { |
| "epoch": 5.518470511989631, |
| "grad_norm": 10.3125, |
| "learning_rate": 4.556926811731165e-05, |
| "loss": 0.023046875, |
| "memory(GiB)": 43.05, |
| "step": 8515, |
| "train_speed(iter/s)": 0.612292 |
| }, |
| { |
| "epoch": 5.521710952689566, |
| "grad_norm": 5.09375, |
| "learning_rate": 4.5515899925623415e-05, |
| "loss": 0.0314453125, |
| "memory(GiB)": 43.05, |
| "step": 8520, |
| "train_speed(iter/s)": 0.612312 |
| }, |
| { |
| "epoch": 5.524951393389501, |
| "grad_norm": 3.1875, |
| "learning_rate": 4.546253688344122e-05, |
| "loss": 0.0189453125, |
| "memory(GiB)": 43.05, |
| "step": 8525, |
| "train_speed(iter/s)": 0.612402 |
| }, |
| { |
| "epoch": 5.528191834089436, |
| "grad_norm": 11.625, |
| "learning_rate": 4.540917905204681e-05, |
| "loss": 0.031640625, |
| "memory(GiB)": 43.05, |
| "step": 8530, |
| "train_speed(iter/s)": 0.61249 |
| }, |
| { |
| "epoch": 5.531432274789371, |
| "grad_norm": 0.7265625, |
| "learning_rate": 4.53558264927159e-05, |
| "loss": 0.0216796875, |
| "memory(GiB)": 43.05, |
| "step": 8535, |
| "train_speed(iter/s)": 0.612517 |
| }, |
| { |
| "epoch": 5.5346727154893065, |
| "grad_norm": 3.796875, |
| "learning_rate": 4.530247926671816e-05, |
| "loss": 0.0283203125, |
| "memory(GiB)": 43.05, |
| "step": 8540, |
| "train_speed(iter/s)": 0.612561 |
| }, |
| { |
| "epoch": 5.537913156189242, |
| "grad_norm": 16.125, |
| "learning_rate": 4.524913743531712e-05, |
| "loss": 0.0498046875, |
| "memory(GiB)": 43.05, |
| "step": 8545, |
| "train_speed(iter/s)": 0.612591 |
| }, |
| { |
| "epoch": 5.541153596889177, |
| "grad_norm": 0.494140625, |
| "learning_rate": 4.519580105977017e-05, |
| "loss": 0.030859375, |
| "memory(GiB)": 43.05, |
| "step": 8550, |
| "train_speed(iter/s)": 0.612707 |
| }, |
| { |
| "epoch": 5.544394037589112, |
| "grad_norm": 12.5, |
| "learning_rate": 4.514247020132835e-05, |
| "loss": 0.0404296875, |
| "memory(GiB)": 43.05, |
| "step": 8555, |
| "train_speed(iter/s)": 0.612856 |
| }, |
| { |
| "epoch": 5.547634478289047, |
| "grad_norm": 3.21875, |
| "learning_rate": 4.508914492123642e-05, |
| "loss": 0.030859375, |
| "memory(GiB)": 43.05, |
| "step": 8560, |
| "train_speed(iter/s)": 0.612854 |
| }, |
| { |
| "epoch": 5.550874918988983, |
| "grad_norm": 13.0, |
| "learning_rate": 4.503582528073272e-05, |
| "loss": 0.02421875, |
| "memory(GiB)": 43.05, |
| "step": 8565, |
| "train_speed(iter/s)": 0.613024 |
| }, |
| { |
| "epoch": 5.554115359688918, |
| "grad_norm": 1.3828125, |
| "learning_rate": 4.4982511341049124e-05, |
| "loss": 0.02578125, |
| "memory(GiB)": 43.05, |
| "step": 8570, |
| "train_speed(iter/s)": 0.613083 |
| }, |
| { |
| "epoch": 5.557355800388853, |
| "grad_norm": 4.15625, |
| "learning_rate": 4.492920316341095e-05, |
| "loss": 0.0294921875, |
| "memory(GiB)": 43.05, |
| "step": 8575, |
| "train_speed(iter/s)": 0.613224 |
| }, |
| { |
| "epoch": 5.560596241088788, |
| "grad_norm": 8.3125, |
| "learning_rate": 4.487590080903692e-05, |
| "loss": 0.0248046875, |
| "memory(GiB)": 43.05, |
| "step": 8580, |
| "train_speed(iter/s)": 0.613218 |
| }, |
| { |
| "epoch": 5.5638366817887235, |
| "grad_norm": 14.5625, |
| "learning_rate": 4.482260433913899e-05, |
| "loss": 0.02109375, |
| "memory(GiB)": 43.05, |
| "step": 8585, |
| "train_speed(iter/s)": 0.613258 |
| }, |
| { |
| "epoch": 5.567077122488659, |
| "grad_norm": 3.5625, |
| "learning_rate": 4.476931381492247e-05, |
| "loss": 0.0537109375, |
| "memory(GiB)": 43.05, |
| "step": 8590, |
| "train_speed(iter/s)": 0.613228 |
| }, |
| { |
| "epoch": 5.570317563188594, |
| "grad_norm": 17.5, |
| "learning_rate": 4.471602929758577e-05, |
| "loss": 0.0251953125, |
| "memory(GiB)": 43.05, |
| "step": 8595, |
| "train_speed(iter/s)": 0.61316 |
| }, |
| { |
| "epoch": 5.573558003888529, |
| "grad_norm": 2.265625, |
| "learning_rate": 4.466275084832041e-05, |
| "loss": 0.0279296875, |
| "memory(GiB)": 43.05, |
| "step": 8600, |
| "train_speed(iter/s)": 0.613196 |
| }, |
| { |
| "epoch": 5.576798444588464, |
| "grad_norm": 0.431640625, |
| "learning_rate": 4.460947852831096e-05, |
| "loss": 0.0416015625, |
| "memory(GiB)": 43.05, |
| "step": 8605, |
| "train_speed(iter/s)": 0.613363 |
| }, |
| { |
| "epoch": 5.580038885288399, |
| "grad_norm": 16.0, |
| "learning_rate": 4.455621239873498e-05, |
| "loss": 0.0216796875, |
| "memory(GiB)": 43.05, |
| "step": 8610, |
| "train_speed(iter/s)": 0.61345 |
| }, |
| { |
| "epoch": 5.583279325988334, |
| "grad_norm": 14.75, |
| "learning_rate": 4.450295252076282e-05, |
| "loss": 0.0220703125, |
| "memory(GiB)": 43.05, |
| "step": 8615, |
| "train_speed(iter/s)": 0.6135 |
| }, |
| { |
| "epoch": 5.586519766688269, |
| "grad_norm": 11.0625, |
| "learning_rate": 4.444969895555774e-05, |
| "loss": 0.038671875, |
| "memory(GiB)": 43.05, |
| "step": 8620, |
| "train_speed(iter/s)": 0.613576 |
| }, |
| { |
| "epoch": 5.5897602073882044, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.4396451764275755e-05, |
| "loss": 0.030859375, |
| "memory(GiB)": 43.05, |
| "step": 8625, |
| "train_speed(iter/s)": 0.613598 |
| }, |
| { |
| "epoch": 5.59300064808814, |
| "grad_norm": 2.546875, |
| "learning_rate": 4.4343211008065484e-05, |
| "loss": 0.0333984375, |
| "memory(GiB)": 43.05, |
| "step": 8630, |
| "train_speed(iter/s)": 0.613615 |
| }, |
| { |
| "epoch": 5.596241088788075, |
| "grad_norm": 2.890625, |
| "learning_rate": 4.428997674806822e-05, |
| "loss": 0.0564453125, |
| "memory(GiB)": 43.05, |
| "step": 8635, |
| "train_speed(iter/s)": 0.613688 |
| }, |
| { |
| "epoch": 5.599481529488011, |
| "grad_norm": 3.1875, |
| "learning_rate": 4.423674904541779e-05, |
| "loss": 0.041015625, |
| "memory(GiB)": 43.05, |
| "step": 8640, |
| "train_speed(iter/s)": 0.613749 |
| }, |
| { |
| "epoch": 5.602721970187946, |
| "grad_norm": 2.46875, |
| "learning_rate": 4.4183527961240455e-05, |
| "loss": 0.0345703125, |
| "memory(GiB)": 43.05, |
| "step": 8645, |
| "train_speed(iter/s)": 0.613872 |
| }, |
| { |
| "epoch": 5.605962410887881, |
| "grad_norm": 8.9375, |
| "learning_rate": 4.413031355665492e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 8650, |
| "train_speed(iter/s)": 0.613911 |
| }, |
| { |
| "epoch": 5.609202851587816, |
| "grad_norm": 9.125, |
| "learning_rate": 4.407710589277221e-05, |
| "loss": 0.03359375, |
| "memory(GiB)": 43.05, |
| "step": 8655, |
| "train_speed(iter/s)": 0.613996 |
| }, |
| { |
| "epoch": 5.612443292287751, |
| "grad_norm": 14.25, |
| "learning_rate": 4.402390503069556e-05, |
| "loss": 0.0314453125, |
| "memory(GiB)": 43.05, |
| "step": 8660, |
| "train_speed(iter/s)": 0.613954 |
| }, |
| { |
| "epoch": 5.615683732987686, |
| "grad_norm": 10.625, |
| "learning_rate": 4.3970711031520446e-05, |
| "loss": 0.0271484375, |
| "memory(GiB)": 43.05, |
| "step": 8665, |
| "train_speed(iter/s)": 0.61412 |
| }, |
| { |
| "epoch": 5.618924173687621, |
| "grad_norm": 0.81640625, |
| "learning_rate": 4.391752395633446e-05, |
| "loss": 0.024609375, |
| "memory(GiB)": 43.05, |
| "step": 8670, |
| "train_speed(iter/s)": 0.614246 |
| }, |
| { |
| "epoch": 5.6221646143875565, |
| "grad_norm": 1.4765625, |
| "learning_rate": 4.386434386621722e-05, |
| "loss": 0.0380859375, |
| "memory(GiB)": 43.05, |
| "step": 8675, |
| "train_speed(iter/s)": 0.614211 |
| }, |
| { |
| "epoch": 5.625405055087492, |
| "grad_norm": 1.3671875, |
| "learning_rate": 4.381117082224033e-05, |
| "loss": 0.0181640625, |
| "memory(GiB)": 43.05, |
| "step": 8680, |
| "train_speed(iter/s)": 0.614312 |
| }, |
| { |
| "epoch": 5.628645495787427, |
| "grad_norm": 1.125, |
| "learning_rate": 4.375800488546733e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 8685, |
| "train_speed(iter/s)": 0.614351 |
| }, |
| { |
| "epoch": 5.631885936487362, |
| "grad_norm": 15.9375, |
| "learning_rate": 4.370484611695354e-05, |
| "loss": 0.033984375, |
| "memory(GiB)": 43.05, |
| "step": 8690, |
| "train_speed(iter/s)": 0.614385 |
| }, |
| { |
| "epoch": 5.635126377187298, |
| "grad_norm": 1.6796875, |
| "learning_rate": 4.365169457774609e-05, |
| "loss": 0.0294921875, |
| "memory(GiB)": 43.05, |
| "step": 8695, |
| "train_speed(iter/s)": 0.614455 |
| }, |
| { |
| "epoch": 5.638366817887233, |
| "grad_norm": 12.375, |
| "learning_rate": 4.3598550328883814e-05, |
| "loss": 0.0271484375, |
| "memory(GiB)": 43.05, |
| "step": 8700, |
| "train_speed(iter/s)": 0.614552 |
| }, |
| { |
| "epoch": 5.641607258587168, |
| "grad_norm": 1.375, |
| "learning_rate": 4.354541343139714e-05, |
| "loss": 0.0330078125, |
| "memory(GiB)": 43.05, |
| "step": 8705, |
| "train_speed(iter/s)": 0.614735 |
| }, |
| { |
| "epoch": 5.644847699287103, |
| "grad_norm": 16.125, |
| "learning_rate": 4.349228394630808e-05, |
| "loss": 0.025, |
| "memory(GiB)": 43.05, |
| "step": 8710, |
| "train_speed(iter/s)": 0.614743 |
| }, |
| { |
| "epoch": 5.648088139987038, |
| "grad_norm": 11.375, |
| "learning_rate": 4.3439161934630156e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 8715, |
| "train_speed(iter/s)": 0.614913 |
| }, |
| { |
| "epoch": 5.6513285806869735, |
| "grad_norm": 17.125, |
| "learning_rate": 4.338604745736822e-05, |
| "loss": 0.0359375, |
| "memory(GiB)": 43.05, |
| "step": 8720, |
| "train_speed(iter/s)": 0.615012 |
| }, |
| { |
| "epoch": 5.654569021386909, |
| "grad_norm": 13.25, |
| "learning_rate": 4.3332940575518565e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 8725, |
| "train_speed(iter/s)": 0.615076 |
| }, |
| { |
| "epoch": 5.657809462086844, |
| "grad_norm": 12.125, |
| "learning_rate": 4.327984135006873e-05, |
| "loss": 0.0197265625, |
| "memory(GiB)": 43.05, |
| "step": 8730, |
| "train_speed(iter/s)": 0.61514 |
| }, |
| { |
| "epoch": 5.661049902786779, |
| "grad_norm": 1.59375, |
| "learning_rate": 4.3226749841997436e-05, |
| "loss": 0.0240234375, |
| "memory(GiB)": 43.05, |
| "step": 8735, |
| "train_speed(iter/s)": 0.615215 |
| }, |
| { |
| "epoch": 5.664290343486714, |
| "grad_norm": 3.625, |
| "learning_rate": 4.317366611227458e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 8740, |
| "train_speed(iter/s)": 0.615171 |
| }, |
| { |
| "epoch": 5.667530784186649, |
| "grad_norm": 7.78125, |
| "learning_rate": 4.3120590221861125e-05, |
| "loss": 0.03359375, |
| "memory(GiB)": 43.05, |
| "step": 8745, |
| "train_speed(iter/s)": 0.615263 |
| }, |
| { |
| "epoch": 5.670771224886584, |
| "grad_norm": 11.625, |
| "learning_rate": 4.3067522231708974e-05, |
| "loss": 0.027734375, |
| "memory(GiB)": 43.05, |
| "step": 8750, |
| "train_speed(iter/s)": 0.615425 |
| }, |
| { |
| "epoch": 5.674011665586519, |
| "grad_norm": 5.65625, |
| "learning_rate": 4.301446220276102e-05, |
| "loss": 0.0095703125, |
| "memory(GiB)": 43.05, |
| "step": 8755, |
| "train_speed(iter/s)": 0.615447 |
| }, |
| { |
| "epoch": 5.6772521062864545, |
| "grad_norm": 4.875, |
| "learning_rate": 4.2961410195951e-05, |
| "loss": 0.02265625, |
| "memory(GiB)": 43.05, |
| "step": 8760, |
| "train_speed(iter/s)": 0.615449 |
| }, |
| { |
| "epoch": 5.6804925469863905, |
| "grad_norm": 3.15625, |
| "learning_rate": 4.2908366272203414e-05, |
| "loss": 0.039453125, |
| "memory(GiB)": 43.05, |
| "step": 8765, |
| "train_speed(iter/s)": 0.61553 |
| }, |
| { |
| "epoch": 5.683732987686326, |
| "grad_norm": 1.9765625, |
| "learning_rate": 4.285533049243351e-05, |
| "loss": 0.0318359375, |
| "memory(GiB)": 43.05, |
| "step": 8770, |
| "train_speed(iter/s)": 0.615572 |
| }, |
| { |
| "epoch": 5.686973428386261, |
| "grad_norm": 2.1875, |
| "learning_rate": 4.280230291754718e-05, |
| "loss": 0.0396484375, |
| "memory(GiB)": 43.05, |
| "step": 8775, |
| "train_speed(iter/s)": 0.61563 |
| }, |
| { |
| "epoch": 5.690213869086196, |
| "grad_norm": 11.6875, |
| "learning_rate": 4.274928360844086e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 8780, |
| "train_speed(iter/s)": 0.615709 |
| }, |
| { |
| "epoch": 5.693454309786131, |
| "grad_norm": 3.078125, |
| "learning_rate": 4.269627262600151e-05, |
| "loss": 0.0283203125, |
| "memory(GiB)": 43.05, |
| "step": 8785, |
| "train_speed(iter/s)": 0.615769 |
| }, |
| { |
| "epoch": 5.696694750486066, |
| "grad_norm": 11.4375, |
| "learning_rate": 4.264327003110657e-05, |
| "loss": 0.021875, |
| "memory(GiB)": 43.05, |
| "step": 8790, |
| "train_speed(iter/s)": 0.615899 |
| }, |
| { |
| "epoch": 5.699935191186001, |
| "grad_norm": 5.53125, |
| "learning_rate": 4.2590275884623805e-05, |
| "loss": 0.0216796875, |
| "memory(GiB)": 43.05, |
| "step": 8795, |
| "train_speed(iter/s)": 0.61597 |
| }, |
| { |
| "epoch": 5.703175631885936, |
| "grad_norm": 0.640625, |
| "learning_rate": 4.253729024741125e-05, |
| "loss": 0.0177734375, |
| "memory(GiB)": 43.05, |
| "step": 8800, |
| "train_speed(iter/s)": 0.616019 |
| }, |
| { |
| "epoch": 5.7064160725858715, |
| "grad_norm": 0.6484375, |
| "learning_rate": 4.248431318031724e-05, |
| "loss": 0.04375, |
| "memory(GiB)": 43.05, |
| "step": 8805, |
| "train_speed(iter/s)": 0.616098 |
| }, |
| { |
| "epoch": 5.709656513285807, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.2431344744180225e-05, |
| "loss": 0.0197265625, |
| "memory(GiB)": 43.05, |
| "step": 8810, |
| "train_speed(iter/s)": 0.616157 |
| }, |
| { |
| "epoch": 5.712896953985742, |
| "grad_norm": 13.0, |
| "learning_rate": 4.2378384999828736e-05, |
| "loss": 0.030859375, |
| "memory(GiB)": 43.05, |
| "step": 8815, |
| "train_speed(iter/s)": 0.616015 |
| }, |
| { |
| "epoch": 5.716137394685678, |
| "grad_norm": 15.625, |
| "learning_rate": 4.2325434008081344e-05, |
| "loss": 0.03046875, |
| "memory(GiB)": 43.05, |
| "step": 8820, |
| "train_speed(iter/s)": 0.616157 |
| }, |
| { |
| "epoch": 5.719377835385613, |
| "grad_norm": 1.5546875, |
| "learning_rate": 4.2272491829746585e-05, |
| "loss": 0.026953125, |
| "memory(GiB)": 43.05, |
| "step": 8825, |
| "train_speed(iter/s)": 0.616221 |
| }, |
| { |
| "epoch": 5.722618276085548, |
| "grad_norm": 13.125, |
| "learning_rate": 4.22195585256228e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 8830, |
| "train_speed(iter/s)": 0.616236 |
| }, |
| { |
| "epoch": 5.725858716785483, |
| "grad_norm": 13.75, |
| "learning_rate": 4.216663415649823e-05, |
| "loss": 0.0287109375, |
| "memory(GiB)": 43.05, |
| "step": 8835, |
| "train_speed(iter/s)": 0.616401 |
| }, |
| { |
| "epoch": 5.729099157485418, |
| "grad_norm": 4.15625, |
| "learning_rate": 4.21137187831508e-05, |
| "loss": 0.0486328125, |
| "memory(GiB)": 43.05, |
| "step": 8840, |
| "train_speed(iter/s)": 0.61642 |
| }, |
| { |
| "epoch": 5.732339598185353, |
| "grad_norm": 1.5390625, |
| "learning_rate": 4.206081246634811e-05, |
| "loss": 0.0279296875, |
| "memory(GiB)": 43.05, |
| "step": 8845, |
| "train_speed(iter/s)": 0.616472 |
| }, |
| { |
| "epoch": 5.7355800388852884, |
| "grad_norm": 4.71875, |
| "learning_rate": 4.200791526684738e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 8850, |
| "train_speed(iter/s)": 0.616534 |
| }, |
| { |
| "epoch": 5.738820479585224, |
| "grad_norm": 0.515625, |
| "learning_rate": 4.195502724539536e-05, |
| "loss": 0.01953125, |
| "memory(GiB)": 43.05, |
| "step": 8855, |
| "train_speed(iter/s)": 0.616599 |
| }, |
| { |
| "epoch": 5.742060920285159, |
| "grad_norm": 11.0625, |
| "learning_rate": 4.190214846272821e-05, |
| "loss": 0.029296875, |
| "memory(GiB)": 43.05, |
| "step": 8860, |
| "train_speed(iter/s)": 0.616742 |
| }, |
| { |
| "epoch": 5.745301360985094, |
| "grad_norm": 7.6875, |
| "learning_rate": 4.184927897957154e-05, |
| "loss": 0.02265625, |
| "memory(GiB)": 43.05, |
| "step": 8865, |
| "train_speed(iter/s)": 0.616703 |
| }, |
| { |
| "epoch": 5.748541801685029, |
| "grad_norm": 16.625, |
| "learning_rate": 4.179641885664026e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 8870, |
| "train_speed(iter/s)": 0.616676 |
| }, |
| { |
| "epoch": 5.751782242384964, |
| "grad_norm": 6.40625, |
| "learning_rate": 4.1743568154638526e-05, |
| "loss": 0.0205078125, |
| "memory(GiB)": 43.05, |
| "step": 8875, |
| "train_speed(iter/s)": 0.616644 |
| }, |
| { |
| "epoch": 5.755022683084899, |
| "grad_norm": 6.96875, |
| "learning_rate": 4.169072693425967e-05, |
| "loss": 0.0244140625, |
| "memory(GiB)": 43.05, |
| "step": 8880, |
| "train_speed(iter/s)": 0.616723 |
| }, |
| { |
| "epoch": 5.758263123784834, |
| "grad_norm": 10.0625, |
| "learning_rate": 4.1637895256186175e-05, |
| "loss": 0.028125, |
| "memory(GiB)": 43.05, |
| "step": 8885, |
| "train_speed(iter/s)": 0.61676 |
| }, |
| { |
| "epoch": 5.76150356448477, |
| "grad_norm": 2.0625, |
| "learning_rate": 4.158507318108949e-05, |
| "loss": 0.03125, |
| "memory(GiB)": 43.05, |
| "step": 8890, |
| "train_speed(iter/s)": 0.616858 |
| }, |
| { |
| "epoch": 5.764744005184705, |
| "grad_norm": 4.53125, |
| "learning_rate": 4.153226076963011e-05, |
| "loss": 0.0236328125, |
| "memory(GiB)": 43.05, |
| "step": 8895, |
| "train_speed(iter/s)": 0.616854 |
| }, |
| { |
| "epoch": 5.7679844458846405, |
| "grad_norm": 2.046875, |
| "learning_rate": 4.1479458082457383e-05, |
| "loss": 0.05078125, |
| "memory(GiB)": 43.05, |
| "step": 8900, |
| "train_speed(iter/s)": 0.616931 |
| }, |
| { |
| "epoch": 5.771224886584576, |
| "grad_norm": 0.54296875, |
| "learning_rate": 4.142666518020952e-05, |
| "loss": 0.0154296875, |
| "memory(GiB)": 43.05, |
| "step": 8905, |
| "train_speed(iter/s)": 0.61703 |
| }, |
| { |
| "epoch": 5.774465327284511, |
| "grad_norm": 11.5, |
| "learning_rate": 4.137388212351348e-05, |
| "loss": 0.0359375, |
| "memory(GiB)": 43.05, |
| "step": 8910, |
| "train_speed(iter/s)": 0.617053 |
| }, |
| { |
| "epoch": 5.777705767984446, |
| "grad_norm": 1.9765625, |
| "learning_rate": 4.1321108972984946e-05, |
| "loss": 0.0234375, |
| "memory(GiB)": 43.05, |
| "step": 8915, |
| "train_speed(iter/s)": 0.617197 |
| }, |
| { |
| "epoch": 5.780946208684381, |
| "grad_norm": 2.515625, |
| "learning_rate": 4.1268345789228155e-05, |
| "loss": 0.0310546875, |
| "memory(GiB)": 43.05, |
| "step": 8920, |
| "train_speed(iter/s)": 0.617315 |
| }, |
| { |
| "epoch": 5.784186649384316, |
| "grad_norm": 3.578125, |
| "learning_rate": 4.121559263283596e-05, |
| "loss": 0.023828125, |
| "memory(GiB)": 43.05, |
| "step": 8925, |
| "train_speed(iter/s)": 0.617482 |
| }, |
| { |
| "epoch": 5.787427090084251, |
| "grad_norm": 2.578125, |
| "learning_rate": 4.1162849564389693e-05, |
| "loss": 0.02578125, |
| "memory(GiB)": 43.05, |
| "step": 8930, |
| "train_speed(iter/s)": 0.617643 |
| }, |
| { |
| "epoch": 5.790667530784186, |
| "grad_norm": 8.0625, |
| "learning_rate": 4.111011664445907e-05, |
| "loss": 0.0353515625, |
| "memory(GiB)": 43.05, |
| "step": 8935, |
| "train_speed(iter/s)": 0.61776 |
| }, |
| { |
| "epoch": 5.7939079714841215, |
| "grad_norm": 1.7421875, |
| "learning_rate": 4.105739393360218e-05, |
| "loss": 0.0236328125, |
| "memory(GiB)": 43.05, |
| "step": 8940, |
| "train_speed(iter/s)": 0.617825 |
| }, |
| { |
| "epoch": 5.7971484121840575, |
| "grad_norm": 7.21875, |
| "learning_rate": 4.10046814923654e-05, |
| "loss": 0.041015625, |
| "memory(GiB)": 43.05, |
| "step": 8945, |
| "train_speed(iter/s)": 0.617923 |
| }, |
| { |
| "epoch": 5.800388852883993, |
| "grad_norm": 0.859375, |
| "learning_rate": 4.095197938128325e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 8950, |
| "train_speed(iter/s)": 0.618049 |
| }, |
| { |
| "epoch": 5.803629293583928, |
| "grad_norm": 13.625, |
| "learning_rate": 4.0899287660878444e-05, |
| "loss": 0.0333984375, |
| "memory(GiB)": 43.05, |
| "step": 8955, |
| "train_speed(iter/s)": 0.61808 |
| }, |
| { |
| "epoch": 5.806869734283863, |
| "grad_norm": 14.875, |
| "learning_rate": 4.084660639166178e-05, |
| "loss": 0.030078125, |
| "memory(GiB)": 43.05, |
| "step": 8960, |
| "train_speed(iter/s)": 0.61814 |
| }, |
| { |
| "epoch": 5.810110174983798, |
| "grad_norm": 14.1875, |
| "learning_rate": 4.079393563413197e-05, |
| "loss": 0.0212890625, |
| "memory(GiB)": 43.05, |
| "step": 8965, |
| "train_speed(iter/s)": 0.618267 |
| }, |
| { |
| "epoch": 5.813350615683733, |
| "grad_norm": 10.8125, |
| "learning_rate": 4.074127544877574e-05, |
| "loss": 0.0208984375, |
| "memory(GiB)": 43.05, |
| "step": 8970, |
| "train_speed(iter/s)": 0.618328 |
| }, |
| { |
| "epoch": 5.816591056383668, |
| "grad_norm": 0.71875, |
| "learning_rate": 4.068862589606765e-05, |
| "loss": 0.0263671875, |
| "memory(GiB)": 43.05, |
| "step": 8975, |
| "train_speed(iter/s)": 0.618373 |
| }, |
| { |
| "epoch": 5.819831497083603, |
| "grad_norm": 9.1875, |
| "learning_rate": 4.063598703647002e-05, |
| "loss": 0.023828125, |
| "memory(GiB)": 43.05, |
| "step": 8980, |
| "train_speed(iter/s)": 0.618401 |
| }, |
| { |
| "epoch": 5.8230719377835385, |
| "grad_norm": 3.09375, |
| "learning_rate": 4.0583358930432916e-05, |
| "loss": 0.0294921875, |
| "memory(GiB)": 43.05, |
| "step": 8985, |
| "train_speed(iter/s)": 0.618533 |
| }, |
| { |
| "epoch": 5.826312378483474, |
| "grad_norm": 10.1875, |
| "learning_rate": 4.0530741638394076e-05, |
| "loss": 0.03046875, |
| "memory(GiB)": 43.05, |
| "step": 8990, |
| "train_speed(iter/s)": 0.618508 |
| }, |
| { |
| "epoch": 5.829552819183409, |
| "grad_norm": 0.57421875, |
| "learning_rate": 4.0478135220778755e-05, |
| "loss": 0.029296875, |
| "memory(GiB)": 43.05, |
| "step": 8995, |
| "train_speed(iter/s)": 0.618612 |
| }, |
| { |
| "epoch": 5.832793259883344, |
| "grad_norm": 4.4375, |
| "learning_rate": 4.042553973799977e-05, |
| "loss": 0.0375, |
| "memory(GiB)": 43.05, |
| "step": 9000, |
| "train_speed(iter/s)": 0.618586 |
| }, |
| { |
| "epoch": 5.836033700583279, |
| "grad_norm": 10.3125, |
| "learning_rate": 4.03729552504574e-05, |
| "loss": 0.0330078125, |
| "memory(GiB)": 43.05, |
| "step": 9005, |
| "train_speed(iter/s)": 0.618742 |
| }, |
| { |
| "epoch": 5.839274141283214, |
| "grad_norm": 0.56640625, |
| "learning_rate": 4.032038181853922e-05, |
| "loss": 0.04111328125, |
| "memory(GiB)": 43.05, |
| "step": 9010, |
| "train_speed(iter/s)": 0.618801 |
| }, |
| { |
| "epoch": 5.842514581983149, |
| "grad_norm": 8.1875, |
| "learning_rate": 4.026781950262018e-05, |
| "loss": 0.035546875, |
| "memory(GiB)": 43.05, |
| "step": 9015, |
| "train_speed(iter/s)": 0.618804 |
| }, |
| { |
| "epoch": 5.845755022683085, |
| "grad_norm": 18.375, |
| "learning_rate": 4.0215268363062465e-05, |
| "loss": 0.0251953125, |
| "memory(GiB)": 43.05, |
| "step": 9020, |
| "train_speed(iter/s)": 0.618882 |
| }, |
| { |
| "epoch": 5.84899546338302, |
| "grad_norm": 0.69921875, |
| "learning_rate": 4.0162728460215346e-05, |
| "loss": 0.0232421875, |
| "memory(GiB)": 43.05, |
| "step": 9025, |
| "train_speed(iter/s)": 0.618868 |
| }, |
| { |
| "epoch": 5.8522359040829555, |
| "grad_norm": 10.0, |
| "learning_rate": 4.0110199854415264e-05, |
| "loss": 0.0345703125, |
| "memory(GiB)": 43.05, |
| "step": 9030, |
| "train_speed(iter/s)": 0.619029 |
| }, |
| { |
| "epoch": 5.855476344782891, |
| "grad_norm": 2.828125, |
| "learning_rate": 4.005768260598569e-05, |
| "loss": 0.0240234375, |
| "memory(GiB)": 43.05, |
| "step": 9035, |
| "train_speed(iter/s)": 0.619114 |
| }, |
| { |
| "epoch": 5.858716785482826, |
| "grad_norm": 12.375, |
| "learning_rate": 4.0005176775237e-05, |
| "loss": 0.0240234375, |
| "memory(GiB)": 43.05, |
| "step": 9040, |
| "train_speed(iter/s)": 0.619198 |
| }, |
| { |
| "epoch": 5.861957226182761, |
| "grad_norm": 14.25, |
| "learning_rate": 3.99526824224665e-05, |
| "loss": 0.0240234375, |
| "memory(GiB)": 43.05, |
| "step": 9045, |
| "train_speed(iter/s)": 0.619316 |
| }, |
| { |
| "epoch": 5.865197666882696, |
| "grad_norm": 1.5390625, |
| "learning_rate": 3.990019960795831e-05, |
| "loss": 0.0337890625, |
| "memory(GiB)": 43.05, |
| "step": 9050, |
| "train_speed(iter/s)": 0.619371 |
| }, |
| { |
| "epoch": 5.868438107582631, |
| "grad_norm": 8.4375, |
| "learning_rate": 3.984772839198327e-05, |
| "loss": 0.0177734375, |
| "memory(GiB)": 43.05, |
| "step": 9055, |
| "train_speed(iter/s)": 0.619431 |
| }, |
| { |
| "epoch": 5.871678548282566, |
| "grad_norm": 2.453125, |
| "learning_rate": 3.979526883479892e-05, |
| "loss": 0.0162109375, |
| "memory(GiB)": 43.05, |
| "step": 9060, |
| "train_speed(iter/s)": 0.619449 |
| }, |
| { |
| "epoch": 5.874918988982501, |
| "grad_norm": 10.1875, |
| "learning_rate": 3.9742820996649435e-05, |
| "loss": 0.0314453125, |
| "memory(GiB)": 43.05, |
| "step": 9065, |
| "train_speed(iter/s)": 0.619509 |
| }, |
| { |
| "epoch": 5.878159429682436, |
| "grad_norm": 13.8125, |
| "learning_rate": 3.9690384937765495e-05, |
| "loss": 0.0216796875, |
| "memory(GiB)": 43.05, |
| "step": 9070, |
| "train_speed(iter/s)": 0.619525 |
| }, |
| { |
| "epoch": 5.881399870382372, |
| "grad_norm": 8.125, |
| "learning_rate": 3.9637960718364265e-05, |
| "loss": 0.02734375, |
| "memory(GiB)": 43.05, |
| "step": 9075, |
| "train_speed(iter/s)": 0.619648 |
| }, |
| { |
| "epoch": 5.884640311082308, |
| "grad_norm": 1.921875, |
| "learning_rate": 3.958554839864932e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 9080, |
| "train_speed(iter/s)": 0.619621 |
| }, |
| { |
| "epoch": 5.887880751782243, |
| "grad_norm": 1.9765625, |
| "learning_rate": 3.953314803881058e-05, |
| "loss": 0.026953125, |
| "memory(GiB)": 43.05, |
| "step": 9085, |
| "train_speed(iter/s)": 0.619703 |
| }, |
| { |
| "epoch": 5.891121192482178, |
| "grad_norm": 2.359375, |
| "learning_rate": 3.94807596990242e-05, |
| "loss": 0.0427734375, |
| "memory(GiB)": 43.05, |
| "step": 9090, |
| "train_speed(iter/s)": 0.619781 |
| }, |
| { |
| "epoch": 5.894361633182113, |
| "grad_norm": 0.6015625, |
| "learning_rate": 3.942838343945253e-05, |
| "loss": 0.0537109375, |
| "memory(GiB)": 43.05, |
| "step": 9095, |
| "train_speed(iter/s)": 0.619794 |
| }, |
| { |
| "epoch": 5.897602073882048, |
| "grad_norm": 10.9375, |
| "learning_rate": 3.93760193202441e-05, |
| "loss": 0.0302734375, |
| "memory(GiB)": 43.05, |
| "step": 9100, |
| "train_speed(iter/s)": 0.619792 |
| }, |
| { |
| "epoch": 5.900842514581983, |
| "grad_norm": 11.3125, |
| "learning_rate": 3.932366740153343e-05, |
| "loss": 0.0234375, |
| "memory(GiB)": 43.05, |
| "step": 9105, |
| "train_speed(iter/s)": 0.619931 |
| }, |
| { |
| "epoch": 5.904082955281918, |
| "grad_norm": 6.3125, |
| "learning_rate": 3.927132774344107e-05, |
| "loss": 0.012890625, |
| "memory(GiB)": 43.05, |
| "step": 9110, |
| "train_speed(iter/s)": 0.620064 |
| }, |
| { |
| "epoch": 5.907323395981853, |
| "grad_norm": 16.625, |
| "learning_rate": 3.9219000406073516e-05, |
| "loss": 0.0359375, |
| "memory(GiB)": 43.05, |
| "step": 9115, |
| "train_speed(iter/s)": 0.620009 |
| }, |
| { |
| "epoch": 5.9105638366817885, |
| "grad_norm": 11.375, |
| "learning_rate": 3.916668544952302e-05, |
| "loss": 0.0318359375, |
| "memory(GiB)": 43.05, |
| "step": 9120, |
| "train_speed(iter/s)": 0.619989 |
| }, |
| { |
| "epoch": 5.913804277381724, |
| "grad_norm": 4.59375, |
| "learning_rate": 3.911438293386771e-05, |
| "loss": 0.03203125, |
| "memory(GiB)": 43.05, |
| "step": 9125, |
| "train_speed(iter/s)": 0.620071 |
| }, |
| { |
| "epoch": 5.917044718081659, |
| "grad_norm": 2.1875, |
| "learning_rate": 3.9062092919171414e-05, |
| "loss": 0.046875, |
| "memory(GiB)": 43.05, |
| "step": 9130, |
| "train_speed(iter/s)": 0.620124 |
| }, |
| { |
| "epoch": 5.920285158781594, |
| "grad_norm": 2.640625, |
| "learning_rate": 3.9009815465483536e-05, |
| "loss": 0.0322265625, |
| "memory(GiB)": 43.05, |
| "step": 9135, |
| "train_speed(iter/s)": 0.620183 |
| }, |
| { |
| "epoch": 5.923525599481529, |
| "grad_norm": 1.7890625, |
| "learning_rate": 3.895755063283912e-05, |
| "loss": 0.025, |
| "memory(GiB)": 43.05, |
| "step": 9140, |
| "train_speed(iter/s)": 0.620345 |
| }, |
| { |
| "epoch": 5.926766040181465, |
| "grad_norm": 4.0625, |
| "learning_rate": 3.8905298481258726e-05, |
| "loss": 0.0365234375, |
| "memory(GiB)": 43.05, |
| "step": 9145, |
| "train_speed(iter/s)": 0.620408 |
| }, |
| { |
| "epoch": 5.9300064808814, |
| "grad_norm": 15.5625, |
| "learning_rate": 3.8853059070748275e-05, |
| "loss": 0.0294921875, |
| "memory(GiB)": 43.05, |
| "step": 9150, |
| "train_speed(iter/s)": 0.620542 |
| }, |
| { |
| "epoch": 5.933246921581335, |
| "grad_norm": 0.66015625, |
| "learning_rate": 3.880083246129914e-05, |
| "loss": 0.019140625, |
| "memory(GiB)": 43.05, |
| "step": 9155, |
| "train_speed(iter/s)": 0.620592 |
| }, |
| { |
| "epoch": 5.93648736228127, |
| "grad_norm": 2.203125, |
| "learning_rate": 3.8748618712887966e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 9160, |
| "train_speed(iter/s)": 0.620562 |
| }, |
| { |
| "epoch": 5.9397278029812055, |
| "grad_norm": 2.4375, |
| "learning_rate": 3.869641788547658e-05, |
| "loss": 0.0169921875, |
| "memory(GiB)": 43.05, |
| "step": 9165, |
| "train_speed(iter/s)": 0.620596 |
| }, |
| { |
| "epoch": 5.942968243681141, |
| "grad_norm": 9.3125, |
| "learning_rate": 3.864423003901203e-05, |
| "loss": 0.0423828125, |
| "memory(GiB)": 43.05, |
| "step": 9170, |
| "train_speed(iter/s)": 0.620719 |
| }, |
| { |
| "epoch": 5.946208684381076, |
| "grad_norm": 3.328125, |
| "learning_rate": 3.8592055233426454e-05, |
| "loss": 0.038671875, |
| "memory(GiB)": 43.05, |
| "step": 9175, |
| "train_speed(iter/s)": 0.620788 |
| }, |
| { |
| "epoch": 5.949449125081011, |
| "grad_norm": 15.875, |
| "learning_rate": 3.853989352863698e-05, |
| "loss": 0.0234375, |
| "memory(GiB)": 43.05, |
| "step": 9180, |
| "train_speed(iter/s)": 0.620866 |
| }, |
| { |
| "epoch": 5.952689565780946, |
| "grad_norm": 2.875, |
| "learning_rate": 3.8487744984545705e-05, |
| "loss": 0.01484375, |
| "memory(GiB)": 43.05, |
| "step": 9185, |
| "train_speed(iter/s)": 0.620868 |
| }, |
| { |
| "epoch": 5.955930006480881, |
| "grad_norm": 1.4921875, |
| "learning_rate": 3.843560966103965e-05, |
| "loss": 0.0193359375, |
| "memory(GiB)": 43.05, |
| "step": 9190, |
| "train_speed(iter/s)": 0.620895 |
| }, |
| { |
| "epoch": 5.959170447180816, |
| "grad_norm": 6.53125, |
| "learning_rate": 3.838348761799058e-05, |
| "loss": 0.0224609375, |
| "memory(GiB)": 43.05, |
| "step": 9195, |
| "train_speed(iter/s)": 0.620994 |
| }, |
| { |
| "epoch": 5.962410887880752, |
| "grad_norm": 1.1796875, |
| "learning_rate": 3.833137891525506e-05, |
| "loss": 0.0439453125, |
| "memory(GiB)": 43.05, |
| "step": 9200, |
| "train_speed(iter/s)": 0.621117 |
| }, |
| { |
| "epoch": 5.965651328580687, |
| "grad_norm": 1.8671875, |
| "learning_rate": 3.827928361267433e-05, |
| "loss": 0.034375, |
| "memory(GiB)": 43.05, |
| "step": 9205, |
| "train_speed(iter/s)": 0.621276 |
| }, |
| { |
| "epoch": 5.9688917692806225, |
| "grad_norm": 5.0, |
| "learning_rate": 3.8227201770074225e-05, |
| "loss": 0.01875, |
| "memory(GiB)": 43.05, |
| "step": 9210, |
| "train_speed(iter/s)": 0.621345 |
| }, |
| { |
| "epoch": 5.972132209980558, |
| "grad_norm": 11.625, |
| "learning_rate": 3.8175133447265146e-05, |
| "loss": 0.029296875, |
| "memory(GiB)": 43.05, |
| "step": 9215, |
| "train_speed(iter/s)": 0.621345 |
| }, |
| { |
| "epoch": 5.975372650680493, |
| "grad_norm": 6.71875, |
| "learning_rate": 3.812307870404197e-05, |
| "loss": 0.0380859375, |
| "memory(GiB)": 43.05, |
| "step": 9220, |
| "train_speed(iter/s)": 0.621503 |
| }, |
| { |
| "epoch": 5.978613091380428, |
| "grad_norm": 12.6875, |
| "learning_rate": 3.807103760018392e-05, |
| "loss": 0.040625, |
| "memory(GiB)": 43.05, |
| "step": 9225, |
| "train_speed(iter/s)": 0.621577 |
| }, |
| { |
| "epoch": 5.981853532080363, |
| "grad_norm": 7.21875, |
| "learning_rate": 3.801901019545463e-05, |
| "loss": 0.0498046875, |
| "memory(GiB)": 43.05, |
| "step": 9230, |
| "train_speed(iter/s)": 0.621586 |
| }, |
| { |
| "epoch": 5.985093972780298, |
| "grad_norm": 8.75, |
| "learning_rate": 3.796699654960197e-05, |
| "loss": 0.0251953125, |
| "memory(GiB)": 43.05, |
| "step": 9235, |
| "train_speed(iter/s)": 0.621643 |
| }, |
| { |
| "epoch": 5.988334413480233, |
| "grad_norm": 10.6875, |
| "learning_rate": 3.791499672235799e-05, |
| "loss": 0.0095703125, |
| "memory(GiB)": 43.05, |
| "step": 9240, |
| "train_speed(iter/s)": 0.621758 |
| }, |
| { |
| "epoch": 5.991574854180168, |
| "grad_norm": 13.1875, |
| "learning_rate": 3.786301077343892e-05, |
| "loss": 0.0376953125, |
| "memory(GiB)": 43.05, |
| "step": 9245, |
| "train_speed(iter/s)": 0.62188 |
| }, |
| { |
| "epoch": 5.9948152948801035, |
| "grad_norm": 2.484375, |
| "learning_rate": 3.781103876254503e-05, |
| "loss": 0.030859375, |
| "memory(GiB)": 43.05, |
| "step": 9250, |
| "train_speed(iter/s)": 0.621971 |
| }, |
| { |
| "epoch": 5.998055735580039, |
| "grad_norm": 3.515625, |
| "learning_rate": 3.775908074936053e-05, |
| "loss": 0.02421875, |
| "memory(GiB)": 43.05, |
| "step": 9255, |
| "train_speed(iter/s)": 0.62198 |
| }, |
| { |
| "epoch": 6.001296176279974, |
| "grad_norm": 8.0625, |
| "learning_rate": 3.770713679355364e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 9260, |
| "train_speed(iter/s)": 0.622038 |
| }, |
| { |
| "epoch": 6.00453661697991, |
| "grad_norm": 12.0625, |
| "learning_rate": 3.765520695477642e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 9265, |
| "train_speed(iter/s)": 0.622113 |
| }, |
| { |
| "epoch": 6.007777057679845, |
| "grad_norm": 0.66796875, |
| "learning_rate": 3.760329129266464e-05, |
| "loss": 0.0255859375, |
| "memory(GiB)": 43.05, |
| "step": 9270, |
| "train_speed(iter/s)": 0.622179 |
| }, |
| { |
| "epoch": 6.01101749837978, |
| "grad_norm": 9.0625, |
| "learning_rate": 3.755138986683788e-05, |
| "loss": 0.0140625, |
| "memory(GiB)": 43.05, |
| "step": 9275, |
| "train_speed(iter/s)": 0.622171 |
| }, |
| { |
| "epoch": 6.014257939079715, |
| "grad_norm": 6.625, |
| "learning_rate": 3.749950273689935e-05, |
| "loss": 0.028125, |
| "memory(GiB)": 43.05, |
| "step": 9280, |
| "train_speed(iter/s)": 0.622064 |
| }, |
| { |
| "epoch": 6.01749837977965, |
| "grad_norm": 12.9375, |
| "learning_rate": 3.7447629962435816e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 9285, |
| "train_speed(iter/s)": 0.622156 |
| }, |
| { |
| "epoch": 6.020738820479585, |
| "grad_norm": 3.265625, |
| "learning_rate": 3.739577160301756e-05, |
| "loss": 0.0173828125, |
| "memory(GiB)": 43.05, |
| "step": 9290, |
| "train_speed(iter/s)": 0.622216 |
| }, |
| { |
| "epoch": 6.02397926117952, |
| "grad_norm": 9.0625, |
| "learning_rate": 3.734392771819837e-05, |
| "loss": 0.0181640625, |
| "memory(GiB)": 43.05, |
| "step": 9295, |
| "train_speed(iter/s)": 0.622264 |
| }, |
| { |
| "epoch": 6.0272197018794555, |
| "grad_norm": 13.75, |
| "learning_rate": 3.729209836751531e-05, |
| "loss": 0.041796875, |
| "memory(GiB)": 43.05, |
| "step": 9300, |
| "train_speed(iter/s)": 0.622218 |
| }, |
| { |
| "epoch": 6.030460142579391, |
| "grad_norm": 14.1875, |
| "learning_rate": 3.7240283610488836e-05, |
| "loss": 0.0361328125, |
| "memory(GiB)": 43.05, |
| "step": 9305, |
| "train_speed(iter/s)": 0.62234 |
| }, |
| { |
| "epoch": 6.033700583279326, |
| "grad_norm": 1.21875, |
| "learning_rate": 3.718848350662262e-05, |
| "loss": 0.0189453125, |
| "memory(GiB)": 43.05, |
| "step": 9310, |
| "train_speed(iter/s)": 0.622437 |
| }, |
| { |
| "epoch": 6.036941023979261, |
| "grad_norm": 16.875, |
| "learning_rate": 3.713669811540349e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 9315, |
| "train_speed(iter/s)": 0.622473 |
| }, |
| { |
| "epoch": 6.040181464679196, |
| "grad_norm": 0.73046875, |
| "learning_rate": 3.70849274963014e-05, |
| "loss": 0.01875, |
| "memory(GiB)": 43.05, |
| "step": 9320, |
| "train_speed(iter/s)": 0.622534 |
| }, |
| { |
| "epoch": 6.043421905379131, |
| "grad_norm": 5.15625, |
| "learning_rate": 3.7033171708769324e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 9325, |
| "train_speed(iter/s)": 0.622625 |
| }, |
| { |
| "epoch": 6.046662346079067, |
| "grad_norm": 8.25, |
| "learning_rate": 3.698143081224323e-05, |
| "loss": 0.0318359375, |
| "memory(GiB)": 43.05, |
| "step": 9330, |
| "train_speed(iter/s)": 0.622703 |
| }, |
| { |
| "epoch": 6.049902786779002, |
| "grad_norm": 5.6875, |
| "learning_rate": 3.692970486614195e-05, |
| "loss": 0.0279296875, |
| "memory(GiB)": 43.05, |
| "step": 9335, |
| "train_speed(iter/s)": 0.622734 |
| }, |
| { |
| "epoch": 6.053143227478937, |
| "grad_norm": 12.25, |
| "learning_rate": 3.687799392986714e-05, |
| "loss": 0.0294921875, |
| "memory(GiB)": 43.05, |
| "step": 9340, |
| "train_speed(iter/s)": 0.622792 |
| }, |
| { |
| "epoch": 6.0563836681788725, |
| "grad_norm": 4.28125, |
| "learning_rate": 3.6826298062803296e-05, |
| "loss": 0.0205078125, |
| "memory(GiB)": 43.05, |
| "step": 9345, |
| "train_speed(iter/s)": 0.622846 |
| }, |
| { |
| "epoch": 6.059624108878808, |
| "grad_norm": 9.8125, |
| "learning_rate": 3.677461732431751e-05, |
| "loss": 0.040625, |
| "memory(GiB)": 43.05, |
| "step": 9350, |
| "train_speed(iter/s)": 0.62294 |
| }, |
| { |
| "epoch": 6.062864549578743, |
| "grad_norm": 9.875, |
| "learning_rate": 3.672295177375955e-05, |
| "loss": 0.044921875, |
| "memory(GiB)": 43.05, |
| "step": 9355, |
| "train_speed(iter/s)": 0.622956 |
| }, |
| { |
| "epoch": 6.066104990278678, |
| "grad_norm": 0.7890625, |
| "learning_rate": 3.6671301470461776e-05, |
| "loss": 0.02578125, |
| "memory(GiB)": 43.05, |
| "step": 9360, |
| "train_speed(iter/s)": 0.623076 |
| }, |
| { |
| "epoch": 6.069345430978613, |
| "grad_norm": 4.59375, |
| "learning_rate": 3.661966647373895e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 9365, |
| "train_speed(iter/s)": 0.623143 |
| }, |
| { |
| "epoch": 6.072585871678548, |
| "grad_norm": 7.0, |
| "learning_rate": 3.6568046842888326e-05, |
| "loss": 0.017578125, |
| "memory(GiB)": 43.05, |
| "step": 9370, |
| "train_speed(iter/s)": 0.623216 |
| }, |
| { |
| "epoch": 6.075826312378483, |
| "grad_norm": 12.5, |
| "learning_rate": 3.6516442637189496e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 9375, |
| "train_speed(iter/s)": 0.623271 |
| }, |
| { |
| "epoch": 6.079066753078418, |
| "grad_norm": 2.21875, |
| "learning_rate": 3.646485391590433e-05, |
| "loss": 0.033984375, |
| "memory(GiB)": 43.05, |
| "step": 9380, |
| "train_speed(iter/s)": 0.623314 |
| }, |
| { |
| "epoch": 6.0823071937783535, |
| "grad_norm": 2.328125, |
| "learning_rate": 3.64132807382769e-05, |
| "loss": 0.0302734375, |
| "memory(GiB)": 43.05, |
| "step": 9385, |
| "train_speed(iter/s)": 0.623427 |
| }, |
| { |
| "epoch": 6.085547634478289, |
| "grad_norm": 0.53125, |
| "learning_rate": 3.6361723163533504e-05, |
| "loss": 0.0203125, |
| "memory(GiB)": 43.05, |
| "step": 9390, |
| "train_speed(iter/s)": 0.623582 |
| }, |
| { |
| "epoch": 6.088788075178225, |
| "grad_norm": 7.0625, |
| "learning_rate": 3.631018125088239e-05, |
| "loss": 0.020703125, |
| "memory(GiB)": 43.05, |
| "step": 9395, |
| "train_speed(iter/s)": 0.623702 |
| }, |
| { |
| "epoch": 6.09202851587816, |
| "grad_norm": 15.0, |
| "learning_rate": 3.625865505951394e-05, |
| "loss": 0.0244140625, |
| "memory(GiB)": 43.05, |
| "step": 9400, |
| "train_speed(iter/s)": 0.62378 |
| }, |
| { |
| "epoch": 6.095268956578095, |
| "grad_norm": 11.375, |
| "learning_rate": 3.620714464860043e-05, |
| "loss": 0.03671875, |
| "memory(GiB)": 43.05, |
| "step": 9405, |
| "train_speed(iter/s)": 0.623872 |
| }, |
| { |
| "epoch": 6.09850939727803, |
| "grad_norm": 2.09375, |
| "learning_rate": 3.615565007729601e-05, |
| "loss": 0.025, |
| "memory(GiB)": 43.05, |
| "step": 9410, |
| "train_speed(iter/s)": 0.623985 |
| }, |
| { |
| "epoch": 6.101749837977965, |
| "grad_norm": 1.1953125, |
| "learning_rate": 3.6104171404736655e-05, |
| "loss": 0.0330078125, |
| "memory(GiB)": 43.05, |
| "step": 9415, |
| "train_speed(iter/s)": 0.624073 |
| }, |
| { |
| "epoch": 6.1049902786779, |
| "grad_norm": 11.5625, |
| "learning_rate": 3.6052708690040096e-05, |
| "loss": 0.0310546875, |
| "memory(GiB)": 43.05, |
| "step": 9420, |
| "train_speed(iter/s)": 0.624186 |
| }, |
| { |
| "epoch": 6.108230719377835, |
| "grad_norm": 9.6875, |
| "learning_rate": 3.600126199230568e-05, |
| "loss": 0.03203125, |
| "memory(GiB)": 43.05, |
| "step": 9425, |
| "train_speed(iter/s)": 0.624106 |
| }, |
| { |
| "epoch": 6.1114711600777705, |
| "grad_norm": 14.125, |
| "learning_rate": 3.5949831370614425e-05, |
| "loss": 0.0388671875, |
| "memory(GiB)": 43.05, |
| "step": 9430, |
| "train_speed(iter/s)": 0.624121 |
| }, |
| { |
| "epoch": 6.114711600777706, |
| "grad_norm": 9.25, |
| "learning_rate": 3.589841688402887e-05, |
| "loss": 0.034375, |
| "memory(GiB)": 43.05, |
| "step": 9435, |
| "train_speed(iter/s)": 0.62415 |
| }, |
| { |
| "epoch": 6.117952041477641, |
| "grad_norm": 0.55078125, |
| "learning_rate": 3.5847018591593e-05, |
| "loss": 0.0251953125, |
| "memory(GiB)": 43.05, |
| "step": 9440, |
| "train_speed(iter/s)": 0.62424 |
| }, |
| { |
| "epoch": 6.121192482177576, |
| "grad_norm": 8.8125, |
| "learning_rate": 3.57956365523322e-05, |
| "loss": 0.02265625, |
| "memory(GiB)": 43.05, |
| "step": 9445, |
| "train_speed(iter/s)": 0.624293 |
| }, |
| { |
| "epoch": 6.124432922877511, |
| "grad_norm": 15.4375, |
| "learning_rate": 3.574427082525326e-05, |
| "loss": 0.0205078125, |
| "memory(GiB)": 43.05, |
| "step": 9450, |
| "train_speed(iter/s)": 0.62428 |
| }, |
| { |
| "epoch": 6.127673363577447, |
| "grad_norm": 1.4765625, |
| "learning_rate": 3.569292146934413e-05, |
| "loss": 0.02421875, |
| "memory(GiB)": 43.05, |
| "step": 9455, |
| "train_speed(iter/s)": 0.624332 |
| }, |
| { |
| "epoch": 6.130913804277382, |
| "grad_norm": 0.66015625, |
| "learning_rate": 3.564158854357406e-05, |
| "loss": 0.0216796875, |
| "memory(GiB)": 43.05, |
| "step": 9460, |
| "train_speed(iter/s)": 0.624278 |
| }, |
| { |
| "epoch": 6.134154244977317, |
| "grad_norm": 0.60546875, |
| "learning_rate": 3.559027210689338e-05, |
| "loss": 0.033203125, |
| "memory(GiB)": 43.05, |
| "step": 9465, |
| "train_speed(iter/s)": 0.624252 |
| }, |
| { |
| "epoch": 6.137394685677252, |
| "grad_norm": 4.59375, |
| "learning_rate": 3.553897221823347e-05, |
| "loss": 0.0166015625, |
| "memory(GiB)": 43.05, |
| "step": 9470, |
| "train_speed(iter/s)": 0.624385 |
| }, |
| { |
| "epoch": 6.1406351263771874, |
| "grad_norm": 13.75, |
| "learning_rate": 3.5487688936506735e-05, |
| "loss": 0.0185546875, |
| "memory(GiB)": 43.05, |
| "step": 9475, |
| "train_speed(iter/s)": 0.624414 |
| }, |
| { |
| "epoch": 6.143875567077123, |
| "grad_norm": 12.5, |
| "learning_rate": 3.543642232060652e-05, |
| "loss": 0.03515625, |
| "memory(GiB)": 43.05, |
| "step": 9480, |
| "train_speed(iter/s)": 0.624493 |
| }, |
| { |
| "epoch": 6.147116007777058, |
| "grad_norm": 1.7421875, |
| "learning_rate": 3.538517242940699e-05, |
| "loss": 0.0341796875, |
| "memory(GiB)": 43.05, |
| "step": 9485, |
| "train_speed(iter/s)": 0.624511 |
| }, |
| { |
| "epoch": 6.150356448476993, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.5333939321763135e-05, |
| "loss": 0.047265625, |
| "memory(GiB)": 43.05, |
| "step": 9490, |
| "train_speed(iter/s)": 0.624664 |
| }, |
| { |
| "epoch": 6.153596889176928, |
| "grad_norm": 9.5625, |
| "learning_rate": 3.528272305651069e-05, |
| "loss": 0.051953125, |
| "memory(GiB)": 43.05, |
| "step": 9495, |
| "train_speed(iter/s)": 0.624756 |
| }, |
| { |
| "epoch": 6.156837329876863, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.523152369246596e-05, |
| "loss": 0.0138671875, |
| "memory(GiB)": 43.05, |
| "step": 9500, |
| "train_speed(iter/s)": 0.624885 |
| }, |
| { |
| "epoch": 6.160077770576798, |
| "grad_norm": 0.640625, |
| "learning_rate": 3.5180341288425945e-05, |
| "loss": 0.033203125, |
| "memory(GiB)": 43.05, |
| "step": 9505, |
| "train_speed(iter/s)": 0.624944 |
| }, |
| { |
| "epoch": 6.163318211276733, |
| "grad_norm": 7.1875, |
| "learning_rate": 3.512917590316812e-05, |
| "loss": 0.0203125, |
| "memory(GiB)": 43.05, |
| "step": 9510, |
| "train_speed(iter/s)": 0.625037 |
| }, |
| { |
| "epoch": 6.166558651976668, |
| "grad_norm": 0.703125, |
| "learning_rate": 3.5078027595450405e-05, |
| "loss": 0.02578125, |
| "memory(GiB)": 43.05, |
| "step": 9515, |
| "train_speed(iter/s)": 0.62499 |
| }, |
| { |
| "epoch": 6.169799092676604, |
| "grad_norm": 12.3125, |
| "learning_rate": 3.502689642401114e-05, |
| "loss": 0.021875, |
| "memory(GiB)": 43.05, |
| "step": 9520, |
| "train_speed(iter/s)": 0.625114 |
| }, |
| { |
| "epoch": 6.1730395333765395, |
| "grad_norm": 8.3125, |
| "learning_rate": 3.497578244756897e-05, |
| "loss": 0.0236328125, |
| "memory(GiB)": 43.05, |
| "step": 9525, |
| "train_speed(iter/s)": 0.625097 |
| }, |
| { |
| "epoch": 6.176279974076475, |
| "grad_norm": 0.5546875, |
| "learning_rate": 3.492468572482278e-05, |
| "loss": 0.04248046875, |
| "memory(GiB)": 43.05, |
| "step": 9530, |
| "train_speed(iter/s)": 0.625122 |
| }, |
| { |
| "epoch": 6.17952041477641, |
| "grad_norm": 0.625, |
| "learning_rate": 3.487360631445165e-05, |
| "loss": 0.01953125, |
| "memory(GiB)": 43.05, |
| "step": 9535, |
| "train_speed(iter/s)": 0.625178 |
| }, |
| { |
| "epoch": 6.182760855476345, |
| "grad_norm": 13.0, |
| "learning_rate": 3.4822544275114805e-05, |
| "loss": 0.0453125, |
| "memory(GiB)": 43.05, |
| "step": 9540, |
| "train_speed(iter/s)": 0.625186 |
| }, |
| { |
| "epoch": 6.18600129617628, |
| "grad_norm": 12.5, |
| "learning_rate": 3.477149966545147e-05, |
| "loss": 0.0228515625, |
| "memory(GiB)": 43.05, |
| "step": 9545, |
| "train_speed(iter/s)": 0.625236 |
| }, |
| { |
| "epoch": 6.189241736876215, |
| "grad_norm": 12.75, |
| "learning_rate": 3.4720472544080905e-05, |
| "loss": 0.019140625, |
| "memory(GiB)": 43.05, |
| "step": 9550, |
| "train_speed(iter/s)": 0.625287 |
| }, |
| { |
| "epoch": 6.19248217757615, |
| "grad_norm": 1.765625, |
| "learning_rate": 3.4669462969602274e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 9555, |
| "train_speed(iter/s)": 0.625367 |
| }, |
| { |
| "epoch": 6.195722618276085, |
| "grad_norm": 8.375, |
| "learning_rate": 3.461847100059454e-05, |
| "loss": 0.0345703125, |
| "memory(GiB)": 43.05, |
| "step": 9560, |
| "train_speed(iter/s)": 0.625374 |
| }, |
| { |
| "epoch": 6.1989630589760205, |
| "grad_norm": 12.8125, |
| "learning_rate": 3.456749669561651e-05, |
| "loss": 0.04140625, |
| "memory(GiB)": 43.05, |
| "step": 9565, |
| "train_speed(iter/s)": 0.625358 |
| }, |
| { |
| "epoch": 6.202203499675956, |
| "grad_norm": 5.03125, |
| "learning_rate": 3.4516540113206695e-05, |
| "loss": 0.02421875, |
| "memory(GiB)": 43.05, |
| "step": 9570, |
| "train_speed(iter/s)": 0.625508 |
| }, |
| { |
| "epoch": 6.205443940375891, |
| "grad_norm": 11.75, |
| "learning_rate": 3.446560131188323e-05, |
| "loss": 0.0287109375, |
| "memory(GiB)": 43.05, |
| "step": 9575, |
| "train_speed(iter/s)": 0.625637 |
| }, |
| { |
| "epoch": 6.208684381075827, |
| "grad_norm": 1.09375, |
| "learning_rate": 3.4414680350143843e-05, |
| "loss": 0.0419921875, |
| "memory(GiB)": 43.05, |
| "step": 9580, |
| "train_speed(iter/s)": 0.625628 |
| }, |
| { |
| "epoch": 6.211924821775762, |
| "grad_norm": 1.203125, |
| "learning_rate": 3.4363777286465806e-05, |
| "loss": 0.033203125, |
| "memory(GiB)": 43.05, |
| "step": 9585, |
| "train_speed(iter/s)": 0.625683 |
| }, |
| { |
| "epoch": 6.215165262475697, |
| "grad_norm": 12.0, |
| "learning_rate": 3.431289217930575e-05, |
| "loss": 0.0359375, |
| "memory(GiB)": 43.05, |
| "step": 9590, |
| "train_speed(iter/s)": 0.625761 |
| }, |
| { |
| "epoch": 6.218405703175632, |
| "grad_norm": 2.515625, |
| "learning_rate": 3.426202508709976e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 9595, |
| "train_speed(iter/s)": 0.625794 |
| }, |
| { |
| "epoch": 6.221646143875567, |
| "grad_norm": 11.5625, |
| "learning_rate": 3.421117606826324e-05, |
| "loss": 0.036328125, |
| "memory(GiB)": 43.05, |
| "step": 9600, |
| "train_speed(iter/s)": 0.62594 |
| }, |
| { |
| "epoch": 6.224886584575502, |
| "grad_norm": 2.65625, |
| "learning_rate": 3.4160345181190805e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 9605, |
| "train_speed(iter/s)": 0.62599 |
| }, |
| { |
| "epoch": 6.2281270252754375, |
| "grad_norm": 14.625, |
| "learning_rate": 3.4109532484256234e-05, |
| "loss": 0.0287109375, |
| "memory(GiB)": 43.05, |
| "step": 9610, |
| "train_speed(iter/s)": 0.626052 |
| }, |
| { |
| "epoch": 6.231367465975373, |
| "grad_norm": 1.3203125, |
| "learning_rate": 3.405873803581244e-05, |
| "loss": 0.0369140625, |
| "memory(GiB)": 43.05, |
| "step": 9615, |
| "train_speed(iter/s)": 0.626201 |
| }, |
| { |
| "epoch": 6.234607906675308, |
| "grad_norm": 12.375, |
| "learning_rate": 3.400796189419141e-05, |
| "loss": 0.0201171875, |
| "memory(GiB)": 43.05, |
| "step": 9620, |
| "train_speed(iter/s)": 0.62628 |
| }, |
| { |
| "epoch": 6.237848347375243, |
| "grad_norm": 1.9140625, |
| "learning_rate": 3.3957204117704035e-05, |
| "loss": 0.0248046875, |
| "memory(GiB)": 43.05, |
| "step": 9625, |
| "train_speed(iter/s)": 0.626313 |
| }, |
| { |
| "epoch": 6.241088788075178, |
| "grad_norm": 0.921875, |
| "learning_rate": 3.390646476464017e-05, |
| "loss": 0.0318359375, |
| "memory(GiB)": 43.05, |
| "step": 9630, |
| "train_speed(iter/s)": 0.626415 |
| }, |
| { |
| "epoch": 6.244329228775113, |
| "grad_norm": 3.71875, |
| "learning_rate": 3.385574389326852e-05, |
| "loss": 0.029296875, |
| "memory(GiB)": 43.05, |
| "step": 9635, |
| "train_speed(iter/s)": 0.626423 |
| }, |
| { |
| "epoch": 6.247569669475048, |
| "grad_norm": 0.71875, |
| "learning_rate": 3.3805041561836505e-05, |
| "loss": 0.02890625, |
| "memory(GiB)": 43.05, |
| "step": 9640, |
| "train_speed(iter/s)": 0.626412 |
| }, |
| { |
| "epoch": 6.250810110174983, |
| "grad_norm": 5.03125, |
| "learning_rate": 3.375435782857032e-05, |
| "loss": 0.0197265625, |
| "memory(GiB)": 43.05, |
| "step": 9645, |
| "train_speed(iter/s)": 0.626515 |
| }, |
| { |
| "epoch": 6.254050550874919, |
| "grad_norm": 13.0625, |
| "learning_rate": 3.370369275167476e-05, |
| "loss": 0.0302734375, |
| "memory(GiB)": 43.05, |
| "step": 9650, |
| "train_speed(iter/s)": 0.626461 |
| }, |
| { |
| "epoch": 6.2572909915748545, |
| "grad_norm": 11.625, |
| "learning_rate": 3.365304638933322e-05, |
| "loss": 0.0185546875, |
| "memory(GiB)": 43.05, |
| "step": 9655, |
| "train_speed(iter/s)": 0.626524 |
| }, |
| { |
| "epoch": 6.26053143227479, |
| "grad_norm": 10.625, |
| "learning_rate": 3.360241879970759e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 9660, |
| "train_speed(iter/s)": 0.626677 |
| }, |
| { |
| "epoch": 6.263771872974725, |
| "grad_norm": 6.84375, |
| "learning_rate": 3.355181004093823e-05, |
| "loss": 0.032421875, |
| "memory(GiB)": 43.05, |
| "step": 9665, |
| "train_speed(iter/s)": 0.626686 |
| }, |
| { |
| "epoch": 6.26701231367466, |
| "grad_norm": 7.875, |
| "learning_rate": 3.3501220171143785e-05, |
| "loss": 0.0283203125, |
| "memory(GiB)": 43.05, |
| "step": 9670, |
| "train_speed(iter/s)": 0.626702 |
| }, |
| { |
| "epoch": 6.270252754374595, |
| "grad_norm": 9.6875, |
| "learning_rate": 3.345064924842133e-05, |
| "loss": 0.0169921875, |
| "memory(GiB)": 43.05, |
| "step": 9675, |
| "train_speed(iter/s)": 0.62685 |
| }, |
| { |
| "epoch": 6.27349319507453, |
| "grad_norm": 5.28125, |
| "learning_rate": 3.340009733084611e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 9680, |
| "train_speed(iter/s)": 0.626918 |
| }, |
| { |
| "epoch": 6.276733635774465, |
| "grad_norm": 15.5, |
| "learning_rate": 3.334956447647155e-05, |
| "loss": 0.0228515625, |
| "memory(GiB)": 43.05, |
| "step": 9685, |
| "train_speed(iter/s)": 0.626933 |
| }, |
| { |
| "epoch": 6.2799740764744, |
| "grad_norm": 14.8125, |
| "learning_rate": 3.32990507433292e-05, |
| "loss": 0.0298828125, |
| "memory(GiB)": 43.05, |
| "step": 9690, |
| "train_speed(iter/s)": 0.627045 |
| }, |
| { |
| "epoch": 6.283214517174335, |
| "grad_norm": 4.15625, |
| "learning_rate": 3.324855618942865e-05, |
| "loss": 0.023046875, |
| "memory(GiB)": 43.05, |
| "step": 9695, |
| "train_speed(iter/s)": 0.627127 |
| }, |
| { |
| "epoch": 6.2864549578742706, |
| "grad_norm": 1.3046875, |
| "learning_rate": 3.319808087275743e-05, |
| "loss": 0.0318359375, |
| "memory(GiB)": 43.05, |
| "step": 9700, |
| "train_speed(iter/s)": 0.627239 |
| }, |
| { |
| "epoch": 6.289695398574206, |
| "grad_norm": 0.7109375, |
| "learning_rate": 3.314762485128102e-05, |
| "loss": 0.0203125, |
| "memory(GiB)": 43.05, |
| "step": 9705, |
| "train_speed(iter/s)": 0.627349 |
| }, |
| { |
| "epoch": 6.292935839274142, |
| "grad_norm": 10.5, |
| "learning_rate": 3.309718818294275e-05, |
| "loss": 0.028515625, |
| "memory(GiB)": 43.05, |
| "step": 9710, |
| "train_speed(iter/s)": 0.62746 |
| }, |
| { |
| "epoch": 6.296176279974077, |
| "grad_norm": 1.9375, |
| "learning_rate": 3.304677092566367e-05, |
| "loss": 0.0375, |
| "memory(GiB)": 43.05, |
| "step": 9715, |
| "train_speed(iter/s)": 0.627494 |
| }, |
| { |
| "epoch": 6.299416720674012, |
| "grad_norm": 3.578125, |
| "learning_rate": 3.299637313734258e-05, |
| "loss": 0.037109375, |
| "memory(GiB)": 43.05, |
| "step": 9720, |
| "train_speed(iter/s)": 0.627573 |
| }, |
| { |
| "epoch": 6.302657161373947, |
| "grad_norm": 10.3125, |
| "learning_rate": 3.294599487585594e-05, |
| "loss": 0.0275390625, |
| "memory(GiB)": 43.05, |
| "step": 9725, |
| "train_speed(iter/s)": 0.627566 |
| }, |
| { |
| "epoch": 6.305897602073882, |
| "grad_norm": 15.5625, |
| "learning_rate": 3.289563619905771e-05, |
| "loss": 0.0291015625, |
| "memory(GiB)": 43.05, |
| "step": 9730, |
| "train_speed(iter/s)": 0.627585 |
| }, |
| { |
| "epoch": 6.309138042773817, |
| "grad_norm": 14.6875, |
| "learning_rate": 3.2845297164779446e-05, |
| "loss": 0.0181640625, |
| "memory(GiB)": 43.05, |
| "step": 9735, |
| "train_speed(iter/s)": 0.627593 |
| }, |
| { |
| "epoch": 6.312378483473752, |
| "grad_norm": 11.5625, |
| "learning_rate": 3.2794977830830085e-05, |
| "loss": 0.0267578125, |
| "memory(GiB)": 43.05, |
| "step": 9740, |
| "train_speed(iter/s)": 0.627519 |
| }, |
| { |
| "epoch": 6.3156189241736875, |
| "grad_norm": 2.28125, |
| "learning_rate": 3.2744678254995974e-05, |
| "loss": 0.0220703125, |
| "memory(GiB)": 43.05, |
| "step": 9745, |
| "train_speed(iter/s)": 0.627617 |
| }, |
| { |
| "epoch": 6.318859364873623, |
| "grad_norm": 1.109375, |
| "learning_rate": 3.269439849504075e-05, |
| "loss": 0.033984375, |
| "memory(GiB)": 43.05, |
| "step": 9750, |
| "train_speed(iter/s)": 0.627748 |
| }, |
| { |
| "epoch": 6.322099805573558, |
| "grad_norm": 9.875, |
| "learning_rate": 3.264413860870535e-05, |
| "loss": 0.010546875, |
| "memory(GiB)": 43.05, |
| "step": 9755, |
| "train_speed(iter/s)": 0.627816 |
| }, |
| { |
| "epoch": 6.325340246273493, |
| "grad_norm": 12.3125, |
| "learning_rate": 3.2593898653707775e-05, |
| "loss": 0.0203125, |
| "memory(GiB)": 43.05, |
| "step": 9760, |
| "train_speed(iter/s)": 0.62786 |
| }, |
| { |
| "epoch": 6.328580686973428, |
| "grad_norm": 4.59375, |
| "learning_rate": 3.254367868774322e-05, |
| "loss": 0.0087890625, |
| "memory(GiB)": 43.05, |
| "step": 9765, |
| "train_speed(iter/s)": 0.627947 |
| }, |
| { |
| "epoch": 6.331821127673363, |
| "grad_norm": 2.828125, |
| "learning_rate": 3.249347876848395e-05, |
| "loss": 0.025390625, |
| "memory(GiB)": 43.05, |
| "step": 9770, |
| "train_speed(iter/s)": 0.627957 |
| }, |
| { |
| "epoch": 6.335061568373299, |
| "grad_norm": 2.078125, |
| "learning_rate": 3.244329895357912e-05, |
| "loss": 0.0302734375, |
| "memory(GiB)": 43.05, |
| "step": 9775, |
| "train_speed(iter/s)": 0.628084 |
| }, |
| { |
| "epoch": 6.338302009073234, |
| "grad_norm": 12.4375, |
| "learning_rate": 3.239313930065484e-05, |
| "loss": 0.03828125, |
| "memory(GiB)": 43.05, |
| "step": 9780, |
| "train_speed(iter/s)": 0.628116 |
| }, |
| { |
| "epoch": 6.341542449773169, |
| "grad_norm": 10.1875, |
| "learning_rate": 3.234299986731412e-05, |
| "loss": 0.0392578125, |
| "memory(GiB)": 43.05, |
| "step": 9785, |
| "train_speed(iter/s)": 0.628147 |
| }, |
| { |
| "epoch": 6.3447828904731045, |
| "grad_norm": 12.3125, |
| "learning_rate": 3.2292880711136644e-05, |
| "loss": 0.0236328125, |
| "memory(GiB)": 43.05, |
| "step": 9790, |
| "train_speed(iter/s)": 0.628274 |
| }, |
| { |
| "epoch": 6.34802333117304, |
| "grad_norm": 8.625, |
| "learning_rate": 3.22427818896789e-05, |
| "loss": 0.0396484375, |
| "memory(GiB)": 43.05, |
| "step": 9795, |
| "train_speed(iter/s)": 0.62824 |
| }, |
| { |
| "epoch": 6.351263771872975, |
| "grad_norm": 3.59375, |
| "learning_rate": 3.2192703460473994e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 9800, |
| "train_speed(iter/s)": 0.628324 |
| }, |
| { |
| "epoch": 6.35450421257291, |
| "grad_norm": 1.359375, |
| "learning_rate": 3.214264548103158e-05, |
| "loss": 0.0265625, |
| "memory(GiB)": 43.05, |
| "step": 9805, |
| "train_speed(iter/s)": 0.628438 |
| }, |
| { |
| "epoch": 6.357744653272845, |
| "grad_norm": 15.5, |
| "learning_rate": 3.2092608008837874e-05, |
| "loss": 0.041015625, |
| "memory(GiB)": 43.05, |
| "step": 9810, |
| "train_speed(iter/s)": 0.628488 |
| }, |
| { |
| "epoch": 6.36098509397278, |
| "grad_norm": 12.25, |
| "learning_rate": 3.204259110135553e-05, |
| "loss": 0.0349609375, |
| "memory(GiB)": 43.05, |
| "step": 9815, |
| "train_speed(iter/s)": 0.628557 |
| }, |
| { |
| "epoch": 6.364225534672715, |
| "grad_norm": 3.28125, |
| "learning_rate": 3.1992594816023565e-05, |
| "loss": 0.025390625, |
| "memory(GiB)": 43.05, |
| "step": 9820, |
| "train_speed(iter/s)": 0.628668 |
| }, |
| { |
| "epoch": 6.36746597537265, |
| "grad_norm": 10.4375, |
| "learning_rate": 3.194261921025734e-05, |
| "loss": 0.0185546875, |
| "memory(GiB)": 43.05, |
| "step": 9825, |
| "train_speed(iter/s)": 0.628799 |
| }, |
| { |
| "epoch": 6.3707064160725855, |
| "grad_norm": 11.6875, |
| "learning_rate": 3.189266434144847e-05, |
| "loss": 0.015625, |
| "memory(GiB)": 43.05, |
| "step": 9830, |
| "train_speed(iter/s)": 0.628841 |
| }, |
| { |
| "epoch": 6.3739468567725215, |
| "grad_norm": 7.84375, |
| "learning_rate": 3.18427302669647e-05, |
| "loss": 0.01875, |
| "memory(GiB)": 43.05, |
| "step": 9835, |
| "train_speed(iter/s)": 0.62893 |
| }, |
| { |
| "epoch": 6.377187297472457, |
| "grad_norm": 3.65625, |
| "learning_rate": 3.179281704414998e-05, |
| "loss": 0.01875, |
| "memory(GiB)": 43.05, |
| "step": 9840, |
| "train_speed(iter/s)": 0.629004 |
| }, |
| { |
| "epoch": 6.380427738172392, |
| "grad_norm": 4.78125, |
| "learning_rate": 3.174292473032426e-05, |
| "loss": 0.0193359375, |
| "memory(GiB)": 43.05, |
| "step": 9845, |
| "train_speed(iter/s)": 0.629108 |
| }, |
| { |
| "epoch": 6.383668178872327, |
| "grad_norm": 6.25, |
| "learning_rate": 3.1693053382783474e-05, |
| "loss": 0.0271484375, |
| "memory(GiB)": 43.05, |
| "step": 9850, |
| "train_speed(iter/s)": 0.629189 |
| }, |
| { |
| "epoch": 6.386908619572262, |
| "grad_norm": 9.0, |
| "learning_rate": 3.16432030587995e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 9855, |
| "train_speed(iter/s)": 0.629275 |
| }, |
| { |
| "epoch": 6.390149060272197, |
| "grad_norm": 2.703125, |
| "learning_rate": 3.1593373815620094e-05, |
| "loss": 0.0330078125, |
| "memory(GiB)": 43.05, |
| "step": 9860, |
| "train_speed(iter/s)": 0.629324 |
| }, |
| { |
| "epoch": 6.393389500972132, |
| "grad_norm": 11.9375, |
| "learning_rate": 3.1543565710468744e-05, |
| "loss": 0.02265625, |
| "memory(GiB)": 43.05, |
| "step": 9865, |
| "train_speed(iter/s)": 0.629472 |
| }, |
| { |
| "epoch": 6.396629941672067, |
| "grad_norm": 1.828125, |
| "learning_rate": 3.1493778800544696e-05, |
| "loss": 0.0205078125, |
| "memory(GiB)": 43.05, |
| "step": 9870, |
| "train_speed(iter/s)": 0.629602 |
| }, |
| { |
| "epoch": 6.3998703823720025, |
| "grad_norm": 1.46875, |
| "learning_rate": 3.144401314302287e-05, |
| "loss": 0.052734375, |
| "memory(GiB)": 43.05, |
| "step": 9875, |
| "train_speed(iter/s)": 0.62967 |
| }, |
| { |
| "epoch": 6.403110823071938, |
| "grad_norm": 1.71875, |
| "learning_rate": 3.139426879505377e-05, |
| "loss": 0.019921875, |
| "memory(GiB)": 43.05, |
| "step": 9880, |
| "train_speed(iter/s)": 0.629779 |
| }, |
| { |
| "epoch": 6.406351263771873, |
| "grad_norm": 11.3125, |
| "learning_rate": 3.13445458137634e-05, |
| "loss": 0.0345703125, |
| "memory(GiB)": 43.05, |
| "step": 9885, |
| "train_speed(iter/s)": 0.629868 |
| }, |
| { |
| "epoch": 6.409591704471808, |
| "grad_norm": 1.515625, |
| "learning_rate": 3.129484425625326e-05, |
| "loss": 0.0259765625, |
| "memory(GiB)": 43.05, |
| "step": 9890, |
| "train_speed(iter/s)": 0.629956 |
| }, |
| { |
| "epoch": 6.412832145171743, |
| "grad_norm": 2.1875, |
| "learning_rate": 3.1245164179600264e-05, |
| "loss": 0.03984375, |
| "memory(GiB)": 43.05, |
| "step": 9895, |
| "train_speed(iter/s)": 0.630071 |
| }, |
| { |
| "epoch": 6.416072585871679, |
| "grad_norm": 5.90625, |
| "learning_rate": 3.119550564085658e-05, |
| "loss": 0.020703125, |
| "memory(GiB)": 43.05, |
| "step": 9900, |
| "train_speed(iter/s)": 0.630122 |
| }, |
| { |
| "epoch": 6.419313026571614, |
| "grad_norm": 8.0625, |
| "learning_rate": 3.114586869704972e-05, |
| "loss": 0.0275390625, |
| "memory(GiB)": 43.05, |
| "step": 9905, |
| "train_speed(iter/s)": 0.630186 |
| }, |
| { |
| "epoch": 6.422553467271549, |
| "grad_norm": 3.3125, |
| "learning_rate": 3.109625340518237e-05, |
| "loss": 0.0400390625, |
| "memory(GiB)": 43.05, |
| "step": 9910, |
| "train_speed(iter/s)": 0.630236 |
| }, |
| { |
| "epoch": 6.425793907971484, |
| "grad_norm": 4.21875, |
| "learning_rate": 3.104665982223234e-05, |
| "loss": 0.0357421875, |
| "memory(GiB)": 43.05, |
| "step": 9915, |
| "train_speed(iter/s)": 0.630251 |
| }, |
| { |
| "epoch": 6.429034348671419, |
| "grad_norm": 8.4375, |
| "learning_rate": 3.0997088005152524e-05, |
| "loss": 0.0353515625, |
| "memory(GiB)": 43.05, |
| "step": 9920, |
| "train_speed(iter/s)": 0.63032 |
| }, |
| { |
| "epoch": 6.4322747893713546, |
| "grad_norm": 12.5625, |
| "learning_rate": 3.094753801087083e-05, |
| "loss": 0.025, |
| "memory(GiB)": 43.05, |
| "step": 9925, |
| "train_speed(iter/s)": 0.630287 |
| }, |
| { |
| "epoch": 6.43551523007129, |
| "grad_norm": 2.4375, |
| "learning_rate": 3.0898009896290074e-05, |
| "loss": 0.04140625, |
| "memory(GiB)": 43.05, |
| "step": 9930, |
| "train_speed(iter/s)": 0.63037 |
| }, |
| { |
| "epoch": 6.438755670771225, |
| "grad_norm": 2.828125, |
| "learning_rate": 3.084850371828796e-05, |
| "loss": 0.02421875, |
| "memory(GiB)": 43.05, |
| "step": 9935, |
| "train_speed(iter/s)": 0.630399 |
| }, |
| { |
| "epoch": 6.44199611147116, |
| "grad_norm": 13.75, |
| "learning_rate": 3.0799019533717025e-05, |
| "loss": 0.0150390625, |
| "memory(GiB)": 43.05, |
| "step": 9940, |
| "train_speed(iter/s)": 0.630535 |
| }, |
| { |
| "epoch": 6.445236552171095, |
| "grad_norm": 10.5625, |
| "learning_rate": 3.074955739940449e-05, |
| "loss": 0.030078125, |
| "memory(GiB)": 43.05, |
| "step": 9945, |
| "train_speed(iter/s)": 0.630644 |
| }, |
| { |
| "epoch": 6.44847699287103, |
| "grad_norm": 12.0625, |
| "learning_rate": 3.0700117372152315e-05, |
| "loss": 0.0470703125, |
| "memory(GiB)": 43.05, |
| "step": 9950, |
| "train_speed(iter/s)": 0.630719 |
| }, |
| { |
| "epoch": 6.451717433570965, |
| "grad_norm": 6.53125, |
| "learning_rate": 3.0650699508737046e-05, |
| "loss": 0.0326171875, |
| "memory(GiB)": 43.05, |
| "step": 9955, |
| "train_speed(iter/s)": 0.630822 |
| }, |
| { |
| "epoch": 6.454957874270901, |
| "grad_norm": 6.34375, |
| "learning_rate": 3.060130386590977e-05, |
| "loss": 0.0185546875, |
| "memory(GiB)": 43.05, |
| "step": 9960, |
| "train_speed(iter/s)": 0.630799 |
| }, |
| { |
| "epoch": 6.458198314970836, |
| "grad_norm": 16.125, |
| "learning_rate": 3.055193050039607e-05, |
| "loss": 0.022265625, |
| "memory(GiB)": 43.05, |
| "step": 9965, |
| "train_speed(iter/s)": 0.630819 |
| }, |
| { |
| "epoch": 6.4614387556707715, |
| "grad_norm": 1.1953125, |
| "learning_rate": 3.0502579468895943e-05, |
| "loss": 0.026171875, |
| "memory(GiB)": 43.05, |
| "step": 9970, |
| "train_speed(iter/s)": 0.630866 |
| }, |
| { |
| "epoch": 6.464679196370707, |
| "grad_norm": 1.234375, |
| "learning_rate": 3.0453250828083718e-05, |
| "loss": 0.034765625, |
| "memory(GiB)": 43.05, |
| "step": 9975, |
| "train_speed(iter/s)": 0.63096 |
| }, |
| { |
| "epoch": 6.467919637070642, |
| "grad_norm": 4.40625, |
| "learning_rate": 3.0403944634608034e-05, |
| "loss": 0.0302734375, |
| "memory(GiB)": 43.05, |
| "step": 9980, |
| "train_speed(iter/s)": 0.631083 |
| }, |
| { |
| "epoch": 6.471160077770577, |
| "grad_norm": 6.65625, |
| "learning_rate": 3.0354660945091763e-05, |
| "loss": 0.02265625, |
| "memory(GiB)": 43.05, |
| "step": 9985, |
| "train_speed(iter/s)": 0.631167 |
| }, |
| { |
| "epoch": 6.474400518470512, |
| "grad_norm": 1.3125, |
| "learning_rate": 3.0305399816131884e-05, |
| "loss": 0.0193359375, |
| "memory(GiB)": 43.05, |
| "step": 9990, |
| "train_speed(iter/s)": 0.631197 |
| }, |
| { |
| "epoch": 6.477640959170447, |
| "grad_norm": 0.9375, |
| "learning_rate": 3.0256161304299514e-05, |
| "loss": 0.0197265625, |
| "memory(GiB)": 43.05, |
| "step": 9995, |
| "train_speed(iter/s)": 0.63122 |
| }, |
| { |
| "epoch": 6.480881399870382, |
| "grad_norm": 2.015625, |
| "learning_rate": 3.0206945466139812e-05, |
| "loss": 0.033984375, |
| "memory(GiB)": 43.05, |
| "step": 10000, |
| "train_speed(iter/s)": 0.631328 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 15430, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": true, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.478228814502298e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|