| { | |
| "best_metric": 4.79392624, | |
| "best_model_checkpoint": "/mnt/bn/haiyang-dataset-lq/medical/outputde2d/qwen2-vl-2b-instruct/v1-20241108-205643/checkpoint-500", | |
| "epoch": 49.31506849315068, | |
| "eval_steps": 500, | |
| "global_step": 1800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "acc": 0.85866278, | |
| "epoch": 0.0273972602739726, | |
| "grad_norm": 11.529897689819336, | |
| "learning_rate": 0.0, | |
| "loss": 0.41227522, | |
| "memory(GiB)": 12.7, | |
| "step": 1, | |
| "train_speed(iter/s)": 0.042692 | |
| }, | |
| { | |
| "acc": 0.82054573, | |
| "epoch": 0.136986301369863, | |
| "grad_norm": 13.506338119506836, | |
| "learning_rate": 3.576679971701948e-06, | |
| "loss": 0.50167066, | |
| "memory(GiB)": 14.16, | |
| "step": 5, | |
| "train_speed(iter/s)": 0.146289 | |
| }, | |
| { | |
| "acc": 0.87029715, | |
| "epoch": 0.273972602739726, | |
| "grad_norm": 11.584628105163574, | |
| "learning_rate": 5.117072191244584e-06, | |
| "loss": 0.41271429, | |
| "memory(GiB)": 14.16, | |
| "step": 10, | |
| "train_speed(iter/s)": 0.206768 | |
| }, | |
| { | |
| "acc": 0.86857662, | |
| "epoch": 0.410958904109589, | |
| "grad_norm": 17.546506881713867, | |
| "learning_rate": 6.018143876079656e-06, | |
| "loss": 0.39275663, | |
| "memory(GiB)": 14.16, | |
| "step": 15, | |
| "train_speed(iter/s)": 0.242483 | |
| }, | |
| { | |
| "acc": 0.86033154, | |
| "epoch": 0.547945205479452, | |
| "grad_norm": 19.42036247253418, | |
| "learning_rate": 6.65746441078722e-06, | |
| "loss": 0.42753201, | |
| "memory(GiB)": 14.16, | |
| "step": 20, | |
| "train_speed(iter/s)": 0.263319 | |
| }, | |
| { | |
| "acc": 0.8581007, | |
| "epoch": 0.684931506849315, | |
| "grad_norm": 17.666065216064453, | |
| "learning_rate": 7.153359943403896e-06, | |
| "loss": 0.43485794, | |
| "memory(GiB)": 14.16, | |
| "step": 25, | |
| "train_speed(iter/s)": 0.277966 | |
| }, | |
| { | |
| "acc": 0.85695076, | |
| "epoch": 0.821917808219178, | |
| "grad_norm": 12.121685028076172, | |
| "learning_rate": 7.558536095622292e-06, | |
| "loss": 0.42965946, | |
| "memory(GiB)": 14.16, | |
| "step": 30, | |
| "train_speed(iter/s)": 0.289626 | |
| }, | |
| { | |
| "acc": 0.85145502, | |
| "epoch": 0.958904109589041, | |
| "grad_norm": 12.511621475219727, | |
| "learning_rate": 7.901107651134205e-06, | |
| "loss": 0.45605674, | |
| "memory(GiB)": 14.16, | |
| "step": 35, | |
| "train_speed(iter/s)": 0.297359 | |
| }, | |
| { | |
| "acc": 0.88358593, | |
| "epoch": 1.095890410958904, | |
| "grad_norm": 10.614742279052734, | |
| "learning_rate": 8.197856630329855e-06, | |
| "loss": 0.36642389, | |
| "memory(GiB)": 14.16, | |
| "step": 40, | |
| "train_speed(iter/s)": 0.304027 | |
| }, | |
| { | |
| "acc": 0.87005787, | |
| "epoch": 1.2328767123287672, | |
| "grad_norm": 12.311365127563477, | |
| "learning_rate": 8.459607780457364e-06, | |
| "loss": 0.43741484, | |
| "memory(GiB)": 14.16, | |
| "step": 45, | |
| "train_speed(iter/s)": 0.309632 | |
| }, | |
| { | |
| "acc": 0.87604589, | |
| "epoch": 1.36986301369863, | |
| "grad_norm": 13.369311332702637, | |
| "learning_rate": 8.693752162946532e-06, | |
| "loss": 0.39061749, | |
| "memory(GiB)": 14.16, | |
| "step": 50, | |
| "train_speed(iter/s)": 0.313752 | |
| }, | |
| { | |
| "acc": 0.89183826, | |
| "epoch": 1.5068493150684932, | |
| "grad_norm": 13.052772521972656, | |
| "learning_rate": 8.905561521090629e-06, | |
| "loss": 0.34727774, | |
| "memory(GiB)": 14.16, | |
| "step": 55, | |
| "train_speed(iter/s)": 0.317026 | |
| }, | |
| { | |
| "acc": 0.88800755, | |
| "epoch": 1.643835616438356, | |
| "grad_norm": 14.654471397399902, | |
| "learning_rate": 9.098928315164927e-06, | |
| "loss": 0.34038644, | |
| "memory(GiB)": 14.16, | |
| "step": 60, | |
| "train_speed(iter/s)": 0.320617 | |
| }, | |
| { | |
| "acc": 0.88333483, | |
| "epoch": 1.7808219178082192, | |
| "grad_norm": 12.527990341186523, | |
| "learning_rate": 9.27680852241303e-06, | |
| "loss": 0.34145203, | |
| "memory(GiB)": 14.16, | |
| "step": 65, | |
| "train_speed(iter/s)": 0.323067 | |
| }, | |
| { | |
| "acc": 0.88713379, | |
| "epoch": 1.9178082191780823, | |
| "grad_norm": 13.173103332519531, | |
| "learning_rate": 9.441499870676842e-06, | |
| "loss": 0.3459826, | |
| "memory(GiB)": 14.16, | |
| "step": 70, | |
| "train_speed(iter/s)": 0.326063 | |
| }, | |
| { | |
| "acc": 0.90485744, | |
| "epoch": 2.0547945205479454, | |
| "grad_norm": 11.286486625671387, | |
| "learning_rate": 9.594823847781604e-06, | |
| "loss": 0.29746895, | |
| "memory(GiB)": 14.16, | |
| "step": 75, | |
| "train_speed(iter/s)": 0.328132 | |
| }, | |
| { | |
| "acc": 0.91220999, | |
| "epoch": 2.191780821917808, | |
| "grad_norm": 10.608201026916504, | |
| "learning_rate": 9.73824884987249e-06, | |
| "loss": 0.27589982, | |
| "memory(GiB)": 14.16, | |
| "step": 80, | |
| "train_speed(iter/s)": 0.329693 | |
| }, | |
| { | |
| "acc": 0.92394562, | |
| "epoch": 2.328767123287671, | |
| "grad_norm": 13.439018249511719, | |
| "learning_rate": 9.872975930033608e-06, | |
| "loss": 0.26322646, | |
| "memory(GiB)": 14.16, | |
| "step": 85, | |
| "train_speed(iter/s)": 0.331593 | |
| }, | |
| { | |
| "acc": 0.91119957, | |
| "epoch": 2.4657534246575343, | |
| "grad_norm": 12.91903018951416, | |
| "learning_rate": 1e-05, | |
| "loss": 0.26378374, | |
| "memory(GiB)": 14.16, | |
| "step": 90, | |
| "train_speed(iter/s)": 0.333498 | |
| }, | |
| { | |
| "acc": 0.9276722, | |
| "epoch": 2.602739726027397, | |
| "grad_norm": 10.661258697509766, | |
| "learning_rate": 9.999789068686803e-06, | |
| "loss": 0.23127136, | |
| "memory(GiB)": 14.16, | |
| "step": 95, | |
| "train_speed(iter/s)": 0.335181 | |
| }, | |
| { | |
| "acc": 0.94177589, | |
| "epoch": 2.73972602739726, | |
| "grad_norm": 8.819624900817871, | |
| "learning_rate": 9.999156292545797e-06, | |
| "loss": 0.21489761, | |
| "memory(GiB)": 14.16, | |
| "step": 100, | |
| "train_speed(iter/s)": 0.336761 | |
| }, | |
| { | |
| "acc": 0.93883839, | |
| "epoch": 2.8767123287671232, | |
| "grad_norm": 9.24251937866211, | |
| "learning_rate": 9.998101724971245e-06, | |
| "loss": 0.20122993, | |
| "memory(GiB)": 14.16, | |
| "step": 105, | |
| "train_speed(iter/s)": 0.337843 | |
| }, | |
| { | |
| "acc": 0.93721886, | |
| "epoch": 3.0136986301369864, | |
| "grad_norm": 6.485929012298584, | |
| "learning_rate": 9.996625454948572e-06, | |
| "loss": 0.19496574, | |
| "memory(GiB)": 14.16, | |
| "step": 110, | |
| "train_speed(iter/s)": 0.338939 | |
| }, | |
| { | |
| "acc": 0.94114161, | |
| "epoch": 3.1506849315068495, | |
| "grad_norm": 9.10759449005127, | |
| "learning_rate": 9.99472760704687e-06, | |
| "loss": 0.20500426, | |
| "memory(GiB)": 14.16, | |
| "step": 115, | |
| "train_speed(iter/s)": 0.340322 | |
| }, | |
| { | |
| "acc": 0.96779289, | |
| "epoch": 3.287671232876712, | |
| "grad_norm": 9.064125061035156, | |
| "learning_rate": 9.992408341408366e-06, | |
| "loss": 0.11549917, | |
| "memory(GiB)": 14.16, | |
| "step": 120, | |
| "train_speed(iter/s)": 0.341348 | |
| }, | |
| { | |
| "acc": 0.93218994, | |
| "epoch": 3.4246575342465753, | |
| "grad_norm": 10.008238792419434, | |
| "learning_rate": 9.989667853734933e-06, | |
| "loss": 0.21996279, | |
| "memory(GiB)": 14.16, | |
| "step": 125, | |
| "train_speed(iter/s)": 0.342439 | |
| }, | |
| { | |
| "acc": 0.93686333, | |
| "epoch": 3.5616438356164384, | |
| "grad_norm": 11.974565505981445, | |
| "learning_rate": 9.98650637527156e-06, | |
| "loss": 0.19973722, | |
| "memory(GiB)": 14.16, | |
| "step": 130, | |
| "train_speed(iter/s)": 0.343123 | |
| }, | |
| { | |
| "acc": 0.95756645, | |
| "epoch": 3.6986301369863015, | |
| "grad_norm": 9.8711576461792, | |
| "learning_rate": 9.982924172786847e-06, | |
| "loss": 0.15214539, | |
| "memory(GiB)": 14.16, | |
| "step": 135, | |
| "train_speed(iter/s)": 0.34449 | |
| }, | |
| { | |
| "acc": 0.95660496, | |
| "epoch": 3.8356164383561646, | |
| "grad_norm": 6.757988452911377, | |
| "learning_rate": 9.97892154855049e-06, | |
| "loss": 0.15905871, | |
| "memory(GiB)": 14.16, | |
| "step": 140, | |
| "train_speed(iter/s)": 0.345239 | |
| }, | |
| { | |
| "acc": 0.95482464, | |
| "epoch": 3.9726027397260273, | |
| "grad_norm": 8.047441482543945, | |
| "learning_rate": 9.974498840307775e-06, | |
| "loss": 0.16302727, | |
| "memory(GiB)": 14.16, | |
| "step": 145, | |
| "train_speed(iter/s)": 0.345602 | |
| }, | |
| { | |
| "acc": 0.94146061, | |
| "epoch": 4.109589041095891, | |
| "grad_norm": 7.961703777313232, | |
| "learning_rate": 9.96965642125109e-06, | |
| "loss": 0.19785479, | |
| "memory(GiB)": 14.16, | |
| "step": 150, | |
| "train_speed(iter/s)": 0.346007 | |
| }, | |
| { | |
| "acc": 0.96698723, | |
| "epoch": 4.2465753424657535, | |
| "grad_norm": 6.472661972045898, | |
| "learning_rate": 9.964394699988415e-06, | |
| "loss": 0.11739849, | |
| "memory(GiB)": 14.16, | |
| "step": 155, | |
| "train_speed(iter/s)": 0.346863 | |
| }, | |
| { | |
| "acc": 0.9542901, | |
| "epoch": 4.383561643835616, | |
| "grad_norm": 8.756787300109863, | |
| "learning_rate": 9.958714120508861e-06, | |
| "loss": 0.13702551, | |
| "memory(GiB)": 14.16, | |
| "step": 160, | |
| "train_speed(iter/s)": 0.348349 | |
| }, | |
| { | |
| "acc": 0.95916128, | |
| "epoch": 4.52054794520548, | |
| "grad_norm": 9.755017280578613, | |
| "learning_rate": 9.952615162145197e-06, | |
| "loss": 0.13223737, | |
| "memory(GiB)": 14.16, | |
| "step": 165, | |
| "train_speed(iter/s)": 0.349345 | |
| }, | |
| { | |
| "acc": 0.96206884, | |
| "epoch": 4.657534246575342, | |
| "grad_norm": 8.553181648254395, | |
| "learning_rate": 9.946098339533407e-06, | |
| "loss": 0.11991118, | |
| "memory(GiB)": 14.16, | |
| "step": 170, | |
| "train_speed(iter/s)": 0.349712 | |
| }, | |
| { | |
| "acc": 0.96347275, | |
| "epoch": 4.794520547945205, | |
| "grad_norm": 7.194893836975098, | |
| "learning_rate": 9.93916420256926e-06, | |
| "loss": 0.10365121, | |
| "memory(GiB)": 14.16, | |
| "step": 175, | |
| "train_speed(iter/s)": 0.350314 | |
| }, | |
| { | |
| "acc": 0.97044868, | |
| "epoch": 4.931506849315069, | |
| "grad_norm": 6.540927410125732, | |
| "learning_rate": 9.93181333636191e-06, | |
| "loss": 0.10110762, | |
| "memory(GiB)": 14.16, | |
| "step": 180, | |
| "train_speed(iter/s)": 0.350746 | |
| }, | |
| { | |
| "acc": 0.97869854, | |
| "epoch": 5.068493150684931, | |
| "grad_norm": 6.64502477645874, | |
| "learning_rate": 9.924046361184535e-06, | |
| "loss": 0.06834425, | |
| "memory(GiB)": 14.16, | |
| "step": 185, | |
| "train_speed(iter/s)": 0.351047 | |
| }, | |
| { | |
| "acc": 0.97149448, | |
| "epoch": 5.205479452054795, | |
| "grad_norm": 7.438776016235352, | |
| "learning_rate": 9.91586393242198e-06, | |
| "loss": 0.09642395, | |
| "memory(GiB)": 14.16, | |
| "step": 190, | |
| "train_speed(iter/s)": 0.351567 | |
| }, | |
| { | |
| "acc": 0.96277952, | |
| "epoch": 5.342465753424658, | |
| "grad_norm": 9.334355354309082, | |
| "learning_rate": 9.907266740515464e-06, | |
| "loss": 0.10700824, | |
| "memory(GiB)": 14.16, | |
| "step": 195, | |
| "train_speed(iter/s)": 0.352169 | |
| }, | |
| { | |
| "acc": 0.97186604, | |
| "epoch": 5.47945205479452, | |
| "grad_norm": 5.772711753845215, | |
| "learning_rate": 9.898255510904326e-06, | |
| "loss": 0.07952163, | |
| "memory(GiB)": 14.16, | |
| "step": 200, | |
| "train_speed(iter/s)": 0.352683 | |
| }, | |
| { | |
| "acc": 0.98101072, | |
| "epoch": 5.616438356164384, | |
| "grad_norm": 9.092942237854004, | |
| "learning_rate": 9.888831003964803e-06, | |
| "loss": 0.06738672, | |
| "memory(GiB)": 14.16, | |
| "step": 205, | |
| "train_speed(iter/s)": 0.353043 | |
| }, | |
| { | |
| "acc": 0.97831497, | |
| "epoch": 5.7534246575342465, | |
| "grad_norm": 8.003717422485352, | |
| "learning_rate": 9.878994014945866e-06, | |
| "loss": 0.06806564, | |
| "memory(GiB)": 14.16, | |
| "step": 210, | |
| "train_speed(iter/s)": 0.354182 | |
| }, | |
| { | |
| "acc": 0.97665091, | |
| "epoch": 5.890410958904109, | |
| "grad_norm": 6.545485496520996, | |
| "learning_rate": 9.868745373902128e-06, | |
| "loss": 0.07062781, | |
| "memory(GiB)": 14.16, | |
| "step": 215, | |
| "train_speed(iter/s)": 0.354891 | |
| }, | |
| { | |
| "acc": 0.97873678, | |
| "epoch": 6.027397260273973, | |
| "grad_norm": 4.454226493835449, | |
| "learning_rate": 9.85808594562379e-06, | |
| "loss": 0.07400095, | |
| "memory(GiB)": 14.16, | |
| "step": 220, | |
| "train_speed(iter/s)": 0.355094 | |
| }, | |
| { | |
| "acc": 0.97500896, | |
| "epoch": 6.164383561643835, | |
| "grad_norm": 9.327370643615723, | |
| "learning_rate": 9.847016629563683e-06, | |
| "loss": 0.07909623, | |
| "memory(GiB)": 14.16, | |
| "step": 225, | |
| "train_speed(iter/s)": 0.355416 | |
| }, | |
| { | |
| "acc": 0.97549095, | |
| "epoch": 6.301369863013699, | |
| "grad_norm": 7.767273426055908, | |
| "learning_rate": 9.835538359761359e-06, | |
| "loss": 0.08394684, | |
| "memory(GiB)": 14.16, | |
| "step": 230, | |
| "train_speed(iter/s)": 0.35587 | |
| }, | |
| { | |
| "acc": 0.98198967, | |
| "epoch": 6.438356164383562, | |
| "grad_norm": 8.520513534545898, | |
| "learning_rate": 9.823652104764282e-06, | |
| "loss": 0.06493338, | |
| "memory(GiB)": 14.16, | |
| "step": 235, | |
| "train_speed(iter/s)": 0.356338 | |
| }, | |
| { | |
| "acc": 0.98189783, | |
| "epoch": 6.575342465753424, | |
| "grad_norm": 6.741430282592773, | |
| "learning_rate": 9.811358867546099e-06, | |
| "loss": 0.06953114, | |
| "memory(GiB)": 14.16, | |
| "step": 240, | |
| "train_speed(iter/s)": 0.356559 | |
| }, | |
| { | |
| "acc": 0.9792799, | |
| "epoch": 6.712328767123288, | |
| "grad_norm": 6.579135894775391, | |
| "learning_rate": 9.798659685422008e-06, | |
| "loss": 0.07183629, | |
| "memory(GiB)": 14.16, | |
| "step": 245, | |
| "train_speed(iter/s)": 0.357198 | |
| }, | |
| { | |
| "acc": 0.97903948, | |
| "epoch": 6.8493150684931505, | |
| "grad_norm": 7.918185234069824, | |
| "learning_rate": 9.785555629961232e-06, | |
| "loss": 0.06570032, | |
| "memory(GiB)": 14.16, | |
| "step": 250, | |
| "train_speed(iter/s)": 0.35739 | |
| }, | |
| { | |
| "acc": 0.98690357, | |
| "epoch": 6.986301369863014, | |
| "grad_norm": 4.936428546905518, | |
| "learning_rate": 9.772047806896599e-06, | |
| "loss": 0.04573858, | |
| "memory(GiB)": 14.16, | |
| "step": 255, | |
| "train_speed(iter/s)": 0.358183 | |
| }, | |
| { | |
| "acc": 0.98013973, | |
| "epoch": 7.123287671232877, | |
| "grad_norm": 6.603614330291748, | |
| "learning_rate": 9.758137356031226e-06, | |
| "loss": 0.06317404, | |
| "memory(GiB)": 14.16, | |
| "step": 260, | |
| "train_speed(iter/s)": 0.358435 | |
| }, | |
| { | |
| "acc": 0.98552742, | |
| "epoch": 7.260273972602739, | |
| "grad_norm": 5.6785173416137695, | |
| "learning_rate": 9.74382545114236e-06, | |
| "loss": 0.05590855, | |
| "memory(GiB)": 14.16, | |
| "step": 265, | |
| "train_speed(iter/s)": 0.359116 | |
| }, | |
| { | |
| "acc": 0.98451328, | |
| "epoch": 7.397260273972603, | |
| "grad_norm": 6.470608711242676, | |
| "learning_rate": 9.729113299882324e-06, | |
| "loss": 0.05722108, | |
| "memory(GiB)": 14.16, | |
| "step": 270, | |
| "train_speed(iter/s)": 0.359102 | |
| }, | |
| { | |
| "acc": 0.98782816, | |
| "epoch": 7.534246575342466, | |
| "grad_norm": 4.879244804382324, | |
| "learning_rate": 9.714002143676614e-06, | |
| "loss": 0.0392652, | |
| "memory(GiB)": 14.16, | |
| "step": 275, | |
| "train_speed(iter/s)": 0.359249 | |
| }, | |
| { | |
| "acc": 0.98015614, | |
| "epoch": 7.671232876712329, | |
| "grad_norm": 5.897606372833252, | |
| "learning_rate": 9.69849325761915e-06, | |
| "loss": 0.0653078, | |
| "memory(GiB)": 14.16, | |
| "step": 280, | |
| "train_speed(iter/s)": 0.359463 | |
| }, | |
| { | |
| "acc": 0.98269339, | |
| "epoch": 7.808219178082192, | |
| "grad_norm": 8.748714447021484, | |
| "learning_rate": 9.682587950364676e-06, | |
| "loss": 0.04879735, | |
| "memory(GiB)": 14.16, | |
| "step": 285, | |
| "train_speed(iter/s)": 0.359431 | |
| }, | |
| { | |
| "acc": 0.99092007, | |
| "epoch": 7.945205479452055, | |
| "grad_norm": 4.962334156036377, | |
| "learning_rate": 9.666287564018344e-06, | |
| "loss": 0.03704912, | |
| "memory(GiB)": 14.16, | |
| "step": 290, | |
| "train_speed(iter/s)": 0.359385 | |
| }, | |
| { | |
| "acc": 0.98640242, | |
| "epoch": 8.082191780821917, | |
| "grad_norm": 7.194764137268066, | |
| "learning_rate": 9.649593474022452e-06, | |
| "loss": 0.05298281, | |
| "memory(GiB)": 14.16, | |
| "step": 295, | |
| "train_speed(iter/s)": 0.359641 | |
| }, | |
| { | |
| "acc": 0.98602715, | |
| "epoch": 8.219178082191782, | |
| "grad_norm": 7.44851541519165, | |
| "learning_rate": 9.632507089040402e-06, | |
| "loss": 0.04129619, | |
| "memory(GiB)": 14.16, | |
| "step": 300, | |
| "train_speed(iter/s)": 0.359864 | |
| }, | |
| { | |
| "acc": 0.98549156, | |
| "epoch": 8.356164383561644, | |
| "grad_norm": 8.171492576599121, | |
| "learning_rate": 9.615029850837819e-06, | |
| "loss": 0.04942346, | |
| "memory(GiB)": 14.16, | |
| "step": 305, | |
| "train_speed(iter/s)": 0.359882 | |
| }, | |
| { | |
| "acc": 0.98449697, | |
| "epoch": 8.493150684931507, | |
| "grad_norm": 6.328600883483887, | |
| "learning_rate": 9.597163234160894e-06, | |
| "loss": 0.05851363, | |
| "memory(GiB)": 14.16, | |
| "step": 310, | |
| "train_speed(iter/s)": 0.359848 | |
| }, | |
| { | |
| "acc": 0.99007683, | |
| "epoch": 8.63013698630137, | |
| "grad_norm": 5.6946258544921875, | |
| "learning_rate": 9.57890874661196e-06, | |
| "loss": 0.03352974, | |
| "memory(GiB)": 14.16, | |
| "step": 315, | |
| "train_speed(iter/s)": 0.360126 | |
| }, | |
| { | |
| "acc": 0.98694916, | |
| "epoch": 8.767123287671232, | |
| "grad_norm": 4.585356712341309, | |
| "learning_rate": 9.56026792852226e-06, | |
| "loss": 0.04656056, | |
| "memory(GiB)": 14.16, | |
| "step": 320, | |
| "train_speed(iter/s)": 0.360741 | |
| }, | |
| { | |
| "acc": 0.98873882, | |
| "epoch": 8.904109589041095, | |
| "grad_norm": 7.50302791595459, | |
| "learning_rate": 9.541242352821985e-06, | |
| "loss": 0.03722157, | |
| "memory(GiB)": 14.16, | |
| "step": 325, | |
| "train_speed(iter/s)": 0.360963 | |
| }, | |
| { | |
| "acc": 0.98872223, | |
| "epoch": 9.04109589041096, | |
| "grad_norm": 8.641664505004883, | |
| "learning_rate": 9.52183362490754e-06, | |
| "loss": 0.04286454, | |
| "memory(GiB)": 14.16, | |
| "step": 330, | |
| "train_speed(iter/s)": 0.361166 | |
| }, | |
| { | |
| "acc": 0.99097099, | |
| "epoch": 9.178082191780822, | |
| "grad_norm": 5.386726379394531, | |
| "learning_rate": 9.502043382506082e-06, | |
| "loss": 0.02755214, | |
| "memory(GiB)": 14.16, | |
| "step": 335, | |
| "train_speed(iter/s)": 0.361519 | |
| }, | |
| { | |
| "acc": 0.99000244, | |
| "epoch": 9.315068493150685, | |
| "grad_norm": 4.545804977416992, | |
| "learning_rate": 9.481873295537333e-06, | |
| "loss": 0.04025009, | |
| "memory(GiB)": 14.16, | |
| "step": 340, | |
| "train_speed(iter/s)": 0.361469 | |
| }, | |
| { | |
| "acc": 0.99092007, | |
| "epoch": 9.452054794520548, | |
| "grad_norm": 8.062037467956543, | |
| "learning_rate": 9.461325065972662e-06, | |
| "loss": 0.04117663, | |
| "memory(GiB)": 14.16, | |
| "step": 345, | |
| "train_speed(iter/s)": 0.361763 | |
| }, | |
| { | |
| "acc": 0.99032946, | |
| "epoch": 9.58904109589041, | |
| "grad_norm": 5.639761924743652, | |
| "learning_rate": 9.440400427691476e-06, | |
| "loss": 0.02993804, | |
| "memory(GiB)": 14.16, | |
| "step": 350, | |
| "train_speed(iter/s)": 0.361739 | |
| }, | |
| { | |
| "acc": 0.98722763, | |
| "epoch": 9.726027397260275, | |
| "grad_norm": 5.573471546173096, | |
| "learning_rate": 9.419101146334908e-06, | |
| "loss": 0.04273846, | |
| "memory(GiB)": 14.16, | |
| "step": 355, | |
| "train_speed(iter/s)": 0.361815 | |
| }, | |
| { | |
| "acc": 0.98906002, | |
| "epoch": 9.863013698630137, | |
| "grad_norm": 5.205529689788818, | |
| "learning_rate": 9.397429019156841e-06, | |
| "loss": 0.04300301, | |
| "memory(GiB)": 14.16, | |
| "step": 360, | |
| "train_speed(iter/s)": 0.361905 | |
| }, | |
| { | |
| "acc": 0.9917551, | |
| "epoch": 10.0, | |
| "grad_norm": 5.506292343139648, | |
| "learning_rate": 9.375385874872248e-06, | |
| "loss": 0.03177897, | |
| "memory(GiB)": 14.16, | |
| "step": 365, | |
| "train_speed(iter/s)": 0.361986 | |
| }, | |
| { | |
| "acc": 0.99265499, | |
| "epoch": 10.136986301369863, | |
| "grad_norm": 5.0279035568237305, | |
| "learning_rate": 9.352973573502874e-06, | |
| "loss": 0.03047763, | |
| "memory(GiB)": 14.16, | |
| "step": 370, | |
| "train_speed(iter/s)": 0.3619 | |
| }, | |
| { | |
| "acc": 0.99043932, | |
| "epoch": 10.273972602739725, | |
| "grad_norm": 7.282947540283203, | |
| "learning_rate": 9.330194006220301e-06, | |
| "loss": 0.03883767, | |
| "memory(GiB)": 14.16, | |
| "step": 375, | |
| "train_speed(iter/s)": 0.3619 | |
| }, | |
| { | |
| "acc": 0.99266891, | |
| "epoch": 10.41095890410959, | |
| "grad_norm": 6.475697040557861, | |
| "learning_rate": 9.307049095186364e-06, | |
| "loss": 0.03223814, | |
| "memory(GiB)": 14.16, | |
| "step": 380, | |
| "train_speed(iter/s)": 0.361879 | |
| }, | |
| { | |
| "acc": 0.98734608, | |
| "epoch": 10.547945205479452, | |
| "grad_norm": 2.9214179515838623, | |
| "learning_rate": 9.28354079339095e-06, | |
| "loss": 0.04384069, | |
| "memory(GiB)": 14.16, | |
| "step": 385, | |
| "train_speed(iter/s)": 0.361963 | |
| }, | |
| { | |
| "acc": 0.99313297, | |
| "epoch": 10.684931506849315, | |
| "grad_norm": 4.704584121704102, | |
| "learning_rate": 9.259671084487218e-06, | |
| "loss": 0.02514983, | |
| "memory(GiB)": 14.16, | |
| "step": 390, | |
| "train_speed(iter/s)": 0.361864 | |
| }, | |
| { | |
| "acc": 0.990868, | |
| "epoch": 10.821917808219178, | |
| "grad_norm": 4.704314231872559, | |
| "learning_rate": 9.235441982624191e-06, | |
| "loss": 0.02952582, | |
| "memory(GiB)": 14.16, | |
| "step": 395, | |
| "train_speed(iter/s)": 0.36222 | |
| }, | |
| { | |
| "acc": 0.99545174, | |
| "epoch": 10.95890410958904, | |
| "grad_norm": 4.499762058258057, | |
| "learning_rate": 9.210855532276836e-06, | |
| "loss": 0.01564558, | |
| "memory(GiB)": 14.16, | |
| "step": 400, | |
| "train_speed(iter/s)": 0.362296 | |
| }, | |
| { | |
| "acc": 0.9944725, | |
| "epoch": 11.095890410958905, | |
| "grad_norm": 7.498542785644531, | |
| "learning_rate": 9.185913808073513e-06, | |
| "loss": 0.02198397, | |
| "memory(GiB)": 14.16, | |
| "step": 405, | |
| "train_speed(iter/s)": 0.362254 | |
| }, | |
| { | |
| "acc": 0.98989115, | |
| "epoch": 11.232876712328768, | |
| "grad_norm": 3.8143303394317627, | |
| "learning_rate": 9.16061891462094e-06, | |
| "loss": 0.0327835, | |
| "memory(GiB)": 14.16, | |
| "step": 410, | |
| "train_speed(iter/s)": 0.362508 | |
| }, | |
| { | |
| "acc": 0.99730492, | |
| "epoch": 11.36986301369863, | |
| "grad_norm": 3.9523301124572754, | |
| "learning_rate": 9.134972986326595e-06, | |
| "loss": 0.01258684, | |
| "memory(GiB)": 14.16, | |
| "step": 415, | |
| "train_speed(iter/s)": 0.362542 | |
| }, | |
| { | |
| "acc": 0.99241066, | |
| "epoch": 11.506849315068493, | |
| "grad_norm": 6.334254741668701, | |
| "learning_rate": 9.108978187218613e-06, | |
| "loss": 0.03454852, | |
| "memory(GiB)": 14.16, | |
| "step": 420, | |
| "train_speed(iter/s)": 0.362651 | |
| }, | |
| { | |
| "acc": 0.99217281, | |
| "epoch": 11.643835616438356, | |
| "grad_norm": 6.370650291442871, | |
| "learning_rate": 9.08263671076319e-06, | |
| "loss": 0.03252776, | |
| "memory(GiB)": 14.16, | |
| "step": 425, | |
| "train_speed(iter/s)": 0.362697 | |
| }, | |
| { | |
| "acc": 0.98822365, | |
| "epoch": 11.780821917808218, | |
| "grad_norm": 3.232943534851074, | |
| "learning_rate": 9.05595077967948e-06, | |
| "loss": 0.04269191, | |
| "memory(GiB)": 14.16, | |
| "step": 430, | |
| "train_speed(iter/s)": 0.362683 | |
| }, | |
| { | |
| "acc": 0.99225941, | |
| "epoch": 11.917808219178083, | |
| "grad_norm": 4.822254180908203, | |
| "learning_rate": 9.028922645752062e-06, | |
| "loss": 0.02760777, | |
| "memory(GiB)": 14.16, | |
| "step": 435, | |
| "train_speed(iter/s)": 0.362655 | |
| }, | |
| { | |
| "acc": 0.9954505, | |
| "epoch": 12.054794520547945, | |
| "grad_norm": 3.2365639209747314, | |
| "learning_rate": 9.00155458964091e-06, | |
| "loss": 0.01916433, | |
| "memory(GiB)": 14.16, | |
| "step": 440, | |
| "train_speed(iter/s)": 0.3626 | |
| }, | |
| { | |
| "acc": 0.99313316, | |
| "epoch": 12.191780821917808, | |
| "grad_norm": 3.7720203399658203, | |
| "learning_rate": 8.973848920688967e-06, | |
| "loss": 0.03937365, | |
| "memory(GiB)": 14.16, | |
| "step": 445, | |
| "train_speed(iter/s)": 0.362571 | |
| }, | |
| { | |
| "acc": 0.99251375, | |
| "epoch": 12.32876712328767, | |
| "grad_norm": 4.069283485412598, | |
| "learning_rate": 8.94580797672727e-06, | |
| "loss": 0.02898619, | |
| "memory(GiB)": 14.16, | |
| "step": 450, | |
| "train_speed(iter/s)": 0.362736 | |
| }, | |
| { | |
| "acc": 0.99321842, | |
| "epoch": 12.465753424657533, | |
| "grad_norm": 0.9725887775421143, | |
| "learning_rate": 8.917434123877686e-06, | |
| "loss": 0.02265764, | |
| "memory(GiB)": 14.16, | |
| "step": 455, | |
| "train_speed(iter/s)": 0.362774 | |
| }, | |
| { | |
| "acc": 0.99323349, | |
| "epoch": 12.602739726027398, | |
| "grad_norm": 4.508816719055176, | |
| "learning_rate": 8.888729756353248e-06, | |
| "loss": 0.02885826, | |
| "memory(GiB)": 14.16, | |
| "step": 460, | |
| "train_speed(iter/s)": 0.362813 | |
| }, | |
| { | |
| "acc": 0.99727192, | |
| "epoch": 12.73972602739726, | |
| "grad_norm": 2.479684352874756, | |
| "learning_rate": 8.859697296256147e-06, | |
| "loss": 0.01712638, | |
| "memory(GiB)": 14.16, | |
| "step": 465, | |
| "train_speed(iter/s)": 0.362768 | |
| }, | |
| { | |
| "acc": 0.99502192, | |
| "epoch": 12.876712328767123, | |
| "grad_norm": 1.5512564182281494, | |
| "learning_rate": 8.83033919337333e-06, | |
| "loss": 0.022619, | |
| "memory(GiB)": 14.16, | |
| "step": 470, | |
| "train_speed(iter/s)": 0.362919 | |
| }, | |
| { | |
| "acc": 0.99404383, | |
| "epoch": 13.013698630136986, | |
| "grad_norm": 5.0392680168151855, | |
| "learning_rate": 8.800657924969805e-06, | |
| "loss": 0.0215001, | |
| "memory(GiB)": 14.16, | |
| "step": 475, | |
| "train_speed(iter/s)": 0.362773 | |
| }, | |
| { | |
| "acc": 0.99045715, | |
| "epoch": 13.150684931506849, | |
| "grad_norm": 3.143148183822632, | |
| "learning_rate": 8.770655995579593e-06, | |
| "loss": 0.02810604, | |
| "memory(GiB)": 14.16, | |
| "step": 480, | |
| "train_speed(iter/s)": 0.362874 | |
| }, | |
| { | |
| "acc": 0.99417992, | |
| "epoch": 13.287671232876713, | |
| "grad_norm": 2.0431466102600098, | |
| "learning_rate": 8.740335936794398e-06, | |
| "loss": 0.02953114, | |
| "memory(GiB)": 14.16, | |
| "step": 485, | |
| "train_speed(iter/s)": 0.362814 | |
| }, | |
| { | |
| "acc": 0.99732151, | |
| "epoch": 13.424657534246576, | |
| "grad_norm": 2.4842429161071777, | |
| "learning_rate": 8.709700307049991e-06, | |
| "loss": 0.01085737, | |
| "memory(GiB)": 14.16, | |
| "step": 490, | |
| "train_speed(iter/s)": 0.362739 | |
| }, | |
| { | |
| "acc": 0.99217415, | |
| "epoch": 13.561643835616438, | |
| "grad_norm": 4.454080581665039, | |
| "learning_rate": 8.678751691410323e-06, | |
| "loss": 0.02852642, | |
| "memory(GiB)": 14.16, | |
| "step": 495, | |
| "train_speed(iter/s)": 0.363042 | |
| }, | |
| { | |
| "acc": 0.99452591, | |
| "epoch": 13.698630136986301, | |
| "grad_norm": 6.032941818237305, | |
| "learning_rate": 8.647492701349395e-06, | |
| "loss": 0.02294705, | |
| "memory(GiB)": 14.16, | |
| "step": 500, | |
| "train_speed(iter/s)": 0.363179 | |
| }, | |
| { | |
| "epoch": 13.698630136986301, | |
| "eval_acc": 0.3818755593383692, | |
| "eval_loss": 4.793926239013672, | |
| "eval_runtime": 2033.163, | |
| "eval_samples_per_second": 15.751, | |
| "eval_steps_per_second": 1.969, | |
| "step": 500 | |
| }, | |
| { | |
| "acc": 0.99273891, | |
| "epoch": 13.835616438356164, | |
| "grad_norm": 7.570253849029541, | |
| "learning_rate": 8.615925974530906e-06, | |
| "loss": 0.03025962, | |
| "memory(GiB)": 14.16, | |
| "step": 505, | |
| "train_speed(iter/s)": 0.146499 | |
| }, | |
| { | |
| "acc": 0.99452457, | |
| "epoch": 13.972602739726028, | |
| "grad_norm": 0.6901392936706543, | |
| "learning_rate": 8.584054174585673e-06, | |
| "loss": 0.01943414, | |
| "memory(GiB)": 14.16, | |
| "step": 510, | |
| "train_speed(iter/s)": 0.147597 | |
| }, | |
| { | |
| "acc": 0.99586115, | |
| "epoch": 14.10958904109589, | |
| "grad_norm": 2.8410799503326416, | |
| "learning_rate": 8.551879990886881e-06, | |
| "loss": 0.02195611, | |
| "memory(GiB)": 14.16, | |
| "step": 515, | |
| "train_speed(iter/s)": 0.148679 | |
| }, | |
| { | |
| "acc": 0.99596558, | |
| "epoch": 14.246575342465754, | |
| "grad_norm": 1.6700148582458496, | |
| "learning_rate": 8.519406138323145e-06, | |
| "loss": 0.01128972, | |
| "memory(GiB)": 14.16, | |
| "step": 520, | |
| "train_speed(iter/s)": 0.149765 | |
| }, | |
| { | |
| "acc": 0.99503975, | |
| "epoch": 14.383561643835616, | |
| "grad_norm": 1.0917117595672607, | |
| "learning_rate": 8.486635357069431e-06, | |
| "loss": 0.01859367, | |
| "memory(GiB)": 14.16, | |
| "step": 525, | |
| "train_speed(iter/s)": 0.15087 | |
| }, | |
| { | |
| "acc": 0.99261799, | |
| "epoch": 14.520547945205479, | |
| "grad_norm": 7.631021022796631, | |
| "learning_rate": 8.45357041235583e-06, | |
| "loss": 0.02078509, | |
| "memory(GiB)": 14.16, | |
| "step": 530, | |
| "train_speed(iter/s)": 0.151949 | |
| }, | |
| { | |
| "acc": 0.99308357, | |
| "epoch": 14.657534246575342, | |
| "grad_norm": 3.847642421722412, | |
| "learning_rate": 8.42021409423423e-06, | |
| "loss": 0.02047177, | |
| "memory(GiB)": 14.16, | |
| "step": 535, | |
| "train_speed(iter/s)": 0.153023 | |
| }, | |
| { | |
| "acc": 0.99270458, | |
| "epoch": 14.794520547945206, | |
| "grad_norm": 6.042537689208984, | |
| "learning_rate": 8.386569217342893e-06, | |
| "loss": 0.0270274, | |
| "memory(GiB)": 14.16, | |
| "step": 540, | |
| "train_speed(iter/s)": 0.154086 | |
| }, | |
| { | |
| "acc": 0.99546833, | |
| "epoch": 14.931506849315069, | |
| "grad_norm": 4.633887767791748, | |
| "learning_rate": 8.352638620668941e-06, | |
| "loss": 0.01502355, | |
| "memory(GiB)": 14.16, | |
| "step": 545, | |
| "train_speed(iter/s)": 0.155151 | |
| }, | |
| { | |
| "acc": 0.99634466, | |
| "epoch": 15.068493150684931, | |
| "grad_norm": 1.901209831237793, | |
| "learning_rate": 8.318425167308806e-06, | |
| "loss": 0.01356835, | |
| "memory(GiB)": 14.16, | |
| "step": 550, | |
| "train_speed(iter/s)": 0.156214 | |
| }, | |
| { | |
| "acc": 0.99639549, | |
| "epoch": 15.205479452054794, | |
| "grad_norm": 4.843277931213379, | |
| "learning_rate": 8.28393174422665e-06, | |
| "loss": 0.01601259, | |
| "memory(GiB)": 14.16, | |
| "step": 555, | |
| "train_speed(iter/s)": 0.157262 | |
| }, | |
| { | |
| "acc": 0.99320316, | |
| "epoch": 15.342465753424657, | |
| "grad_norm": 5.583487033843994, | |
| "learning_rate": 8.249161262010735e-06, | |
| "loss": 0.01526148, | |
| "memory(GiB)": 14.16, | |
| "step": 560, | |
| "train_speed(iter/s)": 0.158308 | |
| }, | |
| { | |
| "acc": 0.99721832, | |
| "epoch": 15.479452054794521, | |
| "grad_norm": 5.734185218811035, | |
| "learning_rate": 8.214116654627853e-06, | |
| "loss": 0.01092491, | |
| "memory(GiB)": 14.16, | |
| "step": 565, | |
| "train_speed(iter/s)": 0.159373 | |
| }, | |
| { | |
| "acc": 0.99818001, | |
| "epoch": 15.616438356164384, | |
| "grad_norm": 2.6671762466430664, | |
| "learning_rate": 8.178800879175737e-06, | |
| "loss": 0.00814181, | |
| "memory(GiB)": 14.16, | |
| "step": 570, | |
| "train_speed(iter/s)": 0.160399 | |
| }, | |
| { | |
| "acc": 0.99492016, | |
| "epoch": 15.753424657534246, | |
| "grad_norm": 4.252832889556885, | |
| "learning_rate": 8.143216915633535e-06, | |
| "loss": 0.01607218, | |
| "memory(GiB)": 14.16, | |
| "step": 575, | |
| "train_speed(iter/s)": 0.161443 | |
| }, | |
| { | |
| "acc": 0.9963459, | |
| "epoch": 15.89041095890411, | |
| "grad_norm": 2.7702836990356445, | |
| "learning_rate": 8.107367766610379e-06, | |
| "loss": 0.01704216, | |
| "memory(GiB)": 14.16, | |
| "step": 580, | |
| "train_speed(iter/s)": 0.162459 | |
| }, | |
| { | |
| "acc": 0.99641209, | |
| "epoch": 16.027397260273972, | |
| "grad_norm": 3.121049404144287, | |
| "learning_rate": 8.071256457091995e-06, | |
| "loss": 0.01695579, | |
| "memory(GiB)": 14.16, | |
| "step": 585, | |
| "train_speed(iter/s)": 0.163471 | |
| }, | |
| { | |
| "acc": 0.99682541, | |
| "epoch": 16.164383561643834, | |
| "grad_norm": 3.980106830596924, | |
| "learning_rate": 8.03488603418547e-06, | |
| "loss": 0.01948266, | |
| "memory(GiB)": 14.16, | |
| "step": 590, | |
| "train_speed(iter/s)": 0.164479 | |
| }, | |
| { | |
| "acc": 0.99080048, | |
| "epoch": 16.301369863013697, | |
| "grad_norm": 4.650881290435791, | |
| "learning_rate": 7.99825956686213e-06, | |
| "loss": 0.02414289, | |
| "memory(GiB)": 14.16, | |
| "step": 595, | |
| "train_speed(iter/s)": 0.16549 | |
| }, | |
| { | |
| "acc": 0.99316874, | |
| "epoch": 16.438356164383563, | |
| "grad_norm": 3.7769477367401123, | |
| "learning_rate": 7.96138014569857e-06, | |
| "loss": 0.02379684, | |
| "memory(GiB)": 14.16, | |
| "step": 600, | |
| "train_speed(iter/s)": 0.166493 | |
| }, | |
| { | |
| "acc": 0.99821434, | |
| "epoch": 16.575342465753426, | |
| "grad_norm": 2.486539363861084, | |
| "learning_rate": 7.924250882615874e-06, | |
| "loss": 0.01166953, | |
| "memory(GiB)": 14.16, | |
| "step": 605, | |
| "train_speed(iter/s)": 0.167483 | |
| }, | |
| { | |
| "acc": 0.99491873, | |
| "epoch": 16.71232876712329, | |
| "grad_norm": 0.6995792984962463, | |
| "learning_rate": 7.886874910617037e-06, | |
| "loss": 0.01726856, | |
| "memory(GiB)": 14.16, | |
| "step": 610, | |
| "train_speed(iter/s)": 0.168479 | |
| }, | |
| { | |
| "acc": 0.99727192, | |
| "epoch": 16.84931506849315, | |
| "grad_norm": 1.6550129652023315, | |
| "learning_rate": 7.849255383522576e-06, | |
| "loss": 0.0158612, | |
| "memory(GiB)": 14.16, | |
| "step": 615, | |
| "train_speed(iter/s)": 0.169527 | |
| }, | |
| { | |
| "acc": 0.99721966, | |
| "epoch": 16.986301369863014, | |
| "grad_norm": 2.894073724746704, | |
| "learning_rate": 7.811395475704436e-06, | |
| "loss": 0.01161546, | |
| "memory(GiB)": 14.16, | |
| "step": 620, | |
| "train_speed(iter/s)": 0.170515 | |
| }, | |
| { | |
| "acc": 0.99818001, | |
| "epoch": 17.123287671232877, | |
| "grad_norm": 2.339505910873413, | |
| "learning_rate": 7.773298381818106e-06, | |
| "loss": 0.00709306, | |
| "memory(GiB)": 14.16, | |
| "step": 625, | |
| "train_speed(iter/s)": 0.171471 | |
| }, | |
| { | |
| "acc": 0.997717, | |
| "epoch": 17.26027397260274, | |
| "grad_norm": 2.1085383892059326, | |
| "learning_rate": 7.734967316533076e-06, | |
| "loss": 0.00879358, | |
| "memory(GiB)": 14.16, | |
| "step": 630, | |
| "train_speed(iter/s)": 0.172477 | |
| }, | |
| { | |
| "acc": 0.99593258, | |
| "epoch": 17.397260273972602, | |
| "grad_norm": 3.778745174407959, | |
| "learning_rate": 7.696405514261554e-06, | |
| "loss": 0.01262949, | |
| "memory(GiB)": 14.16, | |
| "step": 635, | |
| "train_speed(iter/s)": 0.173456 | |
| }, | |
| { | |
| "acc": 0.99641209, | |
| "epoch": 17.534246575342465, | |
| "grad_norm": 4.980679512023926, | |
| "learning_rate": 7.657616228885571e-06, | |
| "loss": 0.00957234, | |
| "memory(GiB)": 14.16, | |
| "step": 640, | |
| "train_speed(iter/s)": 0.174442 | |
| }, | |
| { | |
| "acc": 0.99673891, | |
| "epoch": 17.671232876712327, | |
| "grad_norm": 1.6658488512039185, | |
| "learning_rate": 7.618602733482395e-06, | |
| "loss": 0.01483861, | |
| "memory(GiB)": 14.16, | |
| "step": 645, | |
| "train_speed(iter/s)": 0.1754 | |
| }, | |
| { | |
| "acc": 0.995401, | |
| "epoch": 17.80821917808219, | |
| "grad_norm": 7.899285793304443, | |
| "learning_rate": 7.579368320048354e-06, | |
| "loss": 0.02291541, | |
| "memory(GiB)": 14.16, | |
| "step": 650, | |
| "train_speed(iter/s)": 0.176359 | |
| }, | |
| { | |
| "acc": 0.99588165, | |
| "epoch": 17.945205479452056, | |
| "grad_norm": 4.884225368499756, | |
| "learning_rate": 7.539916299221047e-06, | |
| "loss": 0.0132565, | |
| "memory(GiB)": 14.16, | |
| "step": 655, | |
| "train_speed(iter/s)": 0.177313 | |
| }, | |
| { | |
| "acc": 0.99720192, | |
| "epoch": 18.08219178082192, | |
| "grad_norm": 1.3362199068069458, | |
| "learning_rate": 7.50025e-06, | |
| "loss": 0.01240759, | |
| "memory(GiB)": 14.16, | |
| "step": 660, | |
| "train_speed(iter/s)": 0.178257 | |
| }, | |
| { | |
| "acc": 0.99860992, | |
| "epoch": 18.21917808219178, | |
| "grad_norm": 0.9003859758377075, | |
| "learning_rate": 7.4603727694657576e-06, | |
| "loss": 0.00468392, | |
| "memory(GiB)": 14.16, | |
| "step": 665, | |
| "train_speed(iter/s)": 0.179216 | |
| }, | |
| { | |
| "acc": 0.99587898, | |
| "epoch": 18.356164383561644, | |
| "grad_norm": 2.7398738861083984, | |
| "learning_rate": 7.420287972497446e-06, | |
| "loss": 0.01100588, | |
| "memory(GiB)": 14.16, | |
| "step": 670, | |
| "train_speed(iter/s)": 0.180158 | |
| }, | |
| { | |
| "acc": 0.99493923, | |
| "epoch": 18.493150684931507, | |
| "grad_norm": 5.460540294647217, | |
| "learning_rate": 7.3799989914888506e-06, | |
| "loss": 0.01662439, | |
| "memory(GiB)": 14.16, | |
| "step": 675, | |
| "train_speed(iter/s)": 0.181127 | |
| }, | |
| { | |
| "acc": 0.99722099, | |
| "epoch": 18.63013698630137, | |
| "grad_norm": 0.33460837602615356, | |
| "learning_rate": 7.3395092260630015e-06, | |
| "loss": 0.00906119, | |
| "memory(GiB)": 14.16, | |
| "step": 680, | |
| "train_speed(iter/s)": 0.182049 | |
| }, | |
| { | |
| "acc": 0.99589815, | |
| "epoch": 18.767123287671232, | |
| "grad_norm": 4.61140251159668, | |
| "learning_rate": 7.298822092785316e-06, | |
| "loss": 0.0160338, | |
| "memory(GiB)": 14.16, | |
| "step": 685, | |
| "train_speed(iter/s)": 0.182979 | |
| }, | |
| { | |
| "acc": 0.99541874, | |
| "epoch": 18.904109589041095, | |
| "grad_norm": 1.5101581811904907, | |
| "learning_rate": 7.257941024875293e-06, | |
| "loss": 0.01577311, | |
| "memory(GiB)": 14.16, | |
| "step": 690, | |
| "train_speed(iter/s)": 0.183925 | |
| }, | |
| { | |
| "acc": 0.9977005, | |
| "epoch": 19.041095890410958, | |
| "grad_norm": 2.02103853225708, | |
| "learning_rate": 7.216869471916828e-06, | |
| "loss": 0.00827418, | |
| "memory(GiB)": 14.16, | |
| "step": 695, | |
| "train_speed(iter/s)": 0.184826 | |
| }, | |
| { | |
| "acc": 0.99538565, | |
| "epoch": 19.17808219178082, | |
| "grad_norm": 4.640865325927734, | |
| "learning_rate": 7.175610899567126e-06, | |
| "loss": 0.02137535, | |
| "memory(GiB)": 14.16, | |
| "step": 700, | |
| "train_speed(iter/s)": 0.185756 | |
| }, | |
| { | |
| "acc": 0.99816341, | |
| "epoch": 19.315068493150687, | |
| "grad_norm": 2.2678844928741455, | |
| "learning_rate": 7.1341687892642705e-06, | |
| "loss": 0.01489109, | |
| "memory(GiB)": 14.16, | |
| "step": 705, | |
| "train_speed(iter/s)": 0.186685 | |
| }, | |
| { | |
| "acc": 0.997717, | |
| "epoch": 19.45205479452055, | |
| "grad_norm": 8.91321086883545, | |
| "learning_rate": 7.092546637933454e-06, | |
| "loss": 0.00950522, | |
| "memory(GiB)": 14.16, | |
| "step": 710, | |
| "train_speed(iter/s)": 0.187598 | |
| }, | |
| { | |
| "acc": 0.99584599, | |
| "epoch": 19.589041095890412, | |
| "grad_norm": 3.019415855407715, | |
| "learning_rate": 7.0507479576919026e-06, | |
| "loss": 0.01135417, | |
| "memory(GiB)": 14.16, | |
| "step": 715, | |
| "train_speed(iter/s)": 0.188517 | |
| }, | |
| { | |
| "acc": 0.99723749, | |
| "epoch": 19.726027397260275, | |
| "grad_norm": 3.8561668395996094, | |
| "learning_rate": 7.0087762755525214e-06, | |
| "loss": 0.00678846, | |
| "memory(GiB)": 14.16, | |
| "step": 720, | |
| "train_speed(iter/s)": 0.18942 | |
| }, | |
| { | |
| "acc": 0.99725533, | |
| "epoch": 19.863013698630137, | |
| "grad_norm": 0.6471136212348938, | |
| "learning_rate": 6.966635133126286e-06, | |
| "loss": 0.01252564, | |
| "memory(GiB)": 14.16, | |
| "step": 725, | |
| "train_speed(iter/s)": 0.1903 | |
| }, | |
| { | |
| "acc": 0.99634466, | |
| "epoch": 20.0, | |
| "grad_norm": 3.774871587753296, | |
| "learning_rate": 6.924328086323392e-06, | |
| "loss": 0.01890204, | |
| "memory(GiB)": 14.16, | |
| "step": 730, | |
| "train_speed(iter/s)": 0.191223 | |
| }, | |
| { | |
| "acc": 0.99721966, | |
| "epoch": 20.136986301369863, | |
| "grad_norm": 4.085058689117432, | |
| "learning_rate": 6.881858705053205e-06, | |
| "loss": 0.01011662, | |
| "memory(GiB)": 14.16, | |
| "step": 735, | |
| "train_speed(iter/s)": 0.192097 | |
| }, | |
| { | |
| "acc": 0.99905624, | |
| "epoch": 20.273972602739725, | |
| "grad_norm": 2.026254892349243, | |
| "learning_rate": 6.8392305729230305e-06, | |
| "loss": 0.00847432, | |
| "memory(GiB)": 14.16, | |
| "step": 740, | |
| "train_speed(iter/s)": 0.192981 | |
| }, | |
| { | |
| "acc": 0.99864426, | |
| "epoch": 20.410958904109588, | |
| "grad_norm": 1.7818002700805664, | |
| "learning_rate": 6.796447286935725e-06, | |
| "loss": 0.00707859, | |
| "memory(GiB)": 14.16, | |
| "step": 745, | |
| "train_speed(iter/s)": 0.193875 | |
| }, | |
| { | |
| "acc": 0.99816341, | |
| "epoch": 20.54794520547945, | |
| "grad_norm": 0.09219258278608322, | |
| "learning_rate": 6.7535124571861766e-06, | |
| "loss": 0.01978692, | |
| "memory(GiB)": 14.16, | |
| "step": 750, | |
| "train_speed(iter/s)": 0.194746 | |
| }, | |
| { | |
| "acc": 0.99819775, | |
| "epoch": 20.684931506849313, | |
| "grad_norm": 3.1013734340667725, | |
| "learning_rate": 6.710429706556683e-06, | |
| "loss": 0.00450487, | |
| "memory(GiB)": 14.16, | |
| "step": 755, | |
| "train_speed(iter/s)": 0.195624 | |
| }, | |
| { | |
| "acc": 0.99859333, | |
| "epoch": 20.82191780821918, | |
| "grad_norm": 0.29254209995269775, | |
| "learning_rate": 6.667202670411245e-06, | |
| "loss": 0.00461008, | |
| "memory(GiB)": 14.16, | |
| "step": 760, | |
| "train_speed(iter/s)": 0.196517 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 20.958904109589042, | |
| "grad_norm": 0.2512110471725464, | |
| "learning_rate": 6.623834996288815e-06, | |
| "loss": 0.00767698, | |
| "memory(GiB)": 14.16, | |
| "step": 765, | |
| "train_speed(iter/s)": 0.197396 | |
| }, | |
| { | |
| "acc": 0.99591599, | |
| "epoch": 21.095890410958905, | |
| "grad_norm": 0.5388877987861633, | |
| "learning_rate": 6.580330343595521e-06, | |
| "loss": 0.01597615, | |
| "memory(GiB)": 14.16, | |
| "step": 770, | |
| "train_speed(iter/s)": 0.198263 | |
| }, | |
| { | |
| "acc": 0.99862766, | |
| "epoch": 21.232876712328768, | |
| "grad_norm": 0.15328700840473175, | |
| "learning_rate": 6.536692383295863e-06, | |
| "loss": 0.00608862, | |
| "memory(GiB)": 14.16, | |
| "step": 775, | |
| "train_speed(iter/s)": 0.19913 | |
| }, | |
| { | |
| "acc": 0.99775133, | |
| "epoch": 21.36986301369863, | |
| "grad_norm": 0.17136460542678833, | |
| "learning_rate": 6.492924797602972e-06, | |
| "loss": 0.00846671, | |
| "memory(GiB)": 14.16, | |
| "step": 780, | |
| "train_speed(iter/s)": 0.199987 | |
| }, | |
| { | |
| "acc": 0.99768267, | |
| "epoch": 21.506849315068493, | |
| "grad_norm": 3.5933849811553955, | |
| "learning_rate": 6.449031279667896e-06, | |
| "loss": 0.0071015, | |
| "memory(GiB)": 14.16, | |
| "step": 785, | |
| "train_speed(iter/s)": 0.200857 | |
| }, | |
| { | |
| "acc": 0.99627323, | |
| "epoch": 21.643835616438356, | |
| "grad_norm": 2.7347967624664307, | |
| "learning_rate": 6.4050155332679606e-06, | |
| "loss": 0.01377204, | |
| "memory(GiB)": 14.16, | |
| "step": 790, | |
| "train_speed(iter/s)": 0.201739 | |
| }, | |
| { | |
| "acc": 0.99720316, | |
| "epoch": 21.78082191780822, | |
| "grad_norm": 3.391113758087158, | |
| "learning_rate": 6.360881272494254e-06, | |
| "loss": 0.00560406, | |
| "memory(GiB)": 14.16, | |
| "step": 795, | |
| "train_speed(iter/s)": 0.202584 | |
| }, | |
| { | |
| "acc": 0.99770041, | |
| "epoch": 21.91780821917808, | |
| "grad_norm": 0.9516264796257019, | |
| "learning_rate": 6.316632221438214e-06, | |
| "loss": 0.01059882, | |
| "memory(GiB)": 14.16, | |
| "step": 800, | |
| "train_speed(iter/s)": 0.20342 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 22.054794520547944, | |
| "grad_norm": 4.009815216064453, | |
| "learning_rate": 6.2722721138774e-06, | |
| "loss": 0.00493859, | |
| "memory(GiB)": 14.16, | |
| "step": 805, | |
| "train_speed(iter/s)": 0.20423 | |
| }, | |
| { | |
| "acc": 0.99905624, | |
| "epoch": 22.19178082191781, | |
| "grad_norm": 0.8211438059806824, | |
| "learning_rate": 6.2278046929604265e-06, | |
| "loss": 0.00547095, | |
| "memory(GiB)": 14.16, | |
| "step": 810, | |
| "train_speed(iter/s)": 0.205109 | |
| }, | |
| { | |
| "acc": 0.99680634, | |
| "epoch": 22.328767123287673, | |
| "grad_norm": 1.1279343366622925, | |
| "learning_rate": 6.183233710891103e-06, | |
| "loss": 0.01568028, | |
| "memory(GiB)": 14.16, | |
| "step": 815, | |
| "train_speed(iter/s)": 0.205958 | |
| }, | |
| { | |
| "acc": 0.99821434, | |
| "epoch": 22.465753424657535, | |
| "grad_norm": 2.2662060260772705, | |
| "learning_rate": 6.1385629286118375e-06, | |
| "loss": 0.00696406, | |
| "memory(GiB)": 14.16, | |
| "step": 820, | |
| "train_speed(iter/s)": 0.206791 | |
| }, | |
| { | |
| "acc": 0.99821434, | |
| "epoch": 22.602739726027398, | |
| "grad_norm": 3.2888071537017822, | |
| "learning_rate": 6.093796115486277e-06, | |
| "loss": 0.00824727, | |
| "memory(GiB)": 14.16, | |
| "step": 825, | |
| "train_speed(iter/s)": 0.207611 | |
| }, | |
| { | |
| "acc": 0.99864426, | |
| "epoch": 22.73972602739726, | |
| "grad_norm": 0.9464216232299805, | |
| "learning_rate": 6.048937048981235e-06, | |
| "loss": 0.00788838, | |
| "memory(GiB)": 14.16, | |
| "step": 830, | |
| "train_speed(iter/s)": 0.20843 | |
| }, | |
| { | |
| "acc": 0.99864426, | |
| "epoch": 22.876712328767123, | |
| "grad_norm": 0.23246127367019653, | |
| "learning_rate": 6.003989514347962e-06, | |
| "loss": 0.00401598, | |
| "memory(GiB)": 14.16, | |
| "step": 835, | |
| "train_speed(iter/s)": 0.209242 | |
| }, | |
| { | |
| "acc": 0.99866076, | |
| "epoch": 23.013698630136986, | |
| "grad_norm": 3.2754745483398438, | |
| "learning_rate": 5.9589573043027314e-06, | |
| "loss": 0.00324695, | |
| "memory(GiB)": 14.16, | |
| "step": 840, | |
| "train_speed(iter/s)": 0.210024 | |
| }, | |
| { | |
| "acc": 0.9980547, | |
| "epoch": 23.15068493150685, | |
| "grad_norm": 4.7171711921691895, | |
| "learning_rate": 5.913844218706809e-06, | |
| "loss": 0.0035405, | |
| "memory(GiB)": 14.16, | |
| "step": 845, | |
| "train_speed(iter/s)": 0.210863 | |
| }, | |
| { | |
| "acc": 0.99637899, | |
| "epoch": 23.28767123287671, | |
| "grad_norm": 0.22946955263614655, | |
| "learning_rate": 5.8686540642458204e-06, | |
| "loss": 0.01147373, | |
| "memory(GiB)": 14.16, | |
| "step": 850, | |
| "train_speed(iter/s)": 0.211701 | |
| }, | |
| { | |
| "acc": 0.99811125, | |
| "epoch": 23.424657534246574, | |
| "grad_norm": 5.581859588623047, | |
| "learning_rate": 5.82339065410853e-06, | |
| "loss": 0.00868064, | |
| "memory(GiB)": 14.16, | |
| "step": 855, | |
| "train_speed(iter/s)": 0.212537 | |
| }, | |
| { | |
| "acc": 0.99769344, | |
| "epoch": 23.561643835616437, | |
| "grad_norm": 5.6449360847473145, | |
| "learning_rate": 5.7780578076650925e-06, | |
| "loss": 0.01117077, | |
| "memory(GiB)": 14.16, | |
| "step": 860, | |
| "train_speed(iter/s)": 0.213401 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 23.698630136986303, | |
| "grad_norm": 0.058708298951387405, | |
| "learning_rate": 5.732659350144769e-06, | |
| "loss": 0.00182705, | |
| "memory(GiB)": 14.16, | |
| "step": 865, | |
| "train_speed(iter/s)": 0.214229 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 23.835616438356166, | |
| "grad_norm": 1.460488200187683, | |
| "learning_rate": 5.687199112313132e-06, | |
| "loss": 0.00358091, | |
| "memory(GiB)": 14.16, | |
| "step": 870, | |
| "train_speed(iter/s)": 0.215057 | |
| }, | |
| { | |
| "acc": 0.99818001, | |
| "epoch": 23.972602739726028, | |
| "grad_norm": 0.8150052428245544, | |
| "learning_rate": 5.64168093014885e-06, | |
| "loss": 0.00942515, | |
| "memory(GiB)": 14.16, | |
| "step": 875, | |
| "train_speed(iter/s)": 0.215894 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 24.10958904109589, | |
| "grad_norm": 1.0939289331436157, | |
| "learning_rate": 5.596108644519984e-06, | |
| "loss": 0.00471724, | |
| "memory(GiB)": 14.16, | |
| "step": 880, | |
| "train_speed(iter/s)": 0.216682 | |
| }, | |
| { | |
| "acc": 0.99808903, | |
| "epoch": 24.246575342465754, | |
| "grad_norm": 6.786465644836426, | |
| "learning_rate": 5.5504861008599e-06, | |
| "loss": 0.00497846, | |
| "memory(GiB)": 14.16, | |
| "step": 885, | |
| "train_speed(iter/s)": 0.217499 | |
| }, | |
| { | |
| "acc": 0.99864416, | |
| "epoch": 24.383561643835616, | |
| "grad_norm": 5.07835054397583, | |
| "learning_rate": 5.504817148842783e-06, | |
| "loss": 0.00418225, | |
| "memory(GiB)": 14.16, | |
| "step": 890, | |
| "train_speed(iter/s)": 0.218324 | |
| }, | |
| { | |
| "acc": 0.99821434, | |
| "epoch": 24.52054794520548, | |
| "grad_norm": 2.3909006118774414, | |
| "learning_rate": 5.4591056420587975e-06, | |
| "loss": 0.00267169, | |
| "memory(GiB)": 14.16, | |
| "step": 895, | |
| "train_speed(iter/s)": 0.219135 | |
| }, | |
| { | |
| "acc": 0.99864416, | |
| "epoch": 24.65753424657534, | |
| "grad_norm": 0.555738627910614, | |
| "learning_rate": 5.413355437688927e-06, | |
| "loss": 0.00462395, | |
| "memory(GiB)": 14.16, | |
| "step": 900, | |
| "train_speed(iter/s)": 0.219963 | |
| }, | |
| { | |
| "acc": 0.99594774, | |
| "epoch": 24.794520547945204, | |
| "grad_norm": 5.093243598937988, | |
| "learning_rate": 5.367570396179488e-06, | |
| "loss": 0.01022252, | |
| "memory(GiB)": 14.16, | |
| "step": 905, | |
| "train_speed(iter/s)": 0.220772 | |
| }, | |
| { | |
| "acc": 0.99728842, | |
| "epoch": 24.931506849315067, | |
| "grad_norm": 3.7607083320617676, | |
| "learning_rate": 5.321754380916395e-06, | |
| "loss": 0.01267306, | |
| "memory(GiB)": 14.16, | |
| "step": 910, | |
| "train_speed(iter/s)": 0.221621 | |
| }, | |
| { | |
| "acc": 0.99598217, | |
| "epoch": 25.068493150684933, | |
| "grad_norm": 2.7842702865600586, | |
| "learning_rate": 5.275911257899149e-06, | |
| "loss": 0.01249768, | |
| "memory(GiB)": 14.16, | |
| "step": 915, | |
| "train_speed(iter/s)": 0.222392 | |
| }, | |
| { | |
| "acc": 0.99814568, | |
| "epoch": 25.205479452054796, | |
| "grad_norm": 2.1749532222747803, | |
| "learning_rate": 5.23004489541464e-06, | |
| "loss": 0.01107962, | |
| "memory(GiB)": 14.16, | |
| "step": 920, | |
| "train_speed(iter/s)": 0.223218 | |
| }, | |
| { | |
| "acc": 0.99907284, | |
| "epoch": 25.34246575342466, | |
| "grad_norm": 0.12532441318035126, | |
| "learning_rate": 5.184159163710717e-06, | |
| "loss": 0.00567983, | |
| "memory(GiB)": 14.16, | |
| "step": 925, | |
| "train_speed(iter/s)": 0.22402 | |
| }, | |
| { | |
| "acc": 0.99862642, | |
| "epoch": 25.47945205479452, | |
| "grad_norm": 3.7313835620880127, | |
| "learning_rate": 5.1382579346696275e-06, | |
| "loss": 0.00543302, | |
| "memory(GiB)": 14.16, | |
| "step": 930, | |
| "train_speed(iter/s)": 0.22481 | |
| }, | |
| { | |
| "acc": 0.99594784, | |
| "epoch": 25.616438356164384, | |
| "grad_norm": 3.0569019317626953, | |
| "learning_rate": 5.092345081481297e-06, | |
| "loss": 0.01230588, | |
| "memory(GiB)": 14.16, | |
| "step": 935, | |
| "train_speed(iter/s)": 0.225594 | |
| }, | |
| { | |
| "acc": 0.99909058, | |
| "epoch": 25.753424657534246, | |
| "grad_norm": 0.2874479293823242, | |
| "learning_rate": 5.0464244783165105e-06, | |
| "loss": 0.0029504, | |
| "memory(GiB)": 14.16, | |
| "step": 940, | |
| "train_speed(iter/s)": 0.226373 | |
| }, | |
| { | |
| "acc": 0.99818115, | |
| "epoch": 25.89041095890411, | |
| "grad_norm": 6.2819695472717285, | |
| "learning_rate": 5.000500000000001e-06, | |
| "loss": 0.00704549, | |
| "memory(GiB)": 14.16, | |
| "step": 945, | |
| "train_speed(iter/s)": 0.227172 | |
| }, | |
| { | |
| "acc": 0.99818001, | |
| "epoch": 26.027397260273972, | |
| "grad_norm": 2.6562278270721436, | |
| "learning_rate": 4.954575521683491e-06, | |
| "loss": 0.00467317, | |
| "memory(GiB)": 14.16, | |
| "step": 950, | |
| "train_speed(iter/s)": 0.227913 | |
| }, | |
| { | |
| "acc": 0.997717, | |
| "epoch": 26.164383561643834, | |
| "grad_norm": 0.46010449528694153, | |
| "learning_rate": 4.908654918518704e-06, | |
| "loss": 0.0066583, | |
| "memory(GiB)": 14.16, | |
| "step": 955, | |
| "train_speed(iter/s)": 0.228686 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 26.301369863013697, | |
| "grad_norm": 1.1016509532928467, | |
| "learning_rate": 4.862742065330375e-06, | |
| "loss": 0.00110117, | |
| "memory(GiB)": 14.16, | |
| "step": 960, | |
| "train_speed(iter/s)": 0.229538 | |
| }, | |
| { | |
| "acc": 0.99866076, | |
| "epoch": 26.438356164383563, | |
| "grad_norm": 3.4421184062957764, | |
| "learning_rate": 4.816840836289285e-06, | |
| "loss": 0.00389256, | |
| "memory(GiB)": 14.16, | |
| "step": 965, | |
| "train_speed(iter/s)": 0.230473 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 26.575342465753426, | |
| "grad_norm": 1.7454206943511963, | |
| "learning_rate": 4.770955104585361e-06, | |
| "loss": 0.00087426, | |
| "memory(GiB)": 14.16, | |
| "step": 970, | |
| "train_speed(iter/s)": 0.231175 | |
| }, | |
| { | |
| "acc": 0.99866076, | |
| "epoch": 26.71232876712329, | |
| "grad_norm": 0.08459863811731339, | |
| "learning_rate": 4.725088742100851e-06, | |
| "loss": 0.00253912, | |
| "memory(GiB)": 14.16, | |
| "step": 975, | |
| "train_speed(iter/s)": 0.231908 | |
| }, | |
| { | |
| "acc": 0.99821434, | |
| "epoch": 26.84931506849315, | |
| "grad_norm": 0.7988649010658264, | |
| "learning_rate": 4.679245619083607e-06, | |
| "loss": 0.00461807, | |
| "memory(GiB)": 14.16, | |
| "step": 980, | |
| "train_speed(iter/s)": 0.232806 | |
| }, | |
| { | |
| "acc": 0.99866076, | |
| "epoch": 26.986301369863014, | |
| "grad_norm": 0.03472264111042023, | |
| "learning_rate": 4.633429603820513e-06, | |
| "loss": 0.0050515, | |
| "memory(GiB)": 14.16, | |
| "step": 985, | |
| "train_speed(iter/s)": 0.233323 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 27.123287671232877, | |
| "grad_norm": 1.552517056465149, | |
| "learning_rate": 4.587644562311076e-06, | |
| "loss": 0.00116102, | |
| "memory(GiB)": 14.16, | |
| "step": 990, | |
| "train_speed(iter/s)": 0.234015 | |
| }, | |
| { | |
| "acc": 0.99866076, | |
| "epoch": 27.26027397260274, | |
| "grad_norm": 2.796733856201172, | |
| "learning_rate": 4.541894357941205e-06, | |
| "loss": 0.0039554, | |
| "memory(GiB)": 14.16, | |
| "step": 995, | |
| "train_speed(iter/s)": 0.234715 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 27.397260273972602, | |
| "grad_norm": 0.08924784511327744, | |
| "learning_rate": 4.4961828511572195e-06, | |
| "loss": 0.00281882, | |
| "memory(GiB)": 14.16, | |
| "step": 1000, | |
| "train_speed(iter/s)": 0.235411 | |
| }, | |
| { | |
| "epoch": 27.397260273972602, | |
| "eval_acc": 0.376108506949877, | |
| "eval_loss": 5.226269721984863, | |
| "eval_runtime": 1966.6531, | |
| "eval_samples_per_second": 16.284, | |
| "eval_steps_per_second": 2.035, | |
| "step": 1000 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 27.534246575342465, | |
| "grad_norm": 0.20645824074745178, | |
| "learning_rate": 4.450513899140101e-06, | |
| "loss": 0.00193416, | |
| "memory(GiB)": 14.16, | |
| "step": 1005, | |
| "train_speed(iter/s)": 0.160727 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 27.671232876712327, | |
| "grad_norm": 2.83465838432312, | |
| "learning_rate": 4.404891355480016e-06, | |
| "loss": 0.00531424, | |
| "memory(GiB)": 14.16, | |
| "step": 1010, | |
| "train_speed(iter/s)": 0.161302 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 27.80821917808219, | |
| "grad_norm": 1.058475375175476, | |
| "learning_rate": 4.359319069851151e-06, | |
| "loss": 0.00214943, | |
| "memory(GiB)": 14.16, | |
| "step": 1015, | |
| "train_speed(iter/s)": 0.161884 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 27.945205479452056, | |
| "grad_norm": 0.5197652578353882, | |
| "learning_rate": 4.313800887686869e-06, | |
| "loss": 0.00063238, | |
| "memory(GiB)": 14.16, | |
| "step": 1020, | |
| "train_speed(iter/s)": 0.162463 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 28.08219178082192, | |
| "grad_norm": 0.02172599360346794, | |
| "learning_rate": 4.268340649855233e-06, | |
| "loss": 0.00572151, | |
| "memory(GiB)": 14.16, | |
| "step": 1025, | |
| "train_speed(iter/s)": 0.163028 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 28.21917808219178, | |
| "grad_norm": 1.721336007118225, | |
| "learning_rate": 4.222942192334907e-06, | |
| "loss": 0.00307167, | |
| "memory(GiB)": 14.16, | |
| "step": 1030, | |
| "train_speed(iter/s)": 0.163606 | |
| }, | |
| { | |
| "acc": 0.99866076, | |
| "epoch": 28.356164383561644, | |
| "grad_norm": 0.07104966044425964, | |
| "learning_rate": 4.1776093458914725e-06, | |
| "loss": 0.0070457, | |
| "memory(GiB)": 14.16, | |
| "step": 1035, | |
| "train_speed(iter/s)": 0.164181 | |
| }, | |
| { | |
| "acc": 0.99866076, | |
| "epoch": 28.493150684931507, | |
| "grad_norm": 4.222721099853516, | |
| "learning_rate": 4.1323459357541826e-06, | |
| "loss": 0.00498358, | |
| "memory(GiB)": 14.16, | |
| "step": 1040, | |
| "train_speed(iter/s)": 0.164784 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 28.63013698630137, | |
| "grad_norm": 0.08688988536596298, | |
| "learning_rate": 4.087155781293192e-06, | |
| "loss": 0.0007615, | |
| "memory(GiB)": 14.16, | |
| "step": 1045, | |
| "train_speed(iter/s)": 0.165367 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 28.767123287671232, | |
| "grad_norm": 0.03878637030720711, | |
| "learning_rate": 4.042042695697272e-06, | |
| "loss": 0.00484578, | |
| "memory(GiB)": 14.16, | |
| "step": 1050, | |
| "train_speed(iter/s)": 0.165931 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 28.904109589041095, | |
| "grad_norm": 0.5024237036705017, | |
| "learning_rate": 3.997010485652039e-06, | |
| "loss": 0.00233584, | |
| "memory(GiB)": 14.16, | |
| "step": 1055, | |
| "train_speed(iter/s)": 0.166493 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 29.041095890410958, | |
| "grad_norm": 0.9032835960388184, | |
| "learning_rate": 3.952062951018766e-06, | |
| "loss": 0.00431595, | |
| "memory(GiB)": 14.16, | |
| "step": 1060, | |
| "train_speed(iter/s)": 0.167071 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 29.17808219178082, | |
| "grad_norm": 0.07504996657371521, | |
| "learning_rate": 3.907203884513724e-06, | |
| "loss": 0.00013832, | |
| "memory(GiB)": 14.16, | |
| "step": 1065, | |
| "train_speed(iter/s)": 0.167643 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 29.315068493150687, | |
| "grad_norm": 0.041768305003643036, | |
| "learning_rate": 3.862437071388162e-06, | |
| "loss": 0.00039022, | |
| "memory(GiB)": 14.16, | |
| "step": 1070, | |
| "train_speed(iter/s)": 0.168201 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 29.45205479452055, | |
| "grad_norm": 0.09327519685029984, | |
| "learning_rate": 3.817766289108899e-06, | |
| "loss": 0.00143108, | |
| "memory(GiB)": 14.16, | |
| "step": 1075, | |
| "train_speed(iter/s)": 0.168777 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 29.589041095890412, | |
| "grad_norm": 0.03159390017390251, | |
| "learning_rate": 3.773195307039575e-06, | |
| "loss": 0.00081171, | |
| "memory(GiB)": 14.16, | |
| "step": 1080, | |
| "train_speed(iter/s)": 0.169341 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 29.726027397260275, | |
| "grad_norm": 2.1395320892333984, | |
| "learning_rate": 3.728727886122599e-06, | |
| "loss": 0.00190442, | |
| "memory(GiB)": 14.16, | |
| "step": 1085, | |
| "train_speed(iter/s)": 0.169923 | |
| }, | |
| { | |
| "acc": 0.99866076, | |
| "epoch": 29.863013698630137, | |
| "grad_norm": 5.531327724456787, | |
| "learning_rate": 3.6843677785617874e-06, | |
| "loss": 0.00634567, | |
| "memory(GiB)": 14.16, | |
| "step": 1090, | |
| "train_speed(iter/s)": 0.170491 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 30.0, | |
| "grad_norm": 0.03689517825841904, | |
| "learning_rate": 3.640118727505748e-06, | |
| "loss": 0.00395082, | |
| "memory(GiB)": 14.16, | |
| "step": 1095, | |
| "train_speed(iter/s)": 0.171066 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 30.136986301369863, | |
| "grad_norm": 0.017297176644206047, | |
| "learning_rate": 3.5959844667320403e-06, | |
| "loss": 0.00074339, | |
| "memory(GiB)": 14.16, | |
| "step": 1100, | |
| "train_speed(iter/s)": 0.171615 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 30.273972602739725, | |
| "grad_norm": 0.017922429367899895, | |
| "learning_rate": 3.5519687203321056e-06, | |
| "loss": 0.00269048, | |
| "memory(GiB)": 14.16, | |
| "step": 1105, | |
| "train_speed(iter/s)": 0.172174 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 30.410958904109588, | |
| "grad_norm": 0.04719838500022888, | |
| "learning_rate": 3.5080752023970284e-06, | |
| "loss": 0.00416398, | |
| "memory(GiB)": 14.16, | |
| "step": 1110, | |
| "train_speed(iter/s)": 0.172722 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 30.54794520547945, | |
| "grad_norm": 0.02601473033428192, | |
| "learning_rate": 3.4643076167041395e-06, | |
| "loss": 0.0034888, | |
| "memory(GiB)": 14.16, | |
| "step": 1115, | |
| "train_speed(iter/s)": 0.173288 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 30.684931506849313, | |
| "grad_norm": 0.03908325359225273, | |
| "learning_rate": 3.4206696564044813e-06, | |
| "loss": 0.00179875, | |
| "memory(GiB)": 14.16, | |
| "step": 1120, | |
| "train_speed(iter/s)": 0.173848 | |
| }, | |
| { | |
| "acc": 0.99819775, | |
| "epoch": 30.82191780821918, | |
| "grad_norm": 2.540851354598999, | |
| "learning_rate": 3.377165003711185e-06, | |
| "loss": 0.00962915, | |
| "memory(GiB)": 14.16, | |
| "step": 1125, | |
| "train_speed(iter/s)": 0.174398 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 30.958904109589042, | |
| "grad_norm": 0.023008601740002632, | |
| "learning_rate": 3.3337973295887587e-06, | |
| "loss": 0.00751298, | |
| "memory(GiB)": 14.16, | |
| "step": 1130, | |
| "train_speed(iter/s)": 0.174948 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 31.095890410958905, | |
| "grad_norm": 0.0744655579328537, | |
| "learning_rate": 3.2905702934433197e-06, | |
| "loss": 0.00133921, | |
| "memory(GiB)": 14.16, | |
| "step": 1135, | |
| "train_speed(iter/s)": 0.175478 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 31.232876712328768, | |
| "grad_norm": 0.04030351713299751, | |
| "learning_rate": 3.247487542813825e-06, | |
| "loss": 0.0002436, | |
| "memory(GiB)": 14.16, | |
| "step": 1140, | |
| "train_speed(iter/s)": 0.176017 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 31.36986301369863, | |
| "grad_norm": 0.21374386548995972, | |
| "learning_rate": 3.204552713064278e-06, | |
| "loss": 0.00017974, | |
| "memory(GiB)": 14.16, | |
| "step": 1145, | |
| "train_speed(iter/s)": 0.176558 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 31.506849315068493, | |
| "grad_norm": 0.012107456102967262, | |
| "learning_rate": 3.1617694270769713e-06, | |
| "loss": 0.00015874, | |
| "memory(GiB)": 14.16, | |
| "step": 1150, | |
| "train_speed(iter/s)": 0.177106 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 31.643835616438356, | |
| "grad_norm": 0.12086984515190125, | |
| "learning_rate": 3.119141294946797e-06, | |
| "loss": 0.00084028, | |
| "memory(GiB)": 14.16, | |
| "step": 1155, | |
| "train_speed(iter/s)": 0.177658 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 31.78082191780822, | |
| "grad_norm": 3.515671968460083, | |
| "learning_rate": 3.0766719136766093e-06, | |
| "loss": 0.00295761, | |
| "memory(GiB)": 14.16, | |
| "step": 1160, | |
| "train_speed(iter/s)": 0.178197 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 31.91780821917808, | |
| "grad_norm": 0.011739728040993214, | |
| "learning_rate": 3.034364866873715e-06, | |
| "loss": 0.00487542, | |
| "memory(GiB)": 14.16, | |
| "step": 1165, | |
| "train_speed(iter/s)": 0.178737 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 32.054794520547944, | |
| "grad_norm": 0.6842294335365295, | |
| "learning_rate": 2.9922237244474807e-06, | |
| "loss": 0.00015365, | |
| "memory(GiB)": 14.16, | |
| "step": 1170, | |
| "train_speed(iter/s)": 0.179255 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 32.19178082191781, | |
| "grad_norm": 0.22703398764133453, | |
| "learning_rate": 2.950252042308099e-06, | |
| "loss": 0.00036469, | |
| "memory(GiB)": 14.16, | |
| "step": 1175, | |
| "train_speed(iter/s)": 0.179789 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 32.32876712328767, | |
| "grad_norm": 0.48301902413368225, | |
| "learning_rate": 2.9084533620665478e-06, | |
| "loss": 0.00040778, | |
| "memory(GiB)": 14.16, | |
| "step": 1180, | |
| "train_speed(iter/s)": 0.18032 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 32.465753424657535, | |
| "grad_norm": 3.3718252182006836, | |
| "learning_rate": 2.86683121073573e-06, | |
| "loss": 0.00395589, | |
| "memory(GiB)": 14.16, | |
| "step": 1185, | |
| "train_speed(iter/s)": 0.180852 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 32.602739726027394, | |
| "grad_norm": 0.021095439791679382, | |
| "learning_rate": 2.825389100432876e-06, | |
| "loss": 0.0002179, | |
| "memory(GiB)": 14.16, | |
| "step": 1190, | |
| "train_speed(iter/s)": 0.181384 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 32.73972602739726, | |
| "grad_norm": 0.03395378589630127, | |
| "learning_rate": 2.7841305280831743e-06, | |
| "loss": 0.0001625, | |
| "memory(GiB)": 14.16, | |
| "step": 1195, | |
| "train_speed(iter/s)": 0.181912 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 32.87671232876713, | |
| "grad_norm": 0.015184338204562664, | |
| "learning_rate": 2.743058975124708e-06, | |
| "loss": 0.00051196, | |
| "memory(GiB)": 14.16, | |
| "step": 1200, | |
| "train_speed(iter/s)": 0.182449 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 33.013698630136986, | |
| "grad_norm": 0.03929471969604492, | |
| "learning_rate": 2.7021779072146866e-06, | |
| "loss": 0.0004342, | |
| "memory(GiB)": 14.16, | |
| "step": 1205, | |
| "train_speed(iter/s)": 0.182964 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 33.15068493150685, | |
| "grad_norm": 0.014112686738371849, | |
| "learning_rate": 2.661490773937e-06, | |
| "loss": 6.217e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1210, | |
| "train_speed(iter/s)": 0.183491 | |
| }, | |
| { | |
| "acc": 0.99910717, | |
| "epoch": 33.28767123287671, | |
| "grad_norm": 0.07489810883998871, | |
| "learning_rate": 2.6210010085111507e-06, | |
| "loss": 0.00106858, | |
| "memory(GiB)": 14.16, | |
| "step": 1215, | |
| "train_speed(iter/s)": 0.184013 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 33.42465753424658, | |
| "grad_norm": 0.013716904446482658, | |
| "learning_rate": 2.580712027502557e-06, | |
| "loss": 0.00010475, | |
| "memory(GiB)": 14.16, | |
| "step": 1220, | |
| "train_speed(iter/s)": 0.184539 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 33.56164383561644, | |
| "grad_norm": 0.03437013924121857, | |
| "learning_rate": 2.5406272305342438e-06, | |
| "loss": 8.954e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1225, | |
| "train_speed(iter/s)": 0.185072 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 33.6986301369863, | |
| "grad_norm": 0.0419132262468338, | |
| "learning_rate": 2.500749999999999e-06, | |
| "loss": 0.00213626, | |
| "memory(GiB)": 14.16, | |
| "step": 1230, | |
| "train_speed(iter/s)": 0.185598 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 33.83561643835616, | |
| "grad_norm": 0.00895242765545845, | |
| "learning_rate": 2.461083700778954e-06, | |
| "loss": 0.00524443, | |
| "memory(GiB)": 14.16, | |
| "step": 1235, | |
| "train_speed(iter/s)": 0.186129 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 33.97260273972603, | |
| "grad_norm": 0.0046184309758245945, | |
| "learning_rate": 2.4216316799516488e-06, | |
| "loss": 9.459e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1240, | |
| "train_speed(iter/s)": 0.186651 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 34.10958904109589, | |
| "grad_norm": 0.00644602719694376, | |
| "learning_rate": 2.3823972665176048e-06, | |
| "loss": 0.00276474, | |
| "memory(GiB)": 14.16, | |
| "step": 1245, | |
| "train_speed(iter/s)": 0.187167 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 34.24657534246575, | |
| "grad_norm": 0.04513326287269592, | |
| "learning_rate": 2.34338377111443e-06, | |
| "loss": 8.529e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1250, | |
| "train_speed(iter/s)": 0.187696 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 34.38356164383562, | |
| "grad_norm": 0.00604225741699338, | |
| "learning_rate": 2.304594485738447e-06, | |
| "loss": 0.00061723, | |
| "memory(GiB)": 14.16, | |
| "step": 1255, | |
| "train_speed(iter/s)": 0.188214 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 34.52054794520548, | |
| "grad_norm": 0.007053479552268982, | |
| "learning_rate": 2.266032683466928e-06, | |
| "loss": 0.00040206, | |
| "memory(GiB)": 14.16, | |
| "step": 1260, | |
| "train_speed(iter/s)": 0.188743 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 34.657534246575345, | |
| "grad_norm": 0.012396584264934063, | |
| "learning_rate": 2.227701618181895e-06, | |
| "loss": 4.734e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1265, | |
| "train_speed(iter/s)": 0.189268 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 34.794520547945204, | |
| "grad_norm": 0.2615416944026947, | |
| "learning_rate": 2.189604524295565e-06, | |
| "loss": 0.00017459, | |
| "memory(GiB)": 14.16, | |
| "step": 1270, | |
| "train_speed(iter/s)": 0.18978 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 34.93150684931507, | |
| "grad_norm": 0.9177928566932678, | |
| "learning_rate": 2.1517446164774243e-06, | |
| "loss": 0.00016437, | |
| "memory(GiB)": 14.16, | |
| "step": 1275, | |
| "train_speed(iter/s)": 0.190291 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 35.06849315068493, | |
| "grad_norm": 0.005634276662021875, | |
| "learning_rate": 2.114125089382966e-06, | |
| "loss": 6.107e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1280, | |
| "train_speed(iter/s)": 0.190787 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 35.205479452054796, | |
| "grad_norm": 0.005844338797032833, | |
| "learning_rate": 2.0767491173841266e-06, | |
| "loss": 0.00024583, | |
| "memory(GiB)": 14.16, | |
| "step": 1285, | |
| "train_speed(iter/s)": 0.191287 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 35.342465753424655, | |
| "grad_norm": 0.008257429115474224, | |
| "learning_rate": 2.039619854301433e-06, | |
| "loss": 0.00127686, | |
| "memory(GiB)": 14.16, | |
| "step": 1290, | |
| "train_speed(iter/s)": 0.191799 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 35.47945205479452, | |
| "grad_norm": 0.14188018441200256, | |
| "learning_rate": 2.0027404331378715e-06, | |
| "loss": 0.00085992, | |
| "memory(GiB)": 14.16, | |
| "step": 1295, | |
| "train_speed(iter/s)": 0.192303 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 35.61643835616438, | |
| "grad_norm": 0.004589778371155262, | |
| "learning_rate": 1.9661139658145304e-06, | |
| "loss": 5.643e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1300, | |
| "train_speed(iter/s)": 0.192822 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 35.75342465753425, | |
| "grad_norm": 0.005317226517945528, | |
| "learning_rate": 1.929743542908006e-06, | |
| "loss": 3.488e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1305, | |
| "train_speed(iter/s)": 0.193346 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 35.89041095890411, | |
| "grad_norm": 0.17880931496620178, | |
| "learning_rate": 1.8936322333896213e-06, | |
| "loss": 0.00010323, | |
| "memory(GiB)": 14.16, | |
| "step": 1310, | |
| "train_speed(iter/s)": 0.193861 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 36.02739726027397, | |
| "grad_norm": 0.026366814970970154, | |
| "learning_rate": 1.857783084366465e-06, | |
| "loss": 0.00022611, | |
| "memory(GiB)": 14.16, | |
| "step": 1315, | |
| "train_speed(iter/s)": 0.194343 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 36.16438356164384, | |
| "grad_norm": 0.011201135814189911, | |
| "learning_rate": 1.8221991208242658e-06, | |
| "loss": 3.839e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1320, | |
| "train_speed(iter/s)": 0.194845 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 36.3013698630137, | |
| "grad_norm": 0.008422702550888062, | |
| "learning_rate": 1.7868833453721465e-06, | |
| "loss": 8.309e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1325, | |
| "train_speed(iter/s)": 0.195352 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 36.43835616438356, | |
| "grad_norm": 0.027778884395956993, | |
| "learning_rate": 1.7518387379892654e-06, | |
| "loss": 4.668e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1330, | |
| "train_speed(iter/s)": 0.195857 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 36.57534246575342, | |
| "grad_norm": 0.010711952112615108, | |
| "learning_rate": 1.717068255773352e-06, | |
| "loss": 3.179e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1335, | |
| "train_speed(iter/s)": 0.196353 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 36.71232876712329, | |
| "grad_norm": 0.004275246057659388, | |
| "learning_rate": 1.6825748326911945e-06, | |
| "loss": 0.00023135, | |
| "memory(GiB)": 14.16, | |
| "step": 1340, | |
| "train_speed(iter/s)": 0.196844 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 36.84931506849315, | |
| "grad_norm": 0.003764290129765868, | |
| "learning_rate": 1.6483613793310607e-06, | |
| "loss": 4.05e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1345, | |
| "train_speed(iter/s)": 0.197337 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 36.986301369863014, | |
| "grad_norm": 0.025480693206191063, | |
| "learning_rate": 1.6144307826571068e-06, | |
| "loss": 3.893e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1350, | |
| "train_speed(iter/s)": 0.19784 | |
| }, | |
| { | |
| "acc": 0.99955359, | |
| "epoch": 37.12328767123287, | |
| "grad_norm": 0.002638956531882286, | |
| "learning_rate": 1.580785905765769e-06, | |
| "loss": 0.00189444, | |
| "memory(GiB)": 14.16, | |
| "step": 1355, | |
| "train_speed(iter/s)": 0.198304 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 37.26027397260274, | |
| "grad_norm": 2.218669891357422, | |
| "learning_rate": 1.5474295876441716e-06, | |
| "loss": 0.00040235, | |
| "memory(GiB)": 14.16, | |
| "step": 1360, | |
| "train_speed(iter/s)": 0.198788 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 37.397260273972606, | |
| "grad_norm": 0.004326330963522196, | |
| "learning_rate": 1.51436464293057e-06, | |
| "loss": 2.629e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1365, | |
| "train_speed(iter/s)": 0.199269 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 37.534246575342465, | |
| "grad_norm": 0.00336137181147933, | |
| "learning_rate": 1.4815938616768564e-06, | |
| "loss": 2.298e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1370, | |
| "train_speed(iter/s)": 0.199749 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 37.67123287671233, | |
| "grad_norm": 0.0043685161508619785, | |
| "learning_rate": 1.4491200091131203e-06, | |
| "loss": 2.556e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1375, | |
| "train_speed(iter/s)": 0.200239 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 37.80821917808219, | |
| "grad_norm": 0.003170077223330736, | |
| "learning_rate": 1.4169458254143287e-06, | |
| "loss": 2.185e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1380, | |
| "train_speed(iter/s)": 0.200734 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 37.945205479452056, | |
| "grad_norm": 0.0025261647533625364, | |
| "learning_rate": 1.3850740254690947e-06, | |
| "loss": 0.00010961, | |
| "memory(GiB)": 14.16, | |
| "step": 1385, | |
| "train_speed(iter/s)": 0.201222 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 38.082191780821915, | |
| "grad_norm": 0.002983207581564784, | |
| "learning_rate": 1.3535072986506058e-06, | |
| "loss": 2.55e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1390, | |
| "train_speed(iter/s)": 0.201683 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 38.21917808219178, | |
| "grad_norm": 0.7226250171661377, | |
| "learning_rate": 1.3222483085896786e-06, | |
| "loss": 0.00014088, | |
| "memory(GiB)": 14.16, | |
| "step": 1395, | |
| "train_speed(iter/s)": 0.202156 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 38.35616438356164, | |
| "grad_norm": 0.0026145747397094965, | |
| "learning_rate": 1.2912996929500105e-06, | |
| "loss": 1.867e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1400, | |
| "train_speed(iter/s)": 0.202643 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 38.49315068493151, | |
| "grad_norm": 0.002422385849058628, | |
| "learning_rate": 1.2606640632056035e-06, | |
| "loss": 2.782e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1405, | |
| "train_speed(iter/s)": 0.203134 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 38.63013698630137, | |
| "grad_norm": 0.005694146268069744, | |
| "learning_rate": 1.230344004420408e-06, | |
| "loss": 2.287e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1410, | |
| "train_speed(iter/s)": 0.20361 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 38.76712328767123, | |
| "grad_norm": 0.0027258628979325294, | |
| "learning_rate": 1.2003420750301944e-06, | |
| "loss": 0.00018693, | |
| "memory(GiB)": 14.16, | |
| "step": 1415, | |
| "train_speed(iter/s)": 0.204094 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 38.9041095890411, | |
| "grad_norm": 0.0033724328968673944, | |
| "learning_rate": 1.1706608066266701e-06, | |
| "loss": 2.27e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1420, | |
| "train_speed(iter/s)": 0.204571 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 39.04109589041096, | |
| "grad_norm": 0.0026123709976673126, | |
| "learning_rate": 1.141302703743854e-06, | |
| "loss": 1.855e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1425, | |
| "train_speed(iter/s)": 0.205022 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 39.178082191780824, | |
| "grad_norm": 0.0019495452288538218, | |
| "learning_rate": 1.1122702436467527e-06, | |
| "loss": 1.743e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1430, | |
| "train_speed(iter/s)": 0.205495 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 39.31506849315068, | |
| "grad_norm": 0.004156290087848902, | |
| "learning_rate": 1.083565876122317e-06, | |
| "loss": 1.877e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1435, | |
| "train_speed(iter/s)": 0.205967 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 39.45205479452055, | |
| "grad_norm": 0.002886646194383502, | |
| "learning_rate": 1.0551920232727294e-06, | |
| "loss": 1.751e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1440, | |
| "train_speed(iter/s)": 0.206455 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 39.58904109589041, | |
| "grad_norm": 0.004523637238889933, | |
| "learning_rate": 1.0271510793110337e-06, | |
| "loss": 1.72e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1445, | |
| "train_speed(iter/s)": 0.206938 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 39.726027397260275, | |
| "grad_norm": 0.002039379673078656, | |
| "learning_rate": 9.994454103590919e-07, | |
| "loss": 0.00028988, | |
| "memory(GiB)": 14.16, | |
| "step": 1450, | |
| "train_speed(iter/s)": 0.207413 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 39.863013698630134, | |
| "grad_norm": 0.0019272951176390052, | |
| "learning_rate": 9.720773542479399e-07, | |
| "loss": 1.637e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1455, | |
| "train_speed(iter/s)": 0.207887 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 40.0, | |
| "grad_norm": 0.0028609074652194977, | |
| "learning_rate": 9.450492203205211e-07, | |
| "loss": 1.569e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1460, | |
| "train_speed(iter/s)": 0.208361 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 40.136986301369866, | |
| "grad_norm": 0.0020151259377598763, | |
| "learning_rate": 9.183632892368126e-07, | |
| "loss": 1.691e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1465, | |
| "train_speed(iter/s)": 0.208817 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 40.273972602739725, | |
| "grad_norm": 0.0018815897637978196, | |
| "learning_rate": 8.920218127813862e-07, | |
| "loss": 1.482e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1470, | |
| "train_speed(iter/s)": 0.209272 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 40.41095890410959, | |
| "grad_norm": 0.0017830530414357781, | |
| "learning_rate": 8.660270136734065e-07, | |
| "loss": 1.544e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1475, | |
| "train_speed(iter/s)": 0.209727 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 40.54794520547945, | |
| "grad_norm": 0.0013965211110189557, | |
| "learning_rate": 8.403810853790619e-07, | |
| "loss": 1.878e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1480, | |
| "train_speed(iter/s)": 0.210196 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 40.68493150684932, | |
| "grad_norm": 0.0022451053373515606, | |
| "learning_rate": 8.150861919264897e-07, | |
| "loss": 1.5e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1485, | |
| "train_speed(iter/s)": 0.210655 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 40.821917808219176, | |
| "grad_norm": 0.0060085877776145935, | |
| "learning_rate": 7.901444677231659e-07, | |
| "loss": 1.531e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1490, | |
| "train_speed(iter/s)": 0.211128 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 40.95890410958904, | |
| "grad_norm": 0.0016883641947060823, | |
| "learning_rate": 7.65558017375808e-07, | |
| "loss": 1.456e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1495, | |
| "train_speed(iter/s)": 0.211591 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 41.0958904109589, | |
| "grad_norm": 0.007958967238664627, | |
| "learning_rate": 7.413289155127845e-07, | |
| "loss": 1.438e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1500, | |
| "train_speed(iter/s)": 0.212023 | |
| }, | |
| { | |
| "epoch": 41.0958904109589, | |
| "eval_acc": 0.3744250158022868, | |
| "eval_loss": 5.587606430053711, | |
| "eval_runtime": 1965.1675, | |
| "eval_samples_per_second": 16.296, | |
| "eval_steps_per_second": 2.037, | |
| "step": 1500 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 41.23287671232877, | |
| "grad_norm": 0.0023259874433279037, | |
| "learning_rate": 7.174592066090488e-07, | |
| "loss": 1.527e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1505, | |
| "train_speed(iter/s)": 0.165697 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 41.36986301369863, | |
| "grad_norm": 0.0012143112253397703, | |
| "learning_rate": 6.939509048136372e-07, | |
| "loss": 1.22e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1510, | |
| "train_speed(iter/s)": 0.166098 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 41.50684931506849, | |
| "grad_norm": 0.0015943313483148813, | |
| "learning_rate": 6.708059937796999e-07, | |
| "loss": 1.281e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1515, | |
| "train_speed(iter/s)": 0.166498 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 41.64383561643836, | |
| "grad_norm": 0.0013340068981051445, | |
| "learning_rate": 6.480264264971263e-07, | |
| "loss": 1.167e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1520, | |
| "train_speed(iter/s)": 0.166899 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 41.78082191780822, | |
| "grad_norm": 0.0011501106200739741, | |
| "learning_rate": 6.256141251277526e-07, | |
| "loss": 1.187e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1525, | |
| "train_speed(iter/s)": 0.167303 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 41.917808219178085, | |
| "grad_norm": 0.0013626530999317765, | |
| "learning_rate": 6.035709808431585e-07, | |
| "loss": 1.142e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1530, | |
| "train_speed(iter/s)": 0.167706 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 42.054794520547944, | |
| "grad_norm": 0.0012834669323638082, | |
| "learning_rate": 5.818988536650921e-07, | |
| "loss": 1.071e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1535, | |
| "train_speed(iter/s)": 0.16809 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 42.19178082191781, | |
| "grad_norm": 0.0013576337369158864, | |
| "learning_rate": 5.605995723085264e-07, | |
| "loss": 1.142e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1540, | |
| "train_speed(iter/s)": 0.168482 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 42.32876712328767, | |
| "grad_norm": 0.0012725357664749026, | |
| "learning_rate": 5.396749340273402e-07, | |
| "loss": 1.051e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1545, | |
| "train_speed(iter/s)": 0.168875 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 42.465753424657535, | |
| "grad_norm": 0.0012627997202798724, | |
| "learning_rate": 5.191267044626674e-07, | |
| "loss": 2.435e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1550, | |
| "train_speed(iter/s)": 0.169269 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 42.602739726027394, | |
| "grad_norm": 0.051344264298677444, | |
| "learning_rate": 4.989566174939183e-07, | |
| "loss": 2.21e-05, | |
| "memory(GiB)": 14.16, | |
| "step": 1555, | |
| "train_speed(iter/s)": 0.169675 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 42.73972602739726, | |
| "grad_norm": 0.0012037245323881507, | |
| "learning_rate": 4.791663750924617e-07, | |
| "loss": 8.75e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1560, | |
| "train_speed(iter/s)": 0.170075 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 42.87671232876713, | |
| "grad_norm": 0.0008853294420987368, | |
| "learning_rate": 4.5975764717801586e-07, | |
| "loss": 9.42e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1565, | |
| "train_speed(iter/s)": 0.170474 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 43.013698630136986, | |
| "grad_norm": 0.0008409248548559844, | |
| "learning_rate": 4.407320714777398e-07, | |
| "loss": 8.56e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1570, | |
| "train_speed(iter/s)": 0.170865 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 43.15068493150685, | |
| "grad_norm": 0.000995440874248743, | |
| "learning_rate": 4.2209125338804007e-07, | |
| "loss": 9.18e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1575, | |
| "train_speed(iter/s)": 0.171254 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 43.28767123287671, | |
| "grad_norm": 0.0008801660733297467, | |
| "learning_rate": 4.0383676583910706e-07, | |
| "loss": 7.03e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1580, | |
| "train_speed(iter/s)": 0.17164 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 43.42465753424658, | |
| "grad_norm": 0.0007337812567129731, | |
| "learning_rate": 3.859701491621833e-07, | |
| "loss": 7.99e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1585, | |
| "train_speed(iter/s)": 0.172037 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 43.56164383561644, | |
| "grad_norm": 0.0009611019631847739, | |
| "learning_rate": 3.6849291095959866e-07, | |
| "loss": 8.49e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1590, | |
| "train_speed(iter/s)": 0.172427 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 43.6986301369863, | |
| "grad_norm": 0.0009541260078549385, | |
| "learning_rate": 3.5140652597754917e-07, | |
| "loss": 7.95e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1595, | |
| "train_speed(iter/s)": 0.172819 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 43.83561643835616, | |
| "grad_norm": 0.0007106245611794293, | |
| "learning_rate": 3.3471243598165825e-07, | |
| "loss": 7.9e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1600, | |
| "train_speed(iter/s)": 0.173214 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 43.97260273972603, | |
| "grad_norm": 0.0014047367731109262, | |
| "learning_rate": 3.184120496353248e-07, | |
| "loss": 7.76e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1605, | |
| "train_speed(iter/s)": 0.173593 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 44.10958904109589, | |
| "grad_norm": 0.0007362644537352026, | |
| "learning_rate": 3.025067423808514e-07, | |
| "loss": 7.02e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1610, | |
| "train_speed(iter/s)": 0.17397 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 44.24657534246575, | |
| "grad_norm": 0.0006499322480522096, | |
| "learning_rate": 2.8699785632338603e-07, | |
| "loss": 6.68e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1615, | |
| "train_speed(iter/s)": 0.174356 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 44.38356164383562, | |
| "grad_norm": 0.0008433948969468474, | |
| "learning_rate": 2.7188670011767715e-07, | |
| "loss": 6.54e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1620, | |
| "train_speed(iter/s)": 0.174739 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 44.52054794520548, | |
| "grad_norm": 0.0009852251969277859, | |
| "learning_rate": 2.571745488576417e-07, | |
| "loss": 6.99e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1625, | |
| "train_speed(iter/s)": 0.175119 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 44.657534246575345, | |
| "grad_norm": 0.0006441141595132649, | |
| "learning_rate": 2.42862643968775e-07, | |
| "loss": 6.3e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1630, | |
| "train_speed(iter/s)": 0.175506 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 44.794520547945204, | |
| "grad_norm": 0.0006608326220884919, | |
| "learning_rate": 2.289521931034023e-07, | |
| "loss": 5.96e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1635, | |
| "train_speed(iter/s)": 0.175891 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 44.93150684931507, | |
| "grad_norm": 0.0005597140407189727, | |
| "learning_rate": 2.1544437003876737e-07, | |
| "loss": 5.43e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1640, | |
| "train_speed(iter/s)": 0.176268 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 45.06849315068493, | |
| "grad_norm": 0.000521883659530431, | |
| "learning_rate": 2.023403145779931e-07, | |
| "loss": 5.55e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1645, | |
| "train_speed(iter/s)": 0.176636 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 45.205479452054796, | |
| "grad_norm": 0.000552397221326828, | |
| "learning_rate": 1.8964113245390256e-07, | |
| "loss": 5.25e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1650, | |
| "train_speed(iter/s)": 0.17702 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 45.342465753424655, | |
| "grad_norm": 0.0008391111623495817, | |
| "learning_rate": 1.7734789523571958e-07, | |
| "loss": 5.77e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1655, | |
| "train_speed(iter/s)": 0.177399 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 45.47945205479452, | |
| "grad_norm": 0.0007058508927002549, | |
| "learning_rate": 1.654616402386414e-07, | |
| "loss": 5.41e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1660, | |
| "train_speed(iter/s)": 0.17778 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 45.61643835616438, | |
| "grad_norm": 0.0005362857482396066, | |
| "learning_rate": 1.5398337043631723e-07, | |
| "loss": 5.57e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1665, | |
| "train_speed(iter/s)": 0.178163 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 45.75342465753425, | |
| "grad_norm": 0.0008399708895012736, | |
| "learning_rate": 1.429140543762108e-07, | |
| "loss": 5.51e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1670, | |
| "train_speed(iter/s)": 0.178535 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 45.89041095890411, | |
| "grad_norm": 0.0004938300116918981, | |
| "learning_rate": 1.3225462609787475e-07, | |
| "loss": 4.95e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1675, | |
| "train_speed(iter/s)": 0.178916 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 46.02739726027397, | |
| "grad_norm": 0.00045353075256571174, | |
| "learning_rate": 1.220059850541356e-07, | |
| "loss": 4.52e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1680, | |
| "train_speed(iter/s)": 0.179293 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 46.16438356164384, | |
| "grad_norm": 0.000485074648167938, | |
| "learning_rate": 1.1216899603519877e-07, | |
| "loss": 4.22e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1685, | |
| "train_speed(iter/s)": 0.179668 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 46.3013698630137, | |
| "grad_norm": 0.0004754703550133854, | |
| "learning_rate": 1.0274448909567412e-07, | |
| "loss": 4.68e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1690, | |
| "train_speed(iter/s)": 0.180037 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 46.43835616438356, | |
| "grad_norm": 0.0005320140044204891, | |
| "learning_rate": 9.373325948453684e-08, | |
| "loss": 4.76e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1695, | |
| "train_speed(iter/s)": 0.180414 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 46.57534246575342, | |
| "grad_norm": 0.0006507379002869129, | |
| "learning_rate": 8.513606757802232e-08, | |
| "loss": 4.92e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1700, | |
| "train_speed(iter/s)": 0.180781 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 46.71232876712329, | |
| "grad_norm": 0.00036800041561946273, | |
| "learning_rate": 7.695363881546601e-08, | |
| "loss": 4.32e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1705, | |
| "train_speed(iter/s)": 0.181149 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 46.84931506849315, | |
| "grad_norm": 0.0005077983951196074, | |
| "learning_rate": 6.918666363808976e-08, | |
| "loss": 4.51e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1710, | |
| "train_speed(iter/s)": 0.181526 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 46.986301369863014, | |
| "grad_norm": 0.00036885106237605214, | |
| "learning_rate": 6.183579743074136e-08, | |
| "loss": 3.97e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1715, | |
| "train_speed(iter/s)": 0.181896 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 47.12328767123287, | |
| "grad_norm": 0.0005106101161800325, | |
| "learning_rate": 5.49016604665933e-08, | |
| "loss": 4.26e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1720, | |
| "train_speed(iter/s)": 0.182255 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 47.26027397260274, | |
| "grad_norm": 0.0004045426903758198, | |
| "learning_rate": 4.838483785480255e-08, | |
| "loss": 4.1e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1725, | |
| "train_speed(iter/s)": 0.182623 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 47.397260273972606, | |
| "grad_norm": 0.00039062247378751636, | |
| "learning_rate": 4.2285879491139524e-08, | |
| "loss": 4.1e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1730, | |
| "train_speed(iter/s)": 0.182986 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 47.534246575342465, | |
| "grad_norm": 0.0004385727515909821, | |
| "learning_rate": 3.660530001158591e-08, | |
| "loss": 4.19e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1735, | |
| "train_speed(iter/s)": 0.183351 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 47.67123287671233, | |
| "grad_norm": 0.0004706868203356862, | |
| "learning_rate": 3.1343578748911556e-08, | |
| "loss": 3.9e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1740, | |
| "train_speed(iter/s)": 0.183717 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 47.80821917808219, | |
| "grad_norm": 0.0005658628651872277, | |
| "learning_rate": 2.6501159692225276e-08, | |
| "loss": 3.95e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1745, | |
| "train_speed(iter/s)": 0.184078 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 47.945205479452056, | |
| "grad_norm": 0.00047224326408468187, | |
| "learning_rate": 2.2078451449511957e-08, | |
| "loss": 4.29e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1750, | |
| "train_speed(iter/s)": 0.184474 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 48.082191780821915, | |
| "grad_norm": 0.0004509200807660818, | |
| "learning_rate": 1.80758272131541e-08, | |
| "loss": 4.3e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1755, | |
| "train_speed(iter/s)": 0.184853 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 48.21917808219178, | |
| "grad_norm": 0.00045020331162959337, | |
| "learning_rate": 1.4493624728440738e-08, | |
| "loss": 4.35e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1760, | |
| "train_speed(iter/s)": 0.185233 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 48.35616438356164, | |
| "grad_norm": 0.00043748278403654695, | |
| "learning_rate": 1.1332146265068076e-08, | |
| "loss": 4.28e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1765, | |
| "train_speed(iter/s)": 0.18561 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 48.49315068493151, | |
| "grad_norm": 0.0003865604812745005, | |
| "learning_rate": 8.591658591635788e-09, | |
| "loss": 3.95e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1770, | |
| "train_speed(iter/s)": 0.185983 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 48.63013698630137, | |
| "grad_norm": 0.0005739238113164902, | |
| "learning_rate": 6.272392953132284e-09, | |
| "loss": 4.09e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1775, | |
| "train_speed(iter/s)": 0.186366 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 48.76712328767123, | |
| "grad_norm": 0.0004063249798491597, | |
| "learning_rate": 4.3745450514278e-09, | |
| "loss": 3.76e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1780, | |
| "train_speed(iter/s)": 0.186745 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 48.9041095890411, | |
| "grad_norm": 0.0003818414988927543, | |
| "learning_rate": 2.8982750287553984e-09, | |
| "loss": 3.65e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1785, | |
| "train_speed(iter/s)": 0.187115 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 49.04109589041096, | |
| "grad_norm": 0.0005809293361380696, | |
| "learning_rate": 1.843707454203115e-09, | |
| "loss": 4.11e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1790, | |
| "train_speed(iter/s)": 0.187485 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 49.178082191780824, | |
| "grad_norm": 0.00041514140320941806, | |
| "learning_rate": 1.210931313197315e-09, | |
| "loss": 3.93e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1795, | |
| "train_speed(iter/s)": 0.187855 | |
| }, | |
| { | |
| "acc": 1.0, | |
| "epoch": 49.31506849315068, | |
| "grad_norm": 0.0005256779259070754, | |
| "learning_rate": 1e-09, | |
| "loss": 3.98e-06, | |
| "memory(GiB)": 14.16, | |
| "step": 1800, | |
| "train_speed(iter/s)": 0.188225 | |
| }, | |
| { | |
| "epoch": 49.31506849315068, | |
| "eval_acc": 0.3744860345334727, | |
| "eval_loss": 5.818352222442627, | |
| "eval_runtime": 1948.2317, | |
| "eval_samples_per_second": 16.437, | |
| "eval_steps_per_second": 2.055, | |
| "step": 1800 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1800, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.503384077997179e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |