| { | |
| "best_metric": 0.33170731707317075, | |
| "best_model_checkpoint": "kharato/videomae-finetuned_41\\checkpoint-55494", | |
| "epoch": 19.049691758598314, | |
| "eval_steps": 500, | |
| "global_step": 61640, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 10.622191429138184, | |
| "learning_rate": 8.111615833874108e-07, | |
| "loss": 2.4377, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 16.160688400268555, | |
| "learning_rate": 1.6223231667748216e-06, | |
| "loss": 1.981, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 20.07671546936035, | |
| "learning_rate": 2.4334847501622322e-06, | |
| "loss": 1.5439, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 14.07224178314209, | |
| "learning_rate": 3.244646333549643e-06, | |
| "loss": 1.2921, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 10.083611488342285, | |
| "learning_rate": 4.055807916937054e-06, | |
| "loss": 1.0956, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 34.907562255859375, | |
| "learning_rate": 4.8669695003244645e-06, | |
| "loss": 0.9225, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 15.239115715026855, | |
| "learning_rate": 5.678131083711875e-06, | |
| "loss": 0.9422, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 38.96265411376953, | |
| "learning_rate": 6.489292667099286e-06, | |
| "loss": 0.7609, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.6985247135162354, | |
| "learning_rate": 7.300454250486698e-06, | |
| "loss": 0.733, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.35616084933280945, | |
| "learning_rate": 8.111615833874107e-06, | |
| "loss": 0.6481, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 40.93389892578125, | |
| "learning_rate": 8.922777417261519e-06, | |
| "loss": 0.735, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.05193087458610535, | |
| "learning_rate": 9.733939000648929e-06, | |
| "loss": 0.6549, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 31.53761100769043, | |
| "learning_rate": 1.054510058403634e-05, | |
| "loss": 0.6235, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 0.07755979150533676, | |
| "learning_rate": 1.135626216742375e-05, | |
| "loss": 0.6016, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 7.711596965789795, | |
| "learning_rate": 1.2167423750811163e-05, | |
| "loss": 0.5175, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.3008130081300813, | |
| "eval_loss": 4.697707653045654, | |
| "eval_runtime": 416.069, | |
| "eval_samples_per_second": 5.912, | |
| "eval_steps_per_second": 2.956, | |
| "step": 3083 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 49.3885612487793, | |
| "learning_rate": 1.2978585334198573e-05, | |
| "loss": 0.6634, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 0.030238119885325432, | |
| "learning_rate": 1.3789746917585983e-05, | |
| "loss": 0.6283, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 0.3487834334373474, | |
| "learning_rate": 1.4600908500973396e-05, | |
| "loss": 0.4957, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 0.025577368214726448, | |
| "learning_rate": 1.5412070084360804e-05, | |
| "loss": 0.4649, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 0.12513820827007294, | |
| "learning_rate": 1.6223231667748214e-05, | |
| "loss": 0.4836, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 0.9465208053588867, | |
| "learning_rate": 1.7034393251135628e-05, | |
| "loss": 0.5041, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 72.26908874511719, | |
| "learning_rate": 1.7845554834523038e-05, | |
| "loss": 0.4779, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 0.20234385132789612, | |
| "learning_rate": 1.865671641791045e-05, | |
| "loss": 0.563, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 0.036579377949237823, | |
| "learning_rate": 1.9467878001297858e-05, | |
| "loss": 0.4415, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 1.33750319480896, | |
| "learning_rate": 2.0279039584685268e-05, | |
| "loss": 0.4937, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 0.014029554091393948, | |
| "learning_rate": 2.109020116807268e-05, | |
| "loss": 0.5405, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 52.25617218017578, | |
| "learning_rate": 2.190136275146009e-05, | |
| "loss": 0.5149, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 94.20246124267578, | |
| "learning_rate": 2.27125243348475e-05, | |
| "loss": 0.5254, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 1.6156303882598877, | |
| "learning_rate": 2.3523685918234915e-05, | |
| "loss": 0.3886, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 0.011294134892523289, | |
| "learning_rate": 2.4334847501622325e-05, | |
| "loss": 0.5311, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_accuracy": 0.308130081300813, | |
| "eval_loss": 5.601172924041748, | |
| "eval_runtime": 435.0935, | |
| "eval_samples_per_second": 5.654, | |
| "eval_steps_per_second": 2.827, | |
| "step": 6166 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 29.597869873046875, | |
| "learning_rate": 2.5146009085009735e-05, | |
| "loss": 0.5912, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.028715774416923523, | |
| "learning_rate": 2.5957170668397145e-05, | |
| "loss": 0.3763, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 0.07937850058078766, | |
| "learning_rate": 2.6768332251784555e-05, | |
| "loss": 0.4722, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 0.005830916576087475, | |
| "learning_rate": 2.7579493835171965e-05, | |
| "loss": 0.5422, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 0.1301320195198059, | |
| "learning_rate": 2.8390655418559382e-05, | |
| "loss": 0.624, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 0.011467114090919495, | |
| "learning_rate": 2.9201817001946792e-05, | |
| "loss": 0.3777, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 0.0031489874236285686, | |
| "learning_rate": 3.0012978585334202e-05, | |
| "loss": 0.5277, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 16.498056411743164, | |
| "learning_rate": 3.082414016872161e-05, | |
| "loss": 0.6094, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 4.466196060180664, | |
| "learning_rate": 3.163530175210902e-05, | |
| "loss": 0.5631, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 0.01907368004322052, | |
| "learning_rate": 3.244646333549643e-05, | |
| "loss": 0.4888, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 0.035668205469846725, | |
| "learning_rate": 3.325762491888384e-05, | |
| "loss": 0.6714, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 0.12581172585487366, | |
| "learning_rate": 3.4068786502271256e-05, | |
| "loss": 0.542, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 97.48197174072266, | |
| "learning_rate": 3.487994808565866e-05, | |
| "loss": 0.4788, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 3.1435787677764893, | |
| "learning_rate": 3.5691109669046076e-05, | |
| "loss": 0.5118, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 0.08236628770828247, | |
| "learning_rate": 3.650227125243348e-05, | |
| "loss": 0.5886, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 0.015331330709159374, | |
| "learning_rate": 3.73134328358209e-05, | |
| "loss": 0.5884, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "eval_accuracy": 0.317479674796748, | |
| "eval_loss": 6.225230693817139, | |
| "eval_runtime": 413.95, | |
| "eval_samples_per_second": 5.943, | |
| "eval_steps_per_second": 2.971, | |
| "step": 9249 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.9976195096969604, | |
| "learning_rate": 3.812459441920831e-05, | |
| "loss": 0.4934, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "grad_norm": 122.0944595336914, | |
| "learning_rate": 3.8935756002595716e-05, | |
| "loss": 0.6045, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "grad_norm": 0.06623541563749313, | |
| "learning_rate": 3.974691758598313e-05, | |
| "loss": 0.4791, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "grad_norm": 66.04131317138672, | |
| "learning_rate": 4.0558079169370536e-05, | |
| "loss": 0.6775, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "grad_norm": 55.43952178955078, | |
| "learning_rate": 4.1369240752757956e-05, | |
| "loss": 0.5957, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "grad_norm": 0.3894006311893463, | |
| "learning_rate": 4.218040233614536e-05, | |
| "loss": 0.4787, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "grad_norm": 0.6404015421867371, | |
| "learning_rate": 4.2991563919532776e-05, | |
| "loss": 0.5884, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "grad_norm": 0.4849078357219696, | |
| "learning_rate": 4.380272550292018e-05, | |
| "loss": 0.5479, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "grad_norm": 47.85725402832031, | |
| "learning_rate": 4.461388708630759e-05, | |
| "loss": 0.6813, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "grad_norm": 34.00189971923828, | |
| "learning_rate": 4.5425048669695e-05, | |
| "loss": 0.6548, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "grad_norm": 0.06875142455101013, | |
| "learning_rate": 4.6236210253082417e-05, | |
| "loss": 0.549, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 30.920330047607422, | |
| "learning_rate": 4.704737183646983e-05, | |
| "loss": 0.4287, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 0.029797730967402458, | |
| "learning_rate": 4.785853341985724e-05, | |
| "loss": 0.3832, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 143.6451873779297, | |
| "learning_rate": 4.866969500324465e-05, | |
| "loss": 0.6604, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "grad_norm": 16.26555824279785, | |
| "learning_rate": 4.948085658663206e-05, | |
| "loss": 0.5206, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "eval_accuracy": 0.32479674796747965, | |
| "eval_loss": 6.791728496551514, | |
| "eval_runtime": 418.2496, | |
| "eval_samples_per_second": 5.882, | |
| "eval_steps_per_second": 2.941, | |
| "step": 12332 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 95.94632720947266, | |
| "learning_rate": 4.992699545749514e-05, | |
| "loss": 0.5541, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.03315176069736481, | |
| "learning_rate": 4.9724205061648285e-05, | |
| "loss": 0.6088, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "grad_norm": 0.04930103197693825, | |
| "learning_rate": 4.952141466580143e-05, | |
| "loss": 0.5268, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "grad_norm": 0.12131338566541672, | |
| "learning_rate": 4.9318624269954575e-05, | |
| "loss": 0.4941, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "grad_norm": 18.080978393554688, | |
| "learning_rate": 4.9115833874107724e-05, | |
| "loss": 0.663, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "grad_norm": 26.085647583007812, | |
| "learning_rate": 4.891304347826087e-05, | |
| "loss": 0.6625, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "grad_norm": 2.3316972255706787, | |
| "learning_rate": 4.871025308241402e-05, | |
| "loss": 0.6131, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "grad_norm": 8.68952751159668, | |
| "learning_rate": 4.850746268656717e-05, | |
| "loss": 0.6193, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "grad_norm": 0.022995395585894585, | |
| "learning_rate": 4.830467229072032e-05, | |
| "loss": 0.6274, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "grad_norm": 0.037255994975566864, | |
| "learning_rate": 4.8101881894873465e-05, | |
| "loss": 0.5041, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "grad_norm": 0.034200407564640045, | |
| "learning_rate": 4.7899091499026614e-05, | |
| "loss": 0.6563, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 0.019007038325071335, | |
| "learning_rate": 4.7696301103179755e-05, | |
| "loss": 0.6542, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 0.019861804321408272, | |
| "learning_rate": 4.7493510707332904e-05, | |
| "loss": 0.4505, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 7.428478240966797, | |
| "learning_rate": 4.7290720311486045e-05, | |
| "loss": 0.5076, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "grad_norm": 0.04943707585334778, | |
| "learning_rate": 4.7087929915639194e-05, | |
| "loss": 0.5715, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "grad_norm": 0.3862577974796295, | |
| "learning_rate": 4.688513951979234e-05, | |
| "loss": 0.4449, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "eval_accuracy": 0.3008130081300813, | |
| "eval_loss": 6.194313049316406, | |
| "eval_runtime": 417.8428, | |
| "eval_samples_per_second": 5.887, | |
| "eval_steps_per_second": 2.944, | |
| "step": 15415 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.017423830926418304, | |
| "learning_rate": 4.668234912394549e-05, | |
| "loss": 0.5838, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "grad_norm": 0.0035349351819604635, | |
| "learning_rate": 4.647955872809864e-05, | |
| "loss": 0.3539, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "grad_norm": 0.009847632609307766, | |
| "learning_rate": 4.627676833225179e-05, | |
| "loss": 0.5626, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "grad_norm": 4.297732353210449, | |
| "learning_rate": 4.6073977936404935e-05, | |
| "loss": 0.4804, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "grad_norm": 1.121057152748108, | |
| "learning_rate": 4.5871187540558084e-05, | |
| "loss": 0.4478, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "grad_norm": 4.427486896514893, | |
| "learning_rate": 4.5668397144711225e-05, | |
| "loss": 0.5708, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "grad_norm": 9.073264122009277, | |
| "learning_rate": 4.5465606748864373e-05, | |
| "loss": 0.5403, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "grad_norm": 0.024741439148783684, | |
| "learning_rate": 4.526281635301752e-05, | |
| "loss": 0.4263, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "grad_norm": 0.03147607669234276, | |
| "learning_rate": 4.506002595717067e-05, | |
| "loss": 0.3275, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "grad_norm": 0.013691963627934456, | |
| "learning_rate": 4.485723556132382e-05, | |
| "loss": 0.5028, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "grad_norm": 0.08244924992322922, | |
| "learning_rate": 4.465444516547697e-05, | |
| "loss": 0.5051, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "grad_norm": 0.012058720923960209, | |
| "learning_rate": 4.4451654769630115e-05, | |
| "loss": 0.4097, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "grad_norm": 0.0613565556704998, | |
| "learning_rate": 4.4248864373783263e-05, | |
| "loss": 0.4333, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "grad_norm": 39.56247329711914, | |
| "learning_rate": 4.404607397793641e-05, | |
| "loss": 0.598, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "grad_norm": 15.885381698608398, | |
| "learning_rate": 4.384328358208955e-05, | |
| "loss": 0.3783, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "eval_accuracy": 0.3150406504065041, | |
| "eval_loss": 6.833878993988037, | |
| "eval_runtime": 418.6571, | |
| "eval_samples_per_second": 5.876, | |
| "eval_steps_per_second": 2.938, | |
| "step": 18498 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.02771819196641445, | |
| "learning_rate": 4.36404931862427e-05, | |
| "loss": 0.4326, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.018607838079333305, | |
| "learning_rate": 4.343770279039585e-05, | |
| "loss": 0.4859, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "grad_norm": 0.5698729157447815, | |
| "learning_rate": 4.3234912394549e-05, | |
| "loss": 0.4803, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "grad_norm": 0.041256897151470184, | |
| "learning_rate": 4.303212199870215e-05, | |
| "loss": 0.4549, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "grad_norm": 27.683008193969727, | |
| "learning_rate": 4.282933160285529e-05, | |
| "loss": 0.5179, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "grad_norm": 0.08311375975608826, | |
| "learning_rate": 4.2626541207008437e-05, | |
| "loss": 0.3603, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "grad_norm": 0.19924193620681763, | |
| "learning_rate": 4.2423750811161585e-05, | |
| "loss": 0.4939, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "grad_norm": 0.035903461277484894, | |
| "learning_rate": 4.222096041531473e-05, | |
| "loss": 0.4792, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "grad_norm": 0.004007269628345966, | |
| "learning_rate": 4.201817001946788e-05, | |
| "loss": 0.453, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "grad_norm": 0.02423214167356491, | |
| "learning_rate": 4.181537962362102e-05, | |
| "loss": 0.3846, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "grad_norm": 0.023962557315826416, | |
| "learning_rate": 4.161258922777417e-05, | |
| "loss": 0.4691, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "grad_norm": 0.08679631352424622, | |
| "learning_rate": 4.140979883192732e-05, | |
| "loss": 0.484, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "grad_norm": 55.87190246582031, | |
| "learning_rate": 4.120700843608047e-05, | |
| "loss": 0.3933, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "grad_norm": 0.0048807836137712, | |
| "learning_rate": 4.1004218040233617e-05, | |
| "loss": 0.3291, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "grad_norm": 0.015589645132422447, | |
| "learning_rate": 4.0801427644386765e-05, | |
| "loss": 0.5032, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "eval_accuracy": 0.30772357723577237, | |
| "eval_loss": 6.656611919403076, | |
| "eval_runtime": 419.2486, | |
| "eval_samples_per_second": 5.868, | |
| "eval_steps_per_second": 2.934, | |
| "step": 21581 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.02622975967824459, | |
| "learning_rate": 4.059863724853991e-05, | |
| "loss": 0.4267, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.039106499403715134, | |
| "learning_rate": 4.039584685269306e-05, | |
| "loss": 0.3826, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "grad_norm": 0.012655826285481453, | |
| "learning_rate": 4.019305645684621e-05, | |
| "loss": 0.3964, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "grad_norm": 0.0034593914169818163, | |
| "learning_rate": 3.999026606099935e-05, | |
| "loss": 0.4858, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "grad_norm": 0.3785853087902069, | |
| "learning_rate": 3.97874756651525e-05, | |
| "loss": 0.4492, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "grad_norm": 0.09961965680122375, | |
| "learning_rate": 3.958468526930565e-05, | |
| "loss": 0.3808, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "grad_norm": 31.060867309570312, | |
| "learning_rate": 3.9381894873458796e-05, | |
| "loss": 0.3712, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "grad_norm": 0.023167919367551804, | |
| "learning_rate": 3.9179104477611945e-05, | |
| "loss": 0.4784, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "grad_norm": 0.030744561925530434, | |
| "learning_rate": 3.897631408176509e-05, | |
| "loss": 0.3833, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "grad_norm": 49.66160202026367, | |
| "learning_rate": 3.877352368591824e-05, | |
| "loss": 0.3776, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "grad_norm": 0.0036757669877260923, | |
| "learning_rate": 3.857073329007138e-05, | |
| "loss": 0.4413, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "grad_norm": 0.24145947396755219, | |
| "learning_rate": 3.836794289422453e-05, | |
| "loss": 0.3737, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "grad_norm": 0.36743244528770447, | |
| "learning_rate": 3.816515249837768e-05, | |
| "loss": 0.3371, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "grad_norm": 0.5987332463264465, | |
| "learning_rate": 3.796236210253082e-05, | |
| "loss": 0.4583, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "grad_norm": 0.009368489496409893, | |
| "learning_rate": 3.775957170668397e-05, | |
| "loss": 0.2901, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "grad_norm": 0.033730778843164444, | |
| "learning_rate": 3.755678131083712e-05, | |
| "loss": 0.4091, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "eval_accuracy": 0.29715447154471547, | |
| "eval_loss": 6.801322937011719, | |
| "eval_runtime": 420.5633, | |
| "eval_samples_per_second": 5.849, | |
| "eval_steps_per_second": 2.925, | |
| "step": 24664 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.5106008648872375, | |
| "learning_rate": 3.7353990914990266e-05, | |
| "loss": 0.4906, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 8.01, | |
| "grad_norm": 0.027176540344953537, | |
| "learning_rate": 3.7151200519143415e-05, | |
| "loss": 0.3089, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 8.01, | |
| "grad_norm": 0.005631732754409313, | |
| "learning_rate": 3.694841012329656e-05, | |
| "loss": 0.3547, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 8.01, | |
| "grad_norm": 0.004955723416060209, | |
| "learning_rate": 3.674561972744971e-05, | |
| "loss": 0.3477, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "grad_norm": 0.23866023123264313, | |
| "learning_rate": 3.654282933160286e-05, | |
| "loss": 0.3229, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "grad_norm": 0.025327768176794052, | |
| "learning_rate": 3.6340038935756e-05, | |
| "loss": 0.3664, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "grad_norm": 0.004922116175293922, | |
| "learning_rate": 3.613724853990915e-05, | |
| "loss": 0.3222, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "grad_norm": 0.49947389960289, | |
| "learning_rate": 3.59344581440623e-05, | |
| "loss": 0.3044, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 8.03, | |
| "grad_norm": 2.2153525352478027, | |
| "learning_rate": 3.5731667748215446e-05, | |
| "loss": 0.3326, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 8.03, | |
| "grad_norm": 62.326072692871094, | |
| "learning_rate": 3.5528877352368594e-05, | |
| "loss": 0.4252, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 8.03, | |
| "grad_norm": 0.028839513659477234, | |
| "learning_rate": 3.532608695652174e-05, | |
| "loss": 0.3653, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "grad_norm": 110.81560516357422, | |
| "learning_rate": 3.512329656067489e-05, | |
| "loss": 0.3337, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "grad_norm": 0.008337341248989105, | |
| "learning_rate": 3.492050616482804e-05, | |
| "loss": 0.2245, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "grad_norm": 0.019596580415964127, | |
| "learning_rate": 3.471771576898119e-05, | |
| "loss": 0.2529, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 8.05, | |
| "grad_norm": 0.005616022273898125, | |
| "learning_rate": 3.451492537313433e-05, | |
| "loss": 0.4436, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 8.05, | |
| "eval_accuracy": 0.3, | |
| "eval_loss": 6.854862213134766, | |
| "eval_runtime": 419.8758, | |
| "eval_samples_per_second": 5.859, | |
| "eval_steps_per_second": 2.929, | |
| "step": 27747 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.022624719887971878, | |
| "learning_rate": 3.431213497728748e-05, | |
| "loss": 0.3726, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.001994416816160083, | |
| "learning_rate": 3.410934458144062e-05, | |
| "loss": 0.1703, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "grad_norm": 48.61260986328125, | |
| "learning_rate": 3.390655418559377e-05, | |
| "loss": 0.2406, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "grad_norm": 0.03608255833387375, | |
| "learning_rate": 3.3703763789746916e-05, | |
| "loss": 0.3916, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "grad_norm": 0.005167305935174227, | |
| "learning_rate": 3.3500973393900064e-05, | |
| "loss": 0.3041, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "grad_norm": 0.09229105710983276, | |
| "learning_rate": 3.329818299805321e-05, | |
| "loss": 0.2965, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "grad_norm": 0.0017744365613907576, | |
| "learning_rate": 3.309539260220636e-05, | |
| "loss": 0.1857, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "grad_norm": 0.0021416887175291777, | |
| "learning_rate": 3.289260220635951e-05, | |
| "loss": 0.3291, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "grad_norm": 0.02957375906407833, | |
| "learning_rate": 3.268981181051266e-05, | |
| "loss": 0.3716, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "grad_norm": 0.04252824932336807, | |
| "learning_rate": 3.24870214146658e-05, | |
| "loss": 0.4442, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "grad_norm": 0.00921566691249609, | |
| "learning_rate": 3.228423101881895e-05, | |
| "loss": 0.3244, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "grad_norm": 0.02388688549399376, | |
| "learning_rate": 3.2081440622972096e-05, | |
| "loss": 0.4729, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "grad_norm": 0.02142206020653248, | |
| "learning_rate": 3.1878650227125244e-05, | |
| "loss": 0.3775, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "grad_norm": 110.55142211914062, | |
| "learning_rate": 3.167585983127839e-05, | |
| "loss": 0.1999, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 9.05, | |
| "grad_norm": 0.7394188642501831, | |
| "learning_rate": 3.147306943543154e-05, | |
| "loss": 0.3141, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 9.05, | |
| "grad_norm": 0.06363216042518616, | |
| "learning_rate": 3.127027903958469e-05, | |
| "loss": 0.3474, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 9.05, | |
| "eval_accuracy": 0.32682926829268294, | |
| "eval_loss": 7.001511573791504, | |
| "eval_runtime": 421.6251, | |
| "eval_samples_per_second": 5.835, | |
| "eval_steps_per_second": 2.917, | |
| "step": 30830 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.17996995151042938, | |
| "learning_rate": 3.106748864373784e-05, | |
| "loss": 0.2746, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 10.01, | |
| "grad_norm": 0.007576479576528072, | |
| "learning_rate": 3.0864698247890986e-05, | |
| "loss": 0.2044, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 10.01, | |
| "grad_norm": 0.013998846523463726, | |
| "learning_rate": 3.066190785204413e-05, | |
| "loss": 0.2771, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 10.01, | |
| "grad_norm": 0.0031363102607429028, | |
| "learning_rate": 3.045911745619728e-05, | |
| "loss": 0.3009, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 10.02, | |
| "grad_norm": 0.006260915659368038, | |
| "learning_rate": 3.0256327060350424e-05, | |
| "loss": 0.3288, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 10.02, | |
| "grad_norm": 0.08372914791107178, | |
| "learning_rate": 3.0053536664503572e-05, | |
| "loss": 0.3404, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 10.02, | |
| "grad_norm": 91.87712860107422, | |
| "learning_rate": 2.9850746268656714e-05, | |
| "loss": 0.429, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 10.03, | |
| "grad_norm": 0.0794682428240776, | |
| "learning_rate": 2.9647955872809862e-05, | |
| "loss": 0.3603, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 10.03, | |
| "grad_norm": 0.054609477519989014, | |
| "learning_rate": 2.944516547696301e-05, | |
| "loss": 0.2766, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 10.03, | |
| "grad_norm": 0.04762391373515129, | |
| "learning_rate": 2.924237508111616e-05, | |
| "loss": 0.229, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 10.04, | |
| "grad_norm": 0.004877444822341204, | |
| "learning_rate": 2.9039584685269304e-05, | |
| "loss": 0.3206, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 10.04, | |
| "grad_norm": 0.00354503421112895, | |
| "learning_rate": 2.8836794289422452e-05, | |
| "loss": 0.2281, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 10.04, | |
| "grad_norm": 0.0026297084987163544, | |
| "learning_rate": 2.86340038935756e-05, | |
| "loss": 0.2265, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 10.04, | |
| "grad_norm": 59.78186798095703, | |
| "learning_rate": 2.843121349772875e-05, | |
| "loss": 0.3863, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 10.05, | |
| "grad_norm": 0.036124564707279205, | |
| "learning_rate": 2.8228423101881897e-05, | |
| "loss": 0.2151, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 10.05, | |
| "eval_accuracy": 0.3040650406504065, | |
| "eval_loss": 7.767071723937988, | |
| "eval_runtime": 419.266, | |
| "eval_samples_per_second": 5.867, | |
| "eval_steps_per_second": 2.934, | |
| "step": 33913 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 0.011912085115909576, | |
| "learning_rate": 2.8025632706035042e-05, | |
| "loss": 0.1451, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 0.016937492415308952, | |
| "learning_rate": 2.782284231018819e-05, | |
| "loss": 0.3187, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 11.01, | |
| "grad_norm": 1.2451800107955933, | |
| "learning_rate": 2.762005191434134e-05, | |
| "loss": 0.2229, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 11.01, | |
| "grad_norm": 0.004575447645038366, | |
| "learning_rate": 2.7417261518494487e-05, | |
| "loss": 0.2205, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 11.01, | |
| "grad_norm": 0.004918406717479229, | |
| "learning_rate": 2.7214471122647632e-05, | |
| "loss": 0.1633, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 11.02, | |
| "grad_norm": 0.14480261504650116, | |
| "learning_rate": 2.701168072680078e-05, | |
| "loss": 0.2251, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 11.02, | |
| "grad_norm": 0.004981683101505041, | |
| "learning_rate": 2.680889033095393e-05, | |
| "loss": 0.1801, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 11.02, | |
| "grad_norm": 0.008614394813776016, | |
| "learning_rate": 2.6606099935107077e-05, | |
| "loss": 0.3143, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 11.03, | |
| "grad_norm": 0.40735259652137756, | |
| "learning_rate": 2.6403309539260222e-05, | |
| "loss": 0.3673, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 11.03, | |
| "grad_norm": 0.014218580909073353, | |
| "learning_rate": 2.620051914341337e-05, | |
| "loss": 0.2534, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 11.03, | |
| "grad_norm": 0.021330924704670906, | |
| "learning_rate": 2.599772874756652e-05, | |
| "loss": 0.263, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 11.04, | |
| "grad_norm": 0.013331553898751736, | |
| "learning_rate": 2.5794938351719667e-05, | |
| "loss": 0.3093, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 11.04, | |
| "grad_norm": 0.43031027913093567, | |
| "learning_rate": 2.559214795587281e-05, | |
| "loss": 0.3005, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 11.04, | |
| "grad_norm": 47.17981719970703, | |
| "learning_rate": 2.5389357560025957e-05, | |
| "loss": 0.1316, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 11.05, | |
| "grad_norm": 96.86531829833984, | |
| "learning_rate": 2.5186567164179102e-05, | |
| "loss": 0.3597, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 11.05, | |
| "eval_accuracy": 0.32926829268292684, | |
| "eval_loss": 7.072375297546387, | |
| "eval_runtime": 419.8993, | |
| "eval_samples_per_second": 5.859, | |
| "eval_steps_per_second": 2.929, | |
| "step": 36996 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 0.014351383782923222, | |
| "learning_rate": 2.4983776768332254e-05, | |
| "loss": 0.1933, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 0.07165095955133438, | |
| "learning_rate": 2.4780986372485402e-05, | |
| "loss": 0.1762, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 12.01, | |
| "grad_norm": 0.004130370914936066, | |
| "learning_rate": 2.457819597663855e-05, | |
| "loss": 0.2113, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 12.01, | |
| "grad_norm": 0.0024869125336408615, | |
| "learning_rate": 2.4375405580791695e-05, | |
| "loss": 0.274, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 12.01, | |
| "grad_norm": 0.00669575622305274, | |
| "learning_rate": 2.417261518494484e-05, | |
| "loss": 0.1877, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 12.02, | |
| "grad_norm": 0.2720281183719635, | |
| "learning_rate": 2.396982478909799e-05, | |
| "loss": 0.2101, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 12.02, | |
| "grad_norm": 0.00792625080794096, | |
| "learning_rate": 2.3767034393251137e-05, | |
| "loss": 0.2125, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 12.02, | |
| "grad_norm": 0.0033484152518212795, | |
| "learning_rate": 2.3564243997404285e-05, | |
| "loss": 0.115, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 12.03, | |
| "grad_norm": 0.0022839007433503866, | |
| "learning_rate": 2.336145360155743e-05, | |
| "loss": 0.1773, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 12.03, | |
| "grad_norm": 0.0013085936661809683, | |
| "learning_rate": 2.315866320571058e-05, | |
| "loss": 0.1654, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 12.03, | |
| "grad_norm": 0.023824598640203476, | |
| "learning_rate": 2.2955872809863727e-05, | |
| "loss": 0.2326, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 12.04, | |
| "grad_norm": 0.002132503315806389, | |
| "learning_rate": 2.2753082414016875e-05, | |
| "loss": 0.2223, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 12.04, | |
| "grad_norm": 0.0020501285325735807, | |
| "learning_rate": 2.255029201817002e-05, | |
| "loss": 0.2558, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 12.04, | |
| "grad_norm": 0.03084419295191765, | |
| "learning_rate": 2.2347501622323165e-05, | |
| "loss": 0.2664, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 12.05, | |
| "grad_norm": 0.026181140914559364, | |
| "learning_rate": 2.2144711226476314e-05, | |
| "loss": 0.3349, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 12.05, | |
| "grad_norm": 62.58173751831055, | |
| "learning_rate": 2.1941920830629462e-05, | |
| "loss": 0.1673, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 12.05, | |
| "eval_accuracy": 0.32479674796747965, | |
| "eval_loss": 7.580522537231445, | |
| "eval_runtime": 419.9065, | |
| "eval_samples_per_second": 5.858, | |
| "eval_steps_per_second": 2.929, | |
| "step": 40079 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 0.0021006192546337843, | |
| "learning_rate": 2.173913043478261e-05, | |
| "loss": 0.1841, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 13.01, | |
| "grad_norm": 0.0030403181444853544, | |
| "learning_rate": 2.1536340038935755e-05, | |
| "loss": 0.2535, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 13.01, | |
| "grad_norm": 0.008125518448650837, | |
| "learning_rate": 2.1333549643088903e-05, | |
| "loss": 0.2208, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 13.01, | |
| "grad_norm": 0.007459101267158985, | |
| "learning_rate": 2.1130759247242052e-05, | |
| "loss": 0.2123, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 13.01, | |
| "grad_norm": 0.0015231677098199725, | |
| "learning_rate": 2.09279688513952e-05, | |
| "loss": 0.2145, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 13.02, | |
| "grad_norm": 0.017687492072582245, | |
| "learning_rate": 2.072517845554835e-05, | |
| "loss": 0.1135, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 13.02, | |
| "grad_norm": 0.16305746138095856, | |
| "learning_rate": 2.0522388059701493e-05, | |
| "loss": 0.1654, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 13.02, | |
| "grad_norm": 0.0016574672190472484, | |
| "learning_rate": 2.0319597663854642e-05, | |
| "loss": 0.2207, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 13.03, | |
| "grad_norm": 0.007612856104969978, | |
| "learning_rate": 2.0116807268007787e-05, | |
| "loss": 0.1663, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 13.03, | |
| "grad_norm": 0.0031971693970263004, | |
| "learning_rate": 1.9914016872160935e-05, | |
| "loss": 0.1238, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 13.03, | |
| "grad_norm": 1.114168643951416, | |
| "learning_rate": 1.9711226476314083e-05, | |
| "loss": 0.1508, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 13.04, | |
| "grad_norm": 0.00322701339609921, | |
| "learning_rate": 1.950843608046723e-05, | |
| "loss": 0.0757, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 13.04, | |
| "grad_norm": 0.00376499374397099, | |
| "learning_rate": 1.9305645684620377e-05, | |
| "loss": 0.1904, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 13.04, | |
| "grad_norm": 0.006022193934768438, | |
| "learning_rate": 1.9102855288773525e-05, | |
| "loss": 0.2528, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 13.05, | |
| "grad_norm": 0.003072307910770178, | |
| "learning_rate": 1.8900064892926673e-05, | |
| "loss": 0.114, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 13.05, | |
| "eval_accuracy": 0.317479674796748, | |
| "eval_loss": 7.819610595703125, | |
| "eval_runtime": 419.7353, | |
| "eval_samples_per_second": 5.861, | |
| "eval_steps_per_second": 2.93, | |
| "step": 43162 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 85.4753646850586, | |
| "learning_rate": 1.8697274497079818e-05, | |
| "loss": 0.155, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 0.034654486924409866, | |
| "learning_rate": 1.8494484101232967e-05, | |
| "loss": 0.1435, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 14.01, | |
| "grad_norm": 0.004176551941782236, | |
| "learning_rate": 1.8291693705386115e-05, | |
| "loss": 0.1181, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 14.01, | |
| "grad_norm": 0.0013031965354457498, | |
| "learning_rate": 1.8088903309539263e-05, | |
| "loss": 0.2033, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 14.01, | |
| "grad_norm": 0.0004607917508110404, | |
| "learning_rate": 1.7886112913692408e-05, | |
| "loss": 0.2331, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 14.02, | |
| "grad_norm": 2.1360397338867188, | |
| "learning_rate": 1.7683322517845553e-05, | |
| "loss": 0.1684, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 14.02, | |
| "grad_norm": 0.0016966286348178983, | |
| "learning_rate": 1.74805321219987e-05, | |
| "loss": 0.1155, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 14.02, | |
| "grad_norm": 0.005142655223608017, | |
| "learning_rate": 1.727774172615185e-05, | |
| "loss": 0.1469, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 14.03, | |
| "grad_norm": 1.043182134628296, | |
| "learning_rate": 1.7074951330304998e-05, | |
| "loss": 0.0513, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 14.03, | |
| "grad_norm": 0.003510431619361043, | |
| "learning_rate": 1.6872160934458147e-05, | |
| "loss": 0.2092, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 14.03, | |
| "grad_norm": 0.004158710595220327, | |
| "learning_rate": 1.666937053861129e-05, | |
| "loss": 0.166, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 14.04, | |
| "grad_norm": 0.006816697306931019, | |
| "learning_rate": 1.646658014276444e-05, | |
| "loss": 0.1756, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 14.04, | |
| "grad_norm": 0.0019508616533130407, | |
| "learning_rate": 1.6263789746917588e-05, | |
| "loss": 0.0895, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 14.04, | |
| "grad_norm": 0.011971150524914265, | |
| "learning_rate": 1.6060999351070736e-05, | |
| "loss": 0.2169, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 14.05, | |
| "grad_norm": 0.0015203008661046624, | |
| "learning_rate": 1.585820895522388e-05, | |
| "loss": 0.1652, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 14.05, | |
| "grad_norm": 0.011360148899257183, | |
| "learning_rate": 1.5655418559377026e-05, | |
| "loss": 0.2088, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 14.05, | |
| "eval_accuracy": 0.32723577235772355, | |
| "eval_loss": 7.71033239364624, | |
| "eval_runtime": 424.2708, | |
| "eval_samples_per_second": 5.798, | |
| "eval_steps_per_second": 2.899, | |
| "step": 46245 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 0.0022605557460337877, | |
| "learning_rate": 1.5452628163530175e-05, | |
| "loss": 0.0726, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 15.01, | |
| "grad_norm": 0.024754885584115982, | |
| "learning_rate": 1.5249837767683323e-05, | |
| "loss": 0.1156, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 15.01, | |
| "grad_norm": 0.10212986171245575, | |
| "learning_rate": 1.504704737183647e-05, | |
| "loss": 0.0741, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 15.01, | |
| "grad_norm": 0.18410304188728333, | |
| "learning_rate": 1.4844256975989618e-05, | |
| "loss": 0.1824, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 15.02, | |
| "grad_norm": 0.00476705189794302, | |
| "learning_rate": 1.4641466580142765e-05, | |
| "loss": 0.1592, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 15.02, | |
| "grad_norm": 0.361130028963089, | |
| "learning_rate": 1.4438676184295913e-05, | |
| "loss": 0.053, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 15.02, | |
| "grad_norm": 0.0020235551055520773, | |
| "learning_rate": 1.423588578844906e-05, | |
| "loss": 0.1294, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 15.03, | |
| "grad_norm": 0.0014645768096670508, | |
| "learning_rate": 1.4033095392602208e-05, | |
| "loss": 0.119, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 15.03, | |
| "grad_norm": 0.3808715045452118, | |
| "learning_rate": 1.3830304996755356e-05, | |
| "loss": 0.1903, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 15.03, | |
| "grad_norm": 0.0016527449479326606, | |
| "learning_rate": 1.36275146009085e-05, | |
| "loss": 0.1918, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 15.03, | |
| "grad_norm": 50.08372497558594, | |
| "learning_rate": 1.3424724205061648e-05, | |
| "loss": 0.157, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 15.04, | |
| "grad_norm": 259.1443786621094, | |
| "learning_rate": 1.3221933809214796e-05, | |
| "loss": 0.1894, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 15.04, | |
| "grad_norm": 0.002545048715546727, | |
| "learning_rate": 1.3019143413367943e-05, | |
| "loss": 0.1257, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 15.04, | |
| "grad_norm": 0.2304425984621048, | |
| "learning_rate": 1.2816353017521091e-05, | |
| "loss": 0.1608, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 15.05, | |
| "grad_norm": 28.786087036132812, | |
| "learning_rate": 1.2613562621674238e-05, | |
| "loss": 0.1662, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 15.05, | |
| "eval_accuracy": 0.32479674796747965, | |
| "eval_loss": 7.761257648468018, | |
| "eval_runtime": 419.2032, | |
| "eval_samples_per_second": 5.868, | |
| "eval_steps_per_second": 2.934, | |
| "step": 49328 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 0.0010336956474930048, | |
| "learning_rate": 1.2410772225827386e-05, | |
| "loss": 0.1146, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 0.005545318126678467, | |
| "learning_rate": 1.2207981829980533e-05, | |
| "loss": 0.1001, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 16.01, | |
| "grad_norm": 0.0031858233269304037, | |
| "learning_rate": 1.200519143413368e-05, | |
| "loss": 0.1257, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 16.01, | |
| "grad_norm": 0.004460224881768227, | |
| "learning_rate": 1.1802401038286826e-05, | |
| "loss": 0.2025, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 16.01, | |
| "grad_norm": 93.79480743408203, | |
| "learning_rate": 1.1599610642439974e-05, | |
| "loss": 0.0833, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 16.02, | |
| "grad_norm": 0.003030031453818083, | |
| "learning_rate": 1.1396820246593123e-05, | |
| "loss": 0.1456, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 16.02, | |
| "grad_norm": 0.0017526369774714112, | |
| "learning_rate": 1.119402985074627e-05, | |
| "loss": 0.1136, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 16.02, | |
| "grad_norm": 0.002222576644271612, | |
| "learning_rate": 1.0991239454899418e-05, | |
| "loss": 0.1357, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 16.03, | |
| "grad_norm": 0.005358373746275902, | |
| "learning_rate": 1.0788449059052563e-05, | |
| "loss": 0.144, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 16.03, | |
| "grad_norm": 0.037487562745809555, | |
| "learning_rate": 1.0585658663205711e-05, | |
| "loss": 0.2054, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 16.03, | |
| "grad_norm": 0.052653077989816666, | |
| "learning_rate": 1.0382868267358858e-05, | |
| "loss": 0.2247, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 16.04, | |
| "grad_norm": 0.001781440805643797, | |
| "learning_rate": 1.0180077871512006e-05, | |
| "loss": 0.1049, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 16.04, | |
| "grad_norm": 0.001019317307509482, | |
| "learning_rate": 9.977287475665154e-06, | |
| "loss": 0.0469, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 16.04, | |
| "grad_norm": 0.6995309591293335, | |
| "learning_rate": 9.7744970798183e-06, | |
| "loss": 0.1745, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 16.05, | |
| "grad_norm": 0.19804002344608307, | |
| "learning_rate": 9.571706683971448e-06, | |
| "loss": 0.1293, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 16.05, | |
| "grad_norm": 0.005632157437503338, | |
| "learning_rate": 9.368916288124594e-06, | |
| "loss": 0.1961, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 16.05, | |
| "eval_accuracy": 0.32967479674796746, | |
| "eval_loss": 7.7729597091674805, | |
| "eval_runtime": 420.2271, | |
| "eval_samples_per_second": 5.854, | |
| "eval_steps_per_second": 2.927, | |
| "step": 52411 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 0.04664480686187744, | |
| "learning_rate": 9.166125892277743e-06, | |
| "loss": 0.1644, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 17.01, | |
| "grad_norm": 0.0033755158074200153, | |
| "learning_rate": 8.96333549643089e-06, | |
| "loss": 0.112, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 17.01, | |
| "grad_norm": 0.0010055933380499482, | |
| "learning_rate": 8.760545100584036e-06, | |
| "loss": 0.0775, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 17.01, | |
| "grad_norm": 0.002419169992208481, | |
| "learning_rate": 8.557754704737184e-06, | |
| "loss": 0.0887, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 17.02, | |
| "grad_norm": 114.45608520507812, | |
| "learning_rate": 8.354964308890331e-06, | |
| "loss": 0.1805, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 17.02, | |
| "grad_norm": 0.009727099910378456, | |
| "learning_rate": 8.15217391304348e-06, | |
| "loss": 0.1292, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 17.02, | |
| "grad_norm": 0.01231451891362667, | |
| "learning_rate": 7.949383517196626e-06, | |
| "loss": 0.0268, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 17.03, | |
| "grad_norm": 0.20992113649845123, | |
| "learning_rate": 7.746593121349774e-06, | |
| "loss": 0.0512, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 17.03, | |
| "grad_norm": 0.001962395152077079, | |
| "learning_rate": 7.54380272550292e-06, | |
| "loss": 0.0564, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 17.03, | |
| "grad_norm": 0.007957936264574528, | |
| "learning_rate": 7.3410123296560675e-06, | |
| "loss": 0.062, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 17.04, | |
| "grad_norm": 19.138671875, | |
| "learning_rate": 7.138221933809215e-06, | |
| "loss": 0.1305, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 17.04, | |
| "grad_norm": 0.00127317919395864, | |
| "learning_rate": 6.9354315379623625e-06, | |
| "loss": 0.1225, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 17.04, | |
| "grad_norm": 0.001261857571080327, | |
| "learning_rate": 6.73264114211551e-06, | |
| "loss": 0.1043, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 17.05, | |
| "grad_norm": 0.0031231152825057507, | |
| "learning_rate": 6.529850746268657e-06, | |
| "loss": 0.0846, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 17.05, | |
| "grad_norm": 0.0025645680725574493, | |
| "learning_rate": 6.327060350421804e-06, | |
| "loss": 0.1436, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 17.05, | |
| "eval_accuracy": 0.33170731707317075, | |
| "eval_loss": 7.929662227630615, | |
| "eval_runtime": 418.991, | |
| "eval_samples_per_second": 5.871, | |
| "eval_steps_per_second": 2.936, | |
| "step": 55494 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.0005848600412718952, | |
| "learning_rate": 6.124269954574952e-06, | |
| "loss": 0.0734, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.0016508701955899596, | |
| "learning_rate": 5.921479558728099e-06, | |
| "loss": 0.0234, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 18.01, | |
| "grad_norm": 0.0011639483273029327, | |
| "learning_rate": 5.718689162881246e-06, | |
| "loss": 0.0497, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 18.01, | |
| "grad_norm": 64.46589660644531, | |
| "learning_rate": 5.515898767034393e-06, | |
| "loss": 0.1146, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 18.01, | |
| "grad_norm": 0.003238030942156911, | |
| "learning_rate": 5.313108371187541e-06, | |
| "loss": 0.0722, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 18.02, | |
| "grad_norm": 0.34975093603134155, | |
| "learning_rate": 5.110317975340688e-06, | |
| "loss": 0.0685, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 18.02, | |
| "grad_norm": 0.0005624560872092843, | |
| "learning_rate": 4.907527579493836e-06, | |
| "loss": 0.0781, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 18.02, | |
| "grad_norm": 0.20106494426727295, | |
| "learning_rate": 4.704737183646982e-06, | |
| "loss": 0.084, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 18.03, | |
| "grad_norm": 0.0013241646811366081, | |
| "learning_rate": 4.50194678780013e-06, | |
| "loss": 0.0977, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 18.03, | |
| "grad_norm": 0.019781263545155525, | |
| "learning_rate": 4.299156391953277e-06, | |
| "loss": 0.1543, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 18.03, | |
| "grad_norm": 0.001491773989982903, | |
| "learning_rate": 4.096365996106424e-06, | |
| "loss": 0.0821, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 18.04, | |
| "grad_norm": 0.00963684543967247, | |
| "learning_rate": 3.893575600259572e-06, | |
| "loss": 0.1385, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 18.04, | |
| "grad_norm": 0.0037320530973374844, | |
| "learning_rate": 3.6907852044127193e-06, | |
| "loss": 0.0359, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 18.04, | |
| "grad_norm": 0.0036789393052458763, | |
| "learning_rate": 3.4879948085658664e-06, | |
| "loss": 0.0686, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 18.05, | |
| "grad_norm": 0.0006985150394029915, | |
| "learning_rate": 3.285204412719014e-06, | |
| "loss": 0.1134, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 18.05, | |
| "eval_accuracy": 0.32682926829268294, | |
| "eval_loss": 8.044651985168457, | |
| "eval_runtime": 441.4014, | |
| "eval_samples_per_second": 5.573, | |
| "eval_steps_per_second": 2.787, | |
| "step": 58577 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 0.19236122071743011, | |
| "learning_rate": 3.082414016872161e-06, | |
| "loss": 0.065, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 0.002881132299080491, | |
| "learning_rate": 2.8796236210253085e-06, | |
| "loss": 0.0571, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 19.01, | |
| "grad_norm": 3.4622700214385986, | |
| "learning_rate": 2.6768332251784555e-06, | |
| "loss": 0.0339, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 19.01, | |
| "grad_norm": 0.0006050022784620523, | |
| "learning_rate": 2.474042829331603e-06, | |
| "loss": 0.1419, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 19.01, | |
| "grad_norm": 43.79890441894531, | |
| "learning_rate": 2.27125243348475e-06, | |
| "loss": 0.1329, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 19.02, | |
| "grad_norm": 0.0007294774986803532, | |
| "learning_rate": 2.0684620376378976e-06, | |
| "loss": 0.0427, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 19.02, | |
| "grad_norm": 5.0048394203186035, | |
| "learning_rate": 1.8656716417910446e-06, | |
| "loss": 0.103, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 19.02, | |
| "grad_norm": 0.002903040498495102, | |
| "learning_rate": 1.6628812459441923e-06, | |
| "loss": 0.1119, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 19.03, | |
| "grad_norm": 7.973681926727295, | |
| "learning_rate": 1.4600908500973394e-06, | |
| "loss": 0.1148, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 19.03, | |
| "grad_norm": 0.0013534132158383727, | |
| "learning_rate": 1.2573004542504867e-06, | |
| "loss": 0.1022, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 19.03, | |
| "grad_norm": 0.0024612874258309603, | |
| "learning_rate": 1.0545100584036342e-06, | |
| "loss": 0.1123, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 19.04, | |
| "grad_norm": 0.0006685277330689132, | |
| "learning_rate": 8.517196625567812e-07, | |
| "loss": 0.0382, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 19.04, | |
| "grad_norm": 111.42835235595703, | |
| "learning_rate": 6.489292667099286e-07, | |
| "loss": 0.0578, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 19.04, | |
| "grad_norm": 0.0009195853490382433, | |
| "learning_rate": 4.4613887086307594e-07, | |
| "loss": 0.0779, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 19.05, | |
| "grad_norm": 0.0009977706940844655, | |
| "learning_rate": 2.4334847501622327e-07, | |
| "loss": 0.0684, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 19.05, | |
| "grad_norm": 0.025145627558231354, | |
| "learning_rate": 4.055807916937054e-08, | |
| "loss": 0.0634, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 19.05, | |
| "eval_accuracy": 0.3304878048780488, | |
| "eval_loss": 7.971652507781982, | |
| "eval_runtime": 406.4924, | |
| "eval_samples_per_second": 6.052, | |
| "eval_steps_per_second": 3.026, | |
| "step": 61640 | |
| }, | |
| { | |
| "epoch": 19.05, | |
| "step": 61640, | |
| "total_flos": 1.5360349509135758e+20, | |
| "train_loss": 0.34115883751707676, | |
| "train_runtime": 65653.4691, | |
| "train_samples_per_second": 1.878, | |
| "train_steps_per_second": 0.939 | |
| }, | |
| { | |
| "epoch": 19.05, | |
| "eval_accuracy": 0.33170731707317075, | |
| "eval_loss": 7.929662227630615, | |
| "eval_runtime": 414.4754, | |
| "eval_samples_per_second": 5.935, | |
| "eval_steps_per_second": 2.968, | |
| "step": 61640 | |
| }, | |
| { | |
| "epoch": 19.05, | |
| "eval_accuracy": 0.33170731707317075, | |
| "eval_loss": 7.929662227630615, | |
| "eval_runtime": 300.9414, | |
| "eval_samples_per_second": 8.174, | |
| "eval_steps_per_second": 4.087, | |
| "step": 61640 | |
| } | |
| ], | |
| "logging_steps": 200, | |
| "max_steps": 61640, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 500, | |
| "total_flos": 1.5360349509135758e+20, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |