| { |
| "best_global_step": 18484, |
| "best_metric": 0.30577707290649414, |
| "best_model_checkpoint": "/content/drive/MyDrive/DanceAI/roberta_goemotions_6cat/checkpoint-18484", |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 18484, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005410084397316598, |
| "grad_norm": 0.5793700218200684, |
| "learning_rate": 1.997349058645315e-05, |
| "loss": 0.4819, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.010820168794633196, |
| "grad_norm": 2.736356735229492, |
| "learning_rate": 1.9946440164466567e-05, |
| "loss": 0.4086, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.016230253191949793, |
| "grad_norm": 2.994725227355957, |
| "learning_rate": 1.9919389742479985e-05, |
| "loss": 0.3826, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.02164033758926639, |
| "grad_norm": 2.1226229667663574, |
| "learning_rate": 1.98923393204934e-05, |
| "loss": 0.3733, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.02705042198658299, |
| "grad_norm": 2.5911431312561035, |
| "learning_rate": 1.986528889850682e-05, |
| "loss": 0.3587, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.032460506383899586, |
| "grad_norm": 2.090728282928467, |
| "learning_rate": 1.9838238476520234e-05, |
| "loss": 0.3521, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.037870590781216185, |
| "grad_norm": 2.1891307830810547, |
| "learning_rate": 1.9811188054533653e-05, |
| "loss": 0.3435, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.04328067517853278, |
| "grad_norm": 2.6659998893737793, |
| "learning_rate": 1.9784137632547068e-05, |
| "loss": 0.3556, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.04869075957584938, |
| "grad_norm": 2.140326499938965, |
| "learning_rate": 1.9757087210560487e-05, |
| "loss": 0.3345, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.05410084397316598, |
| "grad_norm": 2.83417010307312, |
| "learning_rate": 1.9730036788573902e-05, |
| "loss": 0.3542, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05951092837048258, |
| "grad_norm": 2.66451358795166, |
| "learning_rate": 1.970298636658732e-05, |
| "loss": 0.3409, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.06492101276779917, |
| "grad_norm": 3.015265941619873, |
| "learning_rate": 1.9675935944600736e-05, |
| "loss": 0.3515, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.07033109716511578, |
| "grad_norm": 1.8967560529708862, |
| "learning_rate": 1.9648885522614155e-05, |
| "loss": 0.3372, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.07574118156243237, |
| "grad_norm": 1.8910878896713257, |
| "learning_rate": 1.962183510062757e-05, |
| "loss": 0.3364, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.08115126595974897, |
| "grad_norm": 1.7993358373641968, |
| "learning_rate": 1.959478467864099e-05, |
| "loss": 0.3415, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.08656135035706557, |
| "grad_norm": 4.3601484298706055, |
| "learning_rate": 1.9567734256654404e-05, |
| "loss": 0.3106, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.09197143475438217, |
| "grad_norm": 3.3065075874328613, |
| "learning_rate": 1.9540683834667822e-05, |
| "loss": 0.3251, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.09738151915169876, |
| "grad_norm": 2.635791301727295, |
| "learning_rate": 1.9513633412681238e-05, |
| "loss": 0.3308, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.10279160354901537, |
| "grad_norm": 2.1672942638397217, |
| "learning_rate": 1.9486582990694656e-05, |
| "loss": 0.3242, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.10820168794633196, |
| "grad_norm": 1.772702693939209, |
| "learning_rate": 1.945953256870807e-05, |
| "loss": 0.3445, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.11361177234364857, |
| "grad_norm": 2.092458963394165, |
| "learning_rate": 1.943248214672149e-05, |
| "loss": 0.3317, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.11902185674096516, |
| "grad_norm": 3.1785027980804443, |
| "learning_rate": 1.9405431724734905e-05, |
| "loss": 0.3366, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.12443194113828175, |
| "grad_norm": 1.6550190448760986, |
| "learning_rate": 1.9378381302748324e-05, |
| "loss": 0.3245, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.12984202553559834, |
| "grad_norm": 2.842160701751709, |
| "learning_rate": 1.9351871889201474e-05, |
| "loss": 0.3141, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.13525210993291495, |
| "grad_norm": 3.7268848419189453, |
| "learning_rate": 1.932482146721489e-05, |
| "loss": 0.3217, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.14066219433023155, |
| "grad_norm": 2.2874679565429688, |
| "learning_rate": 1.9297771045228308e-05, |
| "loss": 0.313, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.14607227872754816, |
| "grad_norm": 2.965384006500244, |
| "learning_rate": 1.9270720623241723e-05, |
| "loss": 0.3112, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.15148236312486474, |
| "grad_norm": 2.0067362785339355, |
| "learning_rate": 1.924367020125514e-05, |
| "loss": 0.3207, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.15689244752218134, |
| "grad_norm": 1.780565619468689, |
| "learning_rate": 1.9216619779268557e-05, |
| "loss": 0.3186, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.16230253191949795, |
| "grad_norm": 1.802378535270691, |
| "learning_rate": 1.9189569357281975e-05, |
| "loss": 0.3236, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.16771261631681456, |
| "grad_norm": 2.4041481018066406, |
| "learning_rate": 1.916251893529539e-05, |
| "loss": 0.3316, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.17312270071413113, |
| "grad_norm": 2.3465633392333984, |
| "learning_rate": 1.913546851330881e-05, |
| "loss": 0.3288, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.17853278511144774, |
| "grad_norm": 2.623065948486328, |
| "learning_rate": 1.9108418091322228e-05, |
| "loss": 0.33, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.18394286950876435, |
| "grad_norm": 1.6979801654815674, |
| "learning_rate": 1.9081367669335643e-05, |
| "loss": 0.3234, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.18935295390608092, |
| "grad_norm": 2.876146078109741, |
| "learning_rate": 1.905431724734906e-05, |
| "loss": 0.3139, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.19476303830339753, |
| "grad_norm": 1.7591594457626343, |
| "learning_rate": 1.9027266825362477e-05, |
| "loss": 0.333, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.20017312270071413, |
| "grad_norm": 2.460012674331665, |
| "learning_rate": 1.9000216403375896e-05, |
| "loss": 0.3197, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.20558320709803074, |
| "grad_norm": 2.852691411972046, |
| "learning_rate": 1.897316598138931e-05, |
| "loss": 0.3177, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.21099329149534732, |
| "grad_norm": 2.61527419090271, |
| "learning_rate": 1.894611555940273e-05, |
| "loss": 0.3243, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.21640337589266392, |
| "grad_norm": 2.6071314811706543, |
| "learning_rate": 1.8919065137416145e-05, |
| "loss": 0.3098, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.22181346028998053, |
| "grad_norm": 3.4292609691619873, |
| "learning_rate": 1.8892014715429563e-05, |
| "loss": 0.3172, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.22722354468729714, |
| "grad_norm": 2.155167579650879, |
| "learning_rate": 1.886496429344298e-05, |
| "loss": 0.323, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.2326336290846137, |
| "grad_norm": 1.8653080463409424, |
| "learning_rate": 1.8837913871456397e-05, |
| "loss": 0.3273, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.23804371348193032, |
| "grad_norm": 2.2988572120666504, |
| "learning_rate": 1.8810863449469812e-05, |
| "loss": 0.3099, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.24345379787924692, |
| "grad_norm": 2.6169512271881104, |
| "learning_rate": 1.878381302748323e-05, |
| "loss": 0.3168, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.2488638822765635, |
| "grad_norm": 1.9101380109786987, |
| "learning_rate": 1.8756762605496646e-05, |
| "loss": 0.32, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.25427396667388014, |
| "grad_norm": 1.495621919631958, |
| "learning_rate": 1.8729712183510065e-05, |
| "loss": 0.3333, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.2596840510711967, |
| "grad_norm": 1.646639347076416, |
| "learning_rate": 1.870266176152348e-05, |
| "loss": 0.3238, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.2650941354685133, |
| "grad_norm": 2.967376708984375, |
| "learning_rate": 1.86756113395369e-05, |
| "loss": 0.3113, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.2705042198658299, |
| "grad_norm": 3.208625316619873, |
| "learning_rate": 1.8648560917550317e-05, |
| "loss": 0.3165, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.2759143042631465, |
| "grad_norm": 1.583409070968628, |
| "learning_rate": 1.8621510495563733e-05, |
| "loss": 0.3135, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.2813243886604631, |
| "grad_norm": 2.1211490631103516, |
| "learning_rate": 1.859446007357715e-05, |
| "loss": 0.3226, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.2867344730577797, |
| "grad_norm": 2.71584415435791, |
| "learning_rate": 1.8567409651590566e-05, |
| "loss": 0.3151, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.2921445574550963, |
| "grad_norm": 1.1264439821243286, |
| "learning_rate": 1.8540359229603985e-05, |
| "loss": 0.3015, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.29755464185241287, |
| "grad_norm": 2.2039103507995605, |
| "learning_rate": 1.85133088076174e-05, |
| "loss": 0.3199, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.3029647262497295, |
| "grad_norm": 2.2126784324645996, |
| "learning_rate": 1.848625838563082e-05, |
| "loss": 0.3084, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.3083748106470461, |
| "grad_norm": 1.9607175588607788, |
| "learning_rate": 1.8459207963644234e-05, |
| "loss": 0.3293, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.3137848950443627, |
| "grad_norm": 2.560570240020752, |
| "learning_rate": 1.8432157541657653e-05, |
| "loss": 0.3108, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.3191949794416793, |
| "grad_norm": 2.3421413898468018, |
| "learning_rate": 1.8405107119671068e-05, |
| "loss": 0.3171, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.3246050638389959, |
| "grad_norm": 2.5096020698547363, |
| "learning_rate": 1.8378056697684487e-05, |
| "loss": 0.3191, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.3300151482363125, |
| "grad_norm": 2.1862289905548096, |
| "learning_rate": 1.8351006275697902e-05, |
| "loss": 0.3191, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.3354252326336291, |
| "grad_norm": 2.0171096324920654, |
| "learning_rate": 1.832395585371132e-05, |
| "loss": 0.3148, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.34083531703094566, |
| "grad_norm": 2.438683032989502, |
| "learning_rate": 1.8296905431724736e-05, |
| "loss": 0.3046, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.34624540142826227, |
| "grad_norm": 1.8642085790634155, |
| "learning_rate": 1.8269855009738154e-05, |
| "loss": 0.3174, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.3516554858255789, |
| "grad_norm": 1.5657634735107422, |
| "learning_rate": 1.824280458775157e-05, |
| "loss": 0.3188, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.3570655702228955, |
| "grad_norm": 1.6534936428070068, |
| "learning_rate": 1.8215754165764988e-05, |
| "loss": 0.3071, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.3624756546202121, |
| "grad_norm": 2.6682286262512207, |
| "learning_rate": 1.8188703743778404e-05, |
| "loss": 0.323, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.3678857390175287, |
| "grad_norm": 2.3177874088287354, |
| "learning_rate": 1.8161653321791822e-05, |
| "loss": 0.3265, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.3732958234148453, |
| "grad_norm": 1.8575387001037598, |
| "learning_rate": 1.8134602899805237e-05, |
| "loss": 0.3235, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.37870590781216185, |
| "grad_norm": 2.2886006832122803, |
| "learning_rate": 1.8107552477818656e-05, |
| "loss": 0.3235, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.38411599220947845, |
| "grad_norm": 3.049358606338501, |
| "learning_rate": 1.808050205583207e-05, |
| "loss": 0.3121, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.38952607660679506, |
| "grad_norm": 1.3371657133102417, |
| "learning_rate": 1.805345163384549e-05, |
| "loss": 0.3136, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.39493616100411166, |
| "grad_norm": 2.506653308868408, |
| "learning_rate": 1.8026401211858905e-05, |
| "loss": 0.3203, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.40034624540142827, |
| "grad_norm": 2.045966148376465, |
| "learning_rate": 1.7999350789872324e-05, |
| "loss": 0.3289, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.4057563297987449, |
| "grad_norm": 2.645087957382202, |
| "learning_rate": 1.797230036788574e-05, |
| "loss": 0.3078, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.4111664141960615, |
| "grad_norm": 1.7792373895645142, |
| "learning_rate": 1.7945249945899158e-05, |
| "loss": 0.3109, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.41657649859337803, |
| "grad_norm": 2.439561128616333, |
| "learning_rate": 1.7918199523912573e-05, |
| "loss": 0.3116, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.42198658299069464, |
| "grad_norm": 1.7365974187850952, |
| "learning_rate": 1.789114910192599e-05, |
| "loss": 0.3195, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.42739666738801124, |
| "grad_norm": 1.4129964113235474, |
| "learning_rate": 1.7864098679939407e-05, |
| "loss": 0.3179, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.43280675178532785, |
| "grad_norm": 2.3082122802734375, |
| "learning_rate": 1.7837048257952825e-05, |
| "loss": 0.3006, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.43821683618264445, |
| "grad_norm": 2.0384387969970703, |
| "learning_rate": 1.780999783596624e-05, |
| "loss": 0.3022, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.44362692057996106, |
| "grad_norm": 3.2233986854553223, |
| "learning_rate": 1.778294741397966e-05, |
| "loss": 0.3185, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.44903700497727767, |
| "grad_norm": 3.3285160064697266, |
| "learning_rate": 1.7755896991993074e-05, |
| "loss": 0.3149, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.45444708937459427, |
| "grad_norm": 2.251983165740967, |
| "learning_rate": 1.7728846570006493e-05, |
| "loss": 0.3165, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.4598571737719108, |
| "grad_norm": 2.060551404953003, |
| "learning_rate": 1.770179614801991e-05, |
| "loss": 0.3141, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.4652672581692274, |
| "grad_norm": 1.4859846830368042, |
| "learning_rate": 1.7674745726033327e-05, |
| "loss": 0.3181, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.47067734256654403, |
| "grad_norm": 2.066725730895996, |
| "learning_rate": 1.7647695304046742e-05, |
| "loss": 0.3147, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.47608742696386064, |
| "grad_norm": 1.7268898487091064, |
| "learning_rate": 1.762064488206016e-05, |
| "loss": 0.3117, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.48149751136117724, |
| "grad_norm": 2.5234525203704834, |
| "learning_rate": 1.7593594460073576e-05, |
| "loss": 0.3081, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.48690759575849385, |
| "grad_norm": 2.0419929027557373, |
| "learning_rate": 1.7566544038086995e-05, |
| "loss": 0.3315, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.49231768015581046, |
| "grad_norm": 1.9306811094284058, |
| "learning_rate": 1.7539493616100413e-05, |
| "loss": 0.3149, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.497727764553127, |
| "grad_norm": 1.1427522897720337, |
| "learning_rate": 1.7512443194113832e-05, |
| "loss": 0.3214, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.5031378489504437, |
| "grad_norm": 2.0656473636627197, |
| "learning_rate": 1.7485392772127247e-05, |
| "loss": 0.3226, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.5085479333477603, |
| "grad_norm": 2.1577677726745605, |
| "learning_rate": 1.7458342350140666e-05, |
| "loss": 0.3098, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.5139580177450769, |
| "grad_norm": 2.430352210998535, |
| "learning_rate": 1.743129192815408e-05, |
| "loss": 0.3103, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.5193681021423934, |
| "grad_norm": 2.0063912868499756, |
| "learning_rate": 1.74042415061675e-05, |
| "loss": 0.3062, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.52477818653971, |
| "grad_norm": 2.357673168182373, |
| "learning_rate": 1.7377191084180915e-05, |
| "loss": 0.3172, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.5301882709370266, |
| "grad_norm": 2.1735177040100098, |
| "learning_rate": 1.7350140662194334e-05, |
| "loss": 0.3051, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.5355983553343432, |
| "grad_norm": 2.143653154373169, |
| "learning_rate": 1.732309024020775e-05, |
| "loss": 0.3264, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.5410084397316598, |
| "grad_norm": 3.2294411659240723, |
| "learning_rate": 1.7296039818221167e-05, |
| "loss": 0.3168, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.5464185241289764, |
| "grad_norm": 3.9202592372894287, |
| "learning_rate": 1.7268989396234583e-05, |
| "loss": 0.3038, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.551828608526293, |
| "grad_norm": 2.067411422729492, |
| "learning_rate": 1.7241938974248e-05, |
| "loss": 0.3094, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.5572386929236096, |
| "grad_norm": 3.1209259033203125, |
| "learning_rate": 1.7214888552261416e-05, |
| "loss": 0.2853, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.5626487773209262, |
| "grad_norm": 2.2904303073883057, |
| "learning_rate": 1.7187838130274835e-05, |
| "loss": 0.334, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.5680588617182428, |
| "grad_norm": 2.144474983215332, |
| "learning_rate": 1.716078770828825e-05, |
| "loss": 0.3206, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.5734689461155594, |
| "grad_norm": 1.682051181793213, |
| "learning_rate": 1.71342782947414e-05, |
| "loss": 0.3098, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.578879030512876, |
| "grad_norm": 2.1887052059173584, |
| "learning_rate": 1.7107227872754815e-05, |
| "loss": 0.3051, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.5842891149101926, |
| "grad_norm": 1.5054807662963867, |
| "learning_rate": 1.7080177450768234e-05, |
| "loss": 0.3205, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.5896991993075092, |
| "grad_norm": 1.9038208723068237, |
| "learning_rate": 1.705312702878165e-05, |
| "loss": 0.3196, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.5951092837048257, |
| "grad_norm": 2.6153712272644043, |
| "learning_rate": 1.7026076606795068e-05, |
| "loss": 0.3096, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.6005193681021423, |
| "grad_norm": 1.7780612707138062, |
| "learning_rate": 1.6999026184808483e-05, |
| "loss": 0.3167, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.605929452499459, |
| "grad_norm": 1.6894149780273438, |
| "learning_rate": 1.6971975762821902e-05, |
| "loss": 0.3156, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.6113395368967756, |
| "grad_norm": 1.0445067882537842, |
| "learning_rate": 1.694492534083532e-05, |
| "loss": 0.3029, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.6167496212940922, |
| "grad_norm": 2.145226001739502, |
| "learning_rate": 1.6917874918848736e-05, |
| "loss": 0.3147, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.6221597056914088, |
| "grad_norm": 2.216933250427246, |
| "learning_rate": 1.6890824496862154e-05, |
| "loss": 0.3169, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.6275697900887254, |
| "grad_norm": 1.8254783153533936, |
| "learning_rate": 1.686377407487557e-05, |
| "loss": 0.2997, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.632979874486042, |
| "grad_norm": 2.0882210731506348, |
| "learning_rate": 1.6836723652888988e-05, |
| "loss": 0.3348, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.6383899588833586, |
| "grad_norm": 1.6735379695892334, |
| "learning_rate": 1.6809673230902403e-05, |
| "loss": 0.3073, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.6438000432806752, |
| "grad_norm": 2.022500514984131, |
| "learning_rate": 1.6782622808915822e-05, |
| "loss": 0.3188, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.6492101276779918, |
| "grad_norm": 1.9945366382598877, |
| "learning_rate": 1.6755572386929237e-05, |
| "loss": 0.2968, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.6546202120753084, |
| "grad_norm": 2.076066493988037, |
| "learning_rate": 1.6729062973382387e-05, |
| "loss": 0.3111, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.660030296472625, |
| "grad_norm": 2.083757162094116, |
| "learning_rate": 1.6702012551395802e-05, |
| "loss": 0.3083, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.6654403808699416, |
| "grad_norm": 1.9437183141708374, |
| "learning_rate": 1.667496212940922e-05, |
| "loss": 0.328, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.6708504652672582, |
| "grad_norm": 1.824617862701416, |
| "learning_rate": 1.6647911707422636e-05, |
| "loss": 0.3042, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.6762605496645747, |
| "grad_norm": 1.78602135181427, |
| "learning_rate": 1.6620861285436055e-05, |
| "loss": 0.3167, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.6816706340618913, |
| "grad_norm": 2.4211480617523193, |
| "learning_rate": 1.659381086344947e-05, |
| "loss": 0.3009, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.6870807184592079, |
| "grad_norm": 1.5942953824996948, |
| "learning_rate": 1.656676044146289e-05, |
| "loss": 0.3073, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.6924908028565245, |
| "grad_norm": 1.8902363777160645, |
| "learning_rate": 1.6539710019476304e-05, |
| "loss": 0.3289, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.6979008872538411, |
| "grad_norm": 1.8132948875427246, |
| "learning_rate": 1.6512659597489722e-05, |
| "loss": 0.3142, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.7033109716511577, |
| "grad_norm": 2.8677127361297607, |
| "learning_rate": 1.6485609175503138e-05, |
| "loss": 0.3035, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.7087210560484744, |
| "grad_norm": 1.7309463024139404, |
| "learning_rate": 1.6458558753516556e-05, |
| "loss": 0.3149, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.714131140445791, |
| "grad_norm": 1.587862253189087, |
| "learning_rate": 1.643150833152997e-05, |
| "loss": 0.3137, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.7195412248431076, |
| "grad_norm": 2.0411460399627686, |
| "learning_rate": 1.640445790954339e-05, |
| "loss": 0.3188, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.7249513092404242, |
| "grad_norm": 2.4019126892089844, |
| "learning_rate": 1.6377407487556805e-05, |
| "loss": 0.3053, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.7303613936377408, |
| "grad_norm": 1.4498906135559082, |
| "learning_rate": 1.6350357065570224e-05, |
| "loss": 0.3203, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.7357714780350574, |
| "grad_norm": 1.8330546617507935, |
| "learning_rate": 1.632330664358364e-05, |
| "loss": 0.3131, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.741181562432374, |
| "grad_norm": 2.0546889305114746, |
| "learning_rate": 1.6296256221597058e-05, |
| "loss": 0.3162, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.7465916468296906, |
| "grad_norm": 2.014270544052124, |
| "learning_rate": 1.6269205799610473e-05, |
| "loss": 0.3123, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.7520017312270071, |
| "grad_norm": 2.8432347774505615, |
| "learning_rate": 1.6242155377623892e-05, |
| "loss": 0.3155, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.7574118156243237, |
| "grad_norm": 1.6952821016311646, |
| "learning_rate": 1.6215104955637307e-05, |
| "loss": 0.3014, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.7628219000216403, |
| "grad_norm": 1.657174825668335, |
| "learning_rate": 1.6188054533650726e-05, |
| "loss": 0.2904, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.7682319844189569, |
| "grad_norm": 2.069336175918579, |
| "learning_rate": 1.616100411166414e-05, |
| "loss": 0.3098, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.7736420688162735, |
| "grad_norm": 2.974534749984741, |
| "learning_rate": 1.613395368967756e-05, |
| "loss": 0.3103, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.7790521532135901, |
| "grad_norm": 1.8722079992294312, |
| "learning_rate": 1.6106903267690978e-05, |
| "loss": 0.3181, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.7844622376109067, |
| "grad_norm": 1.8573060035705566, |
| "learning_rate": 1.6079852845704393e-05, |
| "loss": 0.3095, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.7898723220082233, |
| "grad_norm": 1.5203043222427368, |
| "learning_rate": 1.6052802423717812e-05, |
| "loss": 0.3062, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.7952824064055399, |
| "grad_norm": 1.3434637784957886, |
| "learning_rate": 1.6025752001731227e-05, |
| "loss": 0.315, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.8006924908028565, |
| "grad_norm": 2.0199999809265137, |
| "learning_rate": 1.5998701579744646e-05, |
| "loss": 0.301, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.8061025752001731, |
| "grad_norm": 1.8482146263122559, |
| "learning_rate": 1.597165115775806e-05, |
| "loss": 0.2947, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.8115126595974897, |
| "grad_norm": 2.2450520992279053, |
| "learning_rate": 1.594460073577148e-05, |
| "loss": 0.3098, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.8169227439948064, |
| "grad_norm": 2.0248937606811523, |
| "learning_rate": 1.59175503137849e-05, |
| "loss": 0.3125, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.822332828392123, |
| "grad_norm": 1.8997951745986938, |
| "learning_rate": 1.5890499891798314e-05, |
| "loss": 0.3054, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.8277429127894396, |
| "grad_norm": 1.7810888290405273, |
| "learning_rate": 1.5863449469811732e-05, |
| "loss": 0.3054, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.8331529971867561, |
| "grad_norm": 1.5169132947921753, |
| "learning_rate": 1.5836399047825147e-05, |
| "loss": 0.3051, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.8385630815840727, |
| "grad_norm": 1.8637396097183228, |
| "learning_rate": 1.5809348625838566e-05, |
| "loss": 0.3002, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.8439731659813893, |
| "grad_norm": 2.1024274826049805, |
| "learning_rate": 1.578229820385198e-05, |
| "loss": 0.2988, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.8493832503787059, |
| "grad_norm": 2.098560333251953, |
| "learning_rate": 1.57552477818654e-05, |
| "loss": 0.3231, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.8547933347760225, |
| "grad_norm": 2.609898328781128, |
| "learning_rate": 1.5728197359878815e-05, |
| "loss": 0.3076, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.8602034191733391, |
| "grad_norm": 1.3566176891326904, |
| "learning_rate": 1.5701146937892234e-05, |
| "loss": 0.3052, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.8656135035706557, |
| "grad_norm": 2.245142936706543, |
| "learning_rate": 1.567409651590565e-05, |
| "loss": 0.3079, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.8710235879679723, |
| "grad_norm": 2.1084117889404297, |
| "learning_rate": 1.5647046093919068e-05, |
| "loss": 0.3096, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.8764336723652889, |
| "grad_norm": 1.445786714553833, |
| "learning_rate": 1.5619995671932483e-05, |
| "loss": 0.3006, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.8818437567626055, |
| "grad_norm": 1.5979697704315186, |
| "learning_rate": 1.55929452499459e-05, |
| "loss": 0.3138, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.8872538411599221, |
| "grad_norm": 1.7282668352127075, |
| "learning_rate": 1.5565894827959317e-05, |
| "loss": 0.3102, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.8926639255572387, |
| "grad_norm": 1.5071444511413574, |
| "learning_rate": 1.5538844405972735e-05, |
| "loss": 0.2989, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.8980740099545553, |
| "grad_norm": 2.033233880996704, |
| "learning_rate": 1.551179398398615e-05, |
| "loss": 0.3162, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.9034840943518719, |
| "grad_norm": 1.9847067594528198, |
| "learning_rate": 1.548474356199957e-05, |
| "loss": 0.3208, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.9088941787491885, |
| "grad_norm": 1.9450207948684692, |
| "learning_rate": 1.5457693140012985e-05, |
| "loss": 0.2982, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.914304263146505, |
| "grad_norm": 2.0153732299804688, |
| "learning_rate": 1.5430642718026403e-05, |
| "loss": 0.2965, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.9197143475438216, |
| "grad_norm": 1.7769889831542969, |
| "learning_rate": 1.540359229603982e-05, |
| "loss": 0.3325, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.9251244319411382, |
| "grad_norm": 1.949591040611267, |
| "learning_rate": 1.5376541874053237e-05, |
| "loss": 0.3019, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.9305345163384549, |
| "grad_norm": 1.7794471979141235, |
| "learning_rate": 1.5349491452066652e-05, |
| "loss": 0.3156, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.9359446007357715, |
| "grad_norm": 1.8910231590270996, |
| "learning_rate": 1.532244103008007e-05, |
| "loss": 0.3186, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.9413546851330881, |
| "grad_norm": 1.9011621475219727, |
| "learning_rate": 1.5295390608093486e-05, |
| "loss": 0.3123, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.9467647695304047, |
| "grad_norm": 2.2883121967315674, |
| "learning_rate": 1.5268340186106905e-05, |
| "loss": 0.3039, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.9521748539277213, |
| "grad_norm": 1.4748516082763672, |
| "learning_rate": 1.5241289764120322e-05, |
| "loss": 0.3132, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.9575849383250379, |
| "grad_norm": 2.059274673461914, |
| "learning_rate": 1.5214239342133739e-05, |
| "loss": 0.3051, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.9629950227223545, |
| "grad_norm": 1.7099647521972656, |
| "learning_rate": 1.5187188920147156e-05, |
| "loss": 0.3168, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.9684051071196711, |
| "grad_norm": 1.8200836181640625, |
| "learning_rate": 1.5160138498160572e-05, |
| "loss": 0.3029, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.9738151915169877, |
| "grad_norm": 1.5641443729400635, |
| "learning_rate": 1.513308807617399e-05, |
| "loss": 0.2976, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.9792252759143043, |
| "grad_norm": 2.299715757369995, |
| "learning_rate": 1.5106037654187406e-05, |
| "loss": 0.3085, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.9846353603116209, |
| "grad_norm": 2.057692289352417, |
| "learning_rate": 1.5078987232200823e-05, |
| "loss": 0.3134, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.9900454447089374, |
| "grad_norm": 3.2328195571899414, |
| "learning_rate": 1.505193681021424e-05, |
| "loss": 0.3082, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.995455529106254, |
| "grad_norm": 1.7043819427490234, |
| "learning_rate": 1.5024886388227657e-05, |
| "loss": 0.3054, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.8663521020073223, |
| "eval_hamming_loss": 0.1336478979926777, |
| "eval_jaccard_score": 0.460505302360813, |
| "eval_loss": 0.3095310926437378, |
| "eval_runtime": 65.5546, |
| "eval_samples_per_second": 483.322, |
| "eval_steps_per_second": 30.219, |
| "step": 9242 |
| }, |
| { |
| "epoch": 1.0008656135035707, |
| "grad_norm": 1.3396296501159668, |
| "learning_rate": 1.4997835966241074e-05, |
| "loss": 0.3116, |
| "step": 9250 |
| }, |
| { |
| "epoch": 1.0062756979008873, |
| "grad_norm": 1.8290706872940063, |
| "learning_rate": 1.4970785544254491e-05, |
| "loss": 0.2928, |
| "step": 9300 |
| }, |
| { |
| "epoch": 1.011685782298204, |
| "grad_norm": 1.7000977993011475, |
| "learning_rate": 1.4943735122267908e-05, |
| "loss": 0.302, |
| "step": 9350 |
| }, |
| { |
| "epoch": 1.0170958666955205, |
| "grad_norm": 1.5283267498016357, |
| "learning_rate": 1.4916684700281325e-05, |
| "loss": 0.304, |
| "step": 9400 |
| }, |
| { |
| "epoch": 1.0225059510928372, |
| "grad_norm": 2.1828360557556152, |
| "learning_rate": 1.4889634278294742e-05, |
| "loss": 0.2936, |
| "step": 9450 |
| }, |
| { |
| "epoch": 1.0279160354901538, |
| "grad_norm": 1.8878413438796997, |
| "learning_rate": 1.4862583856308159e-05, |
| "loss": 0.2837, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.0333261198874701, |
| "grad_norm": 2.093980073928833, |
| "learning_rate": 1.4835533434321576e-05, |
| "loss": 0.2787, |
| "step": 9550 |
| }, |
| { |
| "epoch": 1.0387362042847867, |
| "grad_norm": 2.5507097244262695, |
| "learning_rate": 1.4808483012334993e-05, |
| "loss": 0.2896, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.0441462886821034, |
| "grad_norm": 2.2995388507843018, |
| "learning_rate": 1.478143259034841e-05, |
| "loss": 0.3053, |
| "step": 9650 |
| }, |
| { |
| "epoch": 1.04955637307942, |
| "grad_norm": 2.930950880050659, |
| "learning_rate": 1.4754382168361826e-05, |
| "loss": 0.2963, |
| "step": 9700 |
| }, |
| { |
| "epoch": 1.0549664574767366, |
| "grad_norm": 2.29239821434021, |
| "learning_rate": 1.4727331746375243e-05, |
| "loss": 0.2886, |
| "step": 9750 |
| }, |
| { |
| "epoch": 1.0603765418740532, |
| "grad_norm": 2.4556353092193604, |
| "learning_rate": 1.470028132438866e-05, |
| "loss": 0.292, |
| "step": 9800 |
| }, |
| { |
| "epoch": 1.0657866262713698, |
| "grad_norm": 2.0122013092041016, |
| "learning_rate": 1.4673230902402077e-05, |
| "loss": 0.2896, |
| "step": 9850 |
| }, |
| { |
| "epoch": 1.0711967106686864, |
| "grad_norm": 2.613096237182617, |
| "learning_rate": 1.4646180480415494e-05, |
| "loss": 0.2939, |
| "step": 9900 |
| }, |
| { |
| "epoch": 1.076606795066003, |
| "grad_norm": 1.474997639656067, |
| "learning_rate": 1.4619130058428911e-05, |
| "loss": 0.307, |
| "step": 9950 |
| }, |
| { |
| "epoch": 1.0820168794633196, |
| "grad_norm": 2.1981139183044434, |
| "learning_rate": 1.459207963644233e-05, |
| "loss": 0.2986, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.0874269638606362, |
| "grad_norm": 1.849653959274292, |
| "learning_rate": 1.4565029214455747e-05, |
| "loss": 0.2939, |
| "step": 10050 |
| }, |
| { |
| "epoch": 1.0928370482579528, |
| "grad_norm": 1.8995155096054077, |
| "learning_rate": 1.4537978792469164e-05, |
| "loss": 0.3063, |
| "step": 10100 |
| }, |
| { |
| "epoch": 1.0982471326552694, |
| "grad_norm": 2.691397190093994, |
| "learning_rate": 1.451092837048258e-05, |
| "loss": 0.3049, |
| "step": 10150 |
| }, |
| { |
| "epoch": 1.103657217052586, |
| "grad_norm": 1.8928948640823364, |
| "learning_rate": 1.4483877948495997e-05, |
| "loss": 0.3065, |
| "step": 10200 |
| }, |
| { |
| "epoch": 1.1090673014499026, |
| "grad_norm": 3.0470404624938965, |
| "learning_rate": 1.4456827526509414e-05, |
| "loss": 0.2972, |
| "step": 10250 |
| }, |
| { |
| "epoch": 1.1144773858472192, |
| "grad_norm": 1.8250367641448975, |
| "learning_rate": 1.4429777104522831e-05, |
| "loss": 0.2881, |
| "step": 10300 |
| }, |
| { |
| "epoch": 1.1198874702445358, |
| "grad_norm": 2.1483049392700195, |
| "learning_rate": 1.4402726682536248e-05, |
| "loss": 0.2972, |
| "step": 10350 |
| }, |
| { |
| "epoch": 1.1252975546418524, |
| "grad_norm": 1.9604085683822632, |
| "learning_rate": 1.4375676260549665e-05, |
| "loss": 0.2895, |
| "step": 10400 |
| }, |
| { |
| "epoch": 1.130707639039169, |
| "grad_norm": 1.5873773097991943, |
| "learning_rate": 1.4348625838563082e-05, |
| "loss": 0.3044, |
| "step": 10450 |
| }, |
| { |
| "epoch": 1.1361177234364856, |
| "grad_norm": 3.0002996921539307, |
| "learning_rate": 1.43215754165765e-05, |
| "loss": 0.3158, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.1415278078338023, |
| "grad_norm": 1.7767727375030518, |
| "learning_rate": 1.4294524994589918e-05, |
| "loss": 0.2872, |
| "step": 10550 |
| }, |
| { |
| "epoch": 1.1469378922311189, |
| "grad_norm": 2.038928270339966, |
| "learning_rate": 1.4267474572603335e-05, |
| "loss": 0.2894, |
| "step": 10600 |
| }, |
| { |
| "epoch": 1.1523479766284355, |
| "grad_norm": 2.048238515853882, |
| "learning_rate": 1.4240424150616752e-05, |
| "loss": 0.296, |
| "step": 10650 |
| }, |
| { |
| "epoch": 1.157758061025752, |
| "grad_norm": 1.646410346031189, |
| "learning_rate": 1.4213373728630169e-05, |
| "loss": 0.2903, |
| "step": 10700 |
| }, |
| { |
| "epoch": 1.1631681454230687, |
| "grad_norm": 1.8049273490905762, |
| "learning_rate": 1.4186323306643585e-05, |
| "loss": 0.2963, |
| "step": 10750 |
| }, |
| { |
| "epoch": 1.1685782298203853, |
| "grad_norm": 2.3281192779541016, |
| "learning_rate": 1.4159272884657002e-05, |
| "loss": 0.3096, |
| "step": 10800 |
| }, |
| { |
| "epoch": 1.173988314217702, |
| "grad_norm": 2.3356218338012695, |
| "learning_rate": 1.413222246267042e-05, |
| "loss": 0.3096, |
| "step": 10850 |
| }, |
| { |
| "epoch": 1.1793983986150183, |
| "grad_norm": 2.3634541034698486, |
| "learning_rate": 1.4105172040683836e-05, |
| "loss": 0.2944, |
| "step": 10900 |
| }, |
| { |
| "epoch": 1.1848084830123349, |
| "grad_norm": 2.3536300659179688, |
| "learning_rate": 1.4078121618697253e-05, |
| "loss": 0.2938, |
| "step": 10950 |
| }, |
| { |
| "epoch": 1.1902185674096515, |
| "grad_norm": 2.2869951725006104, |
| "learning_rate": 1.4051612205150403e-05, |
| "loss": 0.298, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.195628651806968, |
| "grad_norm": 2.1792991161346436, |
| "learning_rate": 1.402456178316382e-05, |
| "loss": 0.2931, |
| "step": 11050 |
| }, |
| { |
| "epoch": 1.2010387362042847, |
| "grad_norm": 1.900748372077942, |
| "learning_rate": 1.3997511361177237e-05, |
| "loss": 0.2837, |
| "step": 11100 |
| }, |
| { |
| "epoch": 1.2064488206016013, |
| "grad_norm": 1.2853705883026123, |
| "learning_rate": 1.3970460939190654e-05, |
| "loss": 0.3129, |
| "step": 11150 |
| }, |
| { |
| "epoch": 1.211858904998918, |
| "grad_norm": 2.5928916931152344, |
| "learning_rate": 1.394341051720407e-05, |
| "loss": 0.2912, |
| "step": 11200 |
| }, |
| { |
| "epoch": 1.2172689893962345, |
| "grad_norm": 1.8576390743255615, |
| "learning_rate": 1.3916360095217488e-05, |
| "loss": 0.2999, |
| "step": 11250 |
| }, |
| { |
| "epoch": 1.2226790737935511, |
| "grad_norm": 1.708588719367981, |
| "learning_rate": 1.3889309673230905e-05, |
| "loss": 0.2923, |
| "step": 11300 |
| }, |
| { |
| "epoch": 1.2280891581908677, |
| "grad_norm": 2.3297269344329834, |
| "learning_rate": 1.3862259251244321e-05, |
| "loss": 0.2894, |
| "step": 11350 |
| }, |
| { |
| "epoch": 1.2334992425881843, |
| "grad_norm": 1.9831498861312866, |
| "learning_rate": 1.3835208829257738e-05, |
| "loss": 0.3088, |
| "step": 11400 |
| }, |
| { |
| "epoch": 1.238909326985501, |
| "grad_norm": 1.94819974899292, |
| "learning_rate": 1.3808158407271155e-05, |
| "loss": 0.28, |
| "step": 11450 |
| }, |
| { |
| "epoch": 1.2443194113828175, |
| "grad_norm": 2.235166549682617, |
| "learning_rate": 1.3781107985284572e-05, |
| "loss": 0.2992, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.2497294957801341, |
| "grad_norm": 3.5507967472076416, |
| "learning_rate": 1.375405756329799e-05, |
| "loss": 0.3057, |
| "step": 11550 |
| }, |
| { |
| "epoch": 1.2551395801774508, |
| "grad_norm": 2.4944968223571777, |
| "learning_rate": 1.3727007141311406e-05, |
| "loss": 0.2928, |
| "step": 11600 |
| }, |
| { |
| "epoch": 1.2605496645747674, |
| "grad_norm": 2.121217727661133, |
| "learning_rate": 1.3699956719324823e-05, |
| "loss": 0.2912, |
| "step": 11650 |
| }, |
| { |
| "epoch": 1.265959748972084, |
| "grad_norm": 1.9403678178787231, |
| "learning_rate": 1.367290629733824e-05, |
| "loss": 0.2787, |
| "step": 11700 |
| }, |
| { |
| "epoch": 1.2713698333694006, |
| "grad_norm": 2.8676600456237793, |
| "learning_rate": 1.3645855875351657e-05, |
| "loss": 0.2966, |
| "step": 11750 |
| }, |
| { |
| "epoch": 1.2767799177667172, |
| "grad_norm": 1.6059439182281494, |
| "learning_rate": 1.3618805453365074e-05, |
| "loss": 0.2953, |
| "step": 11800 |
| }, |
| { |
| "epoch": 1.2821900021640338, |
| "grad_norm": 1.5992461442947388, |
| "learning_rate": 1.359175503137849e-05, |
| "loss": 0.2858, |
| "step": 11850 |
| }, |
| { |
| "epoch": 1.2876000865613504, |
| "grad_norm": 2.614466667175293, |
| "learning_rate": 1.3564704609391908e-05, |
| "loss": 0.2905, |
| "step": 11900 |
| }, |
| { |
| "epoch": 1.293010170958667, |
| "grad_norm": 2.2092373371124268, |
| "learning_rate": 1.3537654187405325e-05, |
| "loss": 0.2939, |
| "step": 11950 |
| }, |
| { |
| "epoch": 1.2984202553559836, |
| "grad_norm": 1.8631094694137573, |
| "learning_rate": 1.3510603765418742e-05, |
| "loss": 0.3166, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.3038303397533002, |
| "grad_norm": 2.1259829998016357, |
| "learning_rate": 1.3483553343432159e-05, |
| "loss": 0.2941, |
| "step": 12050 |
| }, |
| { |
| "epoch": 1.3092404241506168, |
| "grad_norm": 2.443293333053589, |
| "learning_rate": 1.3456502921445575e-05, |
| "loss": 0.2841, |
| "step": 12100 |
| }, |
| { |
| "epoch": 1.3146505085479334, |
| "grad_norm": 1.8442476987838745, |
| "learning_rate": 1.3429452499458992e-05, |
| "loss": 0.2988, |
| "step": 12150 |
| }, |
| { |
| "epoch": 1.32006059294525, |
| "grad_norm": 2.4069666862487793, |
| "learning_rate": 1.340240207747241e-05, |
| "loss": 0.2897, |
| "step": 12200 |
| }, |
| { |
| "epoch": 1.3254706773425666, |
| "grad_norm": 3.02299427986145, |
| "learning_rate": 1.3375351655485826e-05, |
| "loss": 0.3033, |
| "step": 12250 |
| }, |
| { |
| "epoch": 1.3308807617398832, |
| "grad_norm": 1.7643568515777588, |
| "learning_rate": 1.3348301233499243e-05, |
| "loss": 0.285, |
| "step": 12300 |
| }, |
| { |
| "epoch": 1.3362908461371998, |
| "grad_norm": 1.788694977760315, |
| "learning_rate": 1.332125081151266e-05, |
| "loss": 0.2863, |
| "step": 12350 |
| }, |
| { |
| "epoch": 1.3417009305345164, |
| "grad_norm": 1.921799898147583, |
| "learning_rate": 1.3294200389526077e-05, |
| "loss": 0.3018, |
| "step": 12400 |
| }, |
| { |
| "epoch": 1.347111014931833, |
| "grad_norm": 1.9117213487625122, |
| "learning_rate": 1.3267149967539494e-05, |
| "loss": 0.2934, |
| "step": 12450 |
| }, |
| { |
| "epoch": 1.3525210993291497, |
| "grad_norm": 1.407914400100708, |
| "learning_rate": 1.3240099545552911e-05, |
| "loss": 0.2976, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.3579311837264663, |
| "grad_norm": 1.9407671689987183, |
| "learning_rate": 1.3213049123566328e-05, |
| "loss": 0.3091, |
| "step": 12550 |
| }, |
| { |
| "epoch": 1.3633412681237829, |
| "grad_norm": 2.697124481201172, |
| "learning_rate": 1.3185998701579745e-05, |
| "loss": 0.2967, |
| "step": 12600 |
| }, |
| { |
| "epoch": 1.3687513525210993, |
| "grad_norm": 2.1249263286590576, |
| "learning_rate": 1.3158948279593162e-05, |
| "loss": 0.3008, |
| "step": 12650 |
| }, |
| { |
| "epoch": 1.3741614369184159, |
| "grad_norm": 2.2134153842926025, |
| "learning_rate": 1.3131897857606579e-05, |
| "loss": 0.2971, |
| "step": 12700 |
| }, |
| { |
| "epoch": 1.3795715213157325, |
| "grad_norm": 2.4941091537475586, |
| "learning_rate": 1.3104847435619996e-05, |
| "loss": 0.2909, |
| "step": 12750 |
| }, |
| { |
| "epoch": 1.384981605713049, |
| "grad_norm": 2.3787550926208496, |
| "learning_rate": 1.3077797013633413e-05, |
| "loss": 0.3048, |
| "step": 12800 |
| }, |
| { |
| "epoch": 1.3903916901103657, |
| "grad_norm": 1.608185887336731, |
| "learning_rate": 1.305074659164683e-05, |
| "loss": 0.2942, |
| "step": 12850 |
| }, |
| { |
| "epoch": 1.3958017745076823, |
| "grad_norm": 2.492053270339966, |
| "learning_rate": 1.3023696169660246e-05, |
| "loss": 0.2814, |
| "step": 12900 |
| }, |
| { |
| "epoch": 1.4012118589049989, |
| "grad_norm": 1.7174736261367798, |
| "learning_rate": 1.2996645747673663e-05, |
| "loss": 0.3046, |
| "step": 12950 |
| }, |
| { |
| "epoch": 1.4066219433023155, |
| "grad_norm": 1.7538783550262451, |
| "learning_rate": 1.296959532568708e-05, |
| "loss": 0.2958, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.412032027699632, |
| "grad_norm": 2.1111483573913574, |
| "learning_rate": 1.2942544903700497e-05, |
| "loss": 0.295, |
| "step": 13050 |
| }, |
| { |
| "epoch": 1.4174421120969487, |
| "grad_norm": 1.6928726434707642, |
| "learning_rate": 1.2915494481713914e-05, |
| "loss": 0.2899, |
| "step": 13100 |
| }, |
| { |
| "epoch": 1.4228521964942653, |
| "grad_norm": 2.2727274894714355, |
| "learning_rate": 1.2888444059727333e-05, |
| "loss": 0.309, |
| "step": 13150 |
| }, |
| { |
| "epoch": 1.428262280891582, |
| "grad_norm": 1.9953638315200806, |
| "learning_rate": 1.286139363774075e-05, |
| "loss": 0.2911, |
| "step": 13200 |
| }, |
| { |
| "epoch": 1.4336723652888985, |
| "grad_norm": 1.7269879579544067, |
| "learning_rate": 1.2834343215754167e-05, |
| "loss": 0.3023, |
| "step": 13250 |
| }, |
| { |
| "epoch": 1.4390824496862151, |
| "grad_norm": 2.24790620803833, |
| "learning_rate": 1.2807292793767584e-05, |
| "loss": 0.2737, |
| "step": 13300 |
| }, |
| { |
| "epoch": 1.4444925340835317, |
| "grad_norm": 1.5782350301742554, |
| "learning_rate": 1.2780242371781e-05, |
| "loss": 0.2814, |
| "step": 13350 |
| }, |
| { |
| "epoch": 1.4499026184808483, |
| "grad_norm": 1.7679520845413208, |
| "learning_rate": 1.2753191949794417e-05, |
| "loss": 0.2947, |
| "step": 13400 |
| }, |
| { |
| "epoch": 1.455312702878165, |
| "grad_norm": 2.2966043949127197, |
| "learning_rate": 1.2726141527807834e-05, |
| "loss": 0.3022, |
| "step": 13450 |
| }, |
| { |
| "epoch": 1.4607227872754815, |
| "grad_norm": 2.3050730228424072, |
| "learning_rate": 1.2699091105821253e-05, |
| "loss": 0.2854, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.4661328716727982, |
| "grad_norm": 2.056730270385742, |
| "learning_rate": 1.267204068383467e-05, |
| "loss": 0.304, |
| "step": 13550 |
| }, |
| { |
| "epoch": 1.4715429560701148, |
| "grad_norm": 1.7711883783340454, |
| "learning_rate": 1.2644990261848087e-05, |
| "loss": 0.2874, |
| "step": 13600 |
| }, |
| { |
| "epoch": 1.4769530404674314, |
| "grad_norm": 2.4947686195373535, |
| "learning_rate": 1.2617939839861504e-05, |
| "loss": 0.2863, |
| "step": 13650 |
| }, |
| { |
| "epoch": 1.482363124864748, |
| "grad_norm": 2.20076060295105, |
| "learning_rate": 1.259088941787492e-05, |
| "loss": 0.2902, |
| "step": 13700 |
| }, |
| { |
| "epoch": 1.4877732092620644, |
| "grad_norm": 1.9857763051986694, |
| "learning_rate": 1.2563838995888338e-05, |
| "loss": 0.2976, |
| "step": 13750 |
| }, |
| { |
| "epoch": 1.493183293659381, |
| "grad_norm": 1.9671763181686401, |
| "learning_rate": 1.2536788573901755e-05, |
| "loss": 0.2987, |
| "step": 13800 |
| }, |
| { |
| "epoch": 1.4985933780566976, |
| "grad_norm": 2.057361602783203, |
| "learning_rate": 1.2509738151915172e-05, |
| "loss": 0.2935, |
| "step": 13850 |
| }, |
| { |
| "epoch": 1.5040034624540142, |
| "grad_norm": 1.8263319730758667, |
| "learning_rate": 1.2482687729928588e-05, |
| "loss": 0.2981, |
| "step": 13900 |
| }, |
| { |
| "epoch": 1.5094135468513308, |
| "grad_norm": 1.7946516275405884, |
| "learning_rate": 1.2455637307942005e-05, |
| "loss": 0.3111, |
| "step": 13950 |
| }, |
| { |
| "epoch": 1.5148236312486474, |
| "grad_norm": 1.6943495273590088, |
| "learning_rate": 1.2428586885955422e-05, |
| "loss": 0.2949, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.520233715645964, |
| "grad_norm": 1.7670172452926636, |
| "learning_rate": 1.240153646396884e-05, |
| "loss": 0.285, |
| "step": 14050 |
| }, |
| { |
| "epoch": 1.5256438000432806, |
| "grad_norm": 2.126786470413208, |
| "learning_rate": 1.2375027050421989e-05, |
| "loss": 0.2937, |
| "step": 14100 |
| }, |
| { |
| "epoch": 1.5310538844405972, |
| "grad_norm": 1.107203483581543, |
| "learning_rate": 1.2347976628435406e-05, |
| "loss": 0.2831, |
| "step": 14150 |
| }, |
| { |
| "epoch": 1.5364639688379138, |
| "grad_norm": 1.985102891921997, |
| "learning_rate": 1.2320926206448823e-05, |
| "loss": 0.3039, |
| "step": 14200 |
| }, |
| { |
| "epoch": 1.5418740532352304, |
| "grad_norm": 2.21325945854187, |
| "learning_rate": 1.229387578446224e-05, |
| "loss": 0.289, |
| "step": 14250 |
| }, |
| { |
| "epoch": 1.547284137632547, |
| "grad_norm": 1.7797402143478394, |
| "learning_rate": 1.2266825362475657e-05, |
| "loss": 0.2999, |
| "step": 14300 |
| }, |
| { |
| "epoch": 1.5526942220298636, |
| "grad_norm": 1.8672326803207397, |
| "learning_rate": 1.2239774940489074e-05, |
| "loss": 0.2992, |
| "step": 14350 |
| }, |
| { |
| "epoch": 1.5581043064271802, |
| "grad_norm": 1.652946949005127, |
| "learning_rate": 1.221272451850249e-05, |
| "loss": 0.3027, |
| "step": 14400 |
| }, |
| { |
| "epoch": 1.5635143908244968, |
| "grad_norm": 2.3132338523864746, |
| "learning_rate": 1.2185674096515908e-05, |
| "loss": 0.298, |
| "step": 14450 |
| }, |
| { |
| "epoch": 1.5689244752218134, |
| "grad_norm": 1.5479793548583984, |
| "learning_rate": 1.2158623674529324e-05, |
| "loss": 0.3063, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.57433455961913, |
| "grad_norm": 2.814436912536621, |
| "learning_rate": 1.2131573252542741e-05, |
| "loss": 0.2853, |
| "step": 14550 |
| }, |
| { |
| "epoch": 1.5797446440164467, |
| "grad_norm": 1.8235867023468018, |
| "learning_rate": 1.2104522830556158e-05, |
| "loss": 0.2854, |
| "step": 14600 |
| }, |
| { |
| "epoch": 1.5851547284137633, |
| "grad_norm": 2.052896499633789, |
| "learning_rate": 1.2077472408569575e-05, |
| "loss": 0.2952, |
| "step": 14650 |
| }, |
| { |
| "epoch": 1.5905648128110799, |
| "grad_norm": 1.8663949966430664, |
| "learning_rate": 1.2050421986582992e-05, |
| "loss": 0.2934, |
| "step": 14700 |
| }, |
| { |
| "epoch": 1.5959748972083965, |
| "grad_norm": 1.9430421590805054, |
| "learning_rate": 1.202337156459641e-05, |
| "loss": 0.294, |
| "step": 14750 |
| }, |
| { |
| "epoch": 1.601384981605713, |
| "grad_norm": 2.4206387996673584, |
| "learning_rate": 1.1996321142609826e-05, |
| "loss": 0.3056, |
| "step": 14800 |
| }, |
| { |
| "epoch": 1.6067950660030297, |
| "grad_norm": 2.826404333114624, |
| "learning_rate": 1.1969270720623243e-05, |
| "loss": 0.2945, |
| "step": 14850 |
| }, |
| { |
| "epoch": 1.6122051504003463, |
| "grad_norm": 1.83290433883667, |
| "learning_rate": 1.194222029863666e-05, |
| "loss": 0.3009, |
| "step": 14900 |
| }, |
| { |
| "epoch": 1.617615234797663, |
| "grad_norm": 1.7646769285202026, |
| "learning_rate": 1.1915169876650077e-05, |
| "loss": 0.283, |
| "step": 14950 |
| }, |
| { |
| "epoch": 1.6230253191949795, |
| "grad_norm": 1.6220637559890747, |
| "learning_rate": 1.1888119454663494e-05, |
| "loss": 0.3089, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.628435403592296, |
| "grad_norm": 1.905791163444519, |
| "learning_rate": 1.186106903267691e-05, |
| "loss": 0.289, |
| "step": 15050 |
| }, |
| { |
| "epoch": 1.6338454879896127, |
| "grad_norm": 1.5982850790023804, |
| "learning_rate": 1.1834018610690328e-05, |
| "loss": 0.2913, |
| "step": 15100 |
| }, |
| { |
| "epoch": 1.6392555723869293, |
| "grad_norm": 2.2029902935028076, |
| "learning_rate": 1.1806968188703745e-05, |
| "loss": 0.283, |
| "step": 15150 |
| }, |
| { |
| "epoch": 1.644665656784246, |
| "grad_norm": 2.1125001907348633, |
| "learning_rate": 1.1779917766717162e-05, |
| "loss": 0.2924, |
| "step": 15200 |
| }, |
| { |
| "epoch": 1.6500757411815625, |
| "grad_norm": 1.6996169090270996, |
| "learning_rate": 1.1752867344730578e-05, |
| "loss": 0.3027, |
| "step": 15250 |
| }, |
| { |
| "epoch": 1.6554858255788791, |
| "grad_norm": 1.54236900806427, |
| "learning_rate": 1.1725816922743995e-05, |
| "loss": 0.306, |
| "step": 15300 |
| }, |
| { |
| "epoch": 1.6608959099761957, |
| "grad_norm": 2.4978113174438477, |
| "learning_rate": 1.1698766500757412e-05, |
| "loss": 0.3037, |
| "step": 15350 |
| }, |
| { |
| "epoch": 1.6663059943735123, |
| "grad_norm": 1.9665488004684448, |
| "learning_rate": 1.167171607877083e-05, |
| "loss": 0.3019, |
| "step": 15400 |
| }, |
| { |
| "epoch": 1.671716078770829, |
| "grad_norm": 1.758878231048584, |
| "learning_rate": 1.1644665656784246e-05, |
| "loss": 0.3005, |
| "step": 15450 |
| }, |
| { |
| "epoch": 1.6771261631681456, |
| "grad_norm": 1.5586014986038208, |
| "learning_rate": 1.1617615234797663e-05, |
| "loss": 0.3004, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.6825362475654622, |
| "grad_norm": 1.176145315170288, |
| "learning_rate": 1.159056481281108e-05, |
| "loss": 0.2877, |
| "step": 15550 |
| }, |
| { |
| "epoch": 1.6879463319627788, |
| "grad_norm": 1.9588565826416016, |
| "learning_rate": 1.1563514390824497e-05, |
| "loss": 0.297, |
| "step": 15600 |
| }, |
| { |
| "epoch": 1.6933564163600954, |
| "grad_norm": 2.419602870941162, |
| "learning_rate": 1.1536463968837914e-05, |
| "loss": 0.303, |
| "step": 15650 |
| }, |
| { |
| "epoch": 1.698766500757412, |
| "grad_norm": 2.2711267471313477, |
| "learning_rate": 1.1509413546851331e-05, |
| "loss": 0.2947, |
| "step": 15700 |
| }, |
| { |
| "epoch": 1.7041765851547284, |
| "grad_norm": 2.223388433456421, |
| "learning_rate": 1.1482363124864748e-05, |
| "loss": 0.2741, |
| "step": 15750 |
| }, |
| { |
| "epoch": 1.709586669552045, |
| "grad_norm": 3.3137338161468506, |
| "learning_rate": 1.1455312702878165e-05, |
| "loss": 0.2979, |
| "step": 15800 |
| }, |
| { |
| "epoch": 1.7149967539493616, |
| "grad_norm": 1.5524086952209473, |
| "learning_rate": 1.1428262280891582e-05, |
| "loss": 0.2848, |
| "step": 15850 |
| }, |
| { |
| "epoch": 1.7204068383466782, |
| "grad_norm": 2.8329715728759766, |
| "learning_rate": 1.1401211858904999e-05, |
| "loss": 0.2835, |
| "step": 15900 |
| }, |
| { |
| "epoch": 1.7258169227439948, |
| "grad_norm": 1.8086459636688232, |
| "learning_rate": 1.1374161436918416e-05, |
| "loss": 0.2967, |
| "step": 15950 |
| }, |
| { |
| "epoch": 1.7312270071413114, |
| "grad_norm": 1.848944902420044, |
| "learning_rate": 1.1347652023371565e-05, |
| "loss": 0.2906, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.736637091538628, |
| "grad_norm": 1.8679676055908203, |
| "learning_rate": 1.1320601601384982e-05, |
| "loss": 0.308, |
| "step": 16050 |
| }, |
| { |
| "epoch": 1.7420471759359446, |
| "grad_norm": 2.902830123901367, |
| "learning_rate": 1.12935511793984e-05, |
| "loss": 0.2892, |
| "step": 16100 |
| }, |
| { |
| "epoch": 1.7474572603332612, |
| "grad_norm": 2.772718906402588, |
| "learning_rate": 1.1266500757411816e-05, |
| "loss": 0.2995, |
| "step": 16150 |
| }, |
| { |
| "epoch": 1.7528673447305778, |
| "grad_norm": 2.3687069416046143, |
| "learning_rate": 1.1239450335425233e-05, |
| "loss": 0.2875, |
| "step": 16200 |
| }, |
| { |
| "epoch": 1.7582774291278944, |
| "grad_norm": 2.0232176780700684, |
| "learning_rate": 1.121239991343865e-05, |
| "loss": 0.2951, |
| "step": 16250 |
| }, |
| { |
| "epoch": 1.763687513525211, |
| "grad_norm": 1.7384998798370361, |
| "learning_rate": 1.1185349491452067e-05, |
| "loss": 0.3026, |
| "step": 16300 |
| }, |
| { |
| "epoch": 1.7690975979225276, |
| "grad_norm": 2.728248357772827, |
| "learning_rate": 1.1158299069465484e-05, |
| "loss": 0.2839, |
| "step": 16350 |
| }, |
| { |
| "epoch": 1.7745076823198442, |
| "grad_norm": 2.3606762886047363, |
| "learning_rate": 1.11312486474789e-05, |
| "loss": 0.3013, |
| "step": 16400 |
| }, |
| { |
| "epoch": 1.7799177667171608, |
| "grad_norm": 1.9604283571243286, |
| "learning_rate": 1.1104198225492318e-05, |
| "loss": 0.2878, |
| "step": 16450 |
| }, |
| { |
| "epoch": 1.7853278511144772, |
| "grad_norm": 2.1552507877349854, |
| "learning_rate": 1.1077147803505735e-05, |
| "loss": 0.3008, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.7907379355117938, |
| "grad_norm": 1.8902772665023804, |
| "learning_rate": 1.1050097381519153e-05, |
| "loss": 0.2961, |
| "step": 16550 |
| }, |
| { |
| "epoch": 1.7961480199091104, |
| "grad_norm": 3.5575530529022217, |
| "learning_rate": 1.102304695953257e-05, |
| "loss": 0.291, |
| "step": 16600 |
| }, |
| { |
| "epoch": 1.801558104306427, |
| "grad_norm": 1.5154167413711548, |
| "learning_rate": 1.0995996537545987e-05, |
| "loss": 0.2858, |
| "step": 16650 |
| }, |
| { |
| "epoch": 1.8069681887037436, |
| "grad_norm": 2.2316699028015137, |
| "learning_rate": 1.0968946115559404e-05, |
| "loss": 0.2928, |
| "step": 16700 |
| }, |
| { |
| "epoch": 1.8123782731010603, |
| "grad_norm": 2.4230222702026367, |
| "learning_rate": 1.0941895693572821e-05, |
| "loss": 0.2979, |
| "step": 16750 |
| }, |
| { |
| "epoch": 1.8177883574983769, |
| "grad_norm": 2.080579996109009, |
| "learning_rate": 1.0914845271586238e-05, |
| "loss": 0.2957, |
| "step": 16800 |
| }, |
| { |
| "epoch": 1.8231984418956935, |
| "grad_norm": 1.679219365119934, |
| "learning_rate": 1.0887794849599655e-05, |
| "loss": 0.311, |
| "step": 16850 |
| }, |
| { |
| "epoch": 1.82860852629301, |
| "grad_norm": 3.401188611984253, |
| "learning_rate": 1.0860744427613072e-05, |
| "loss": 0.2906, |
| "step": 16900 |
| }, |
| { |
| "epoch": 1.8340186106903267, |
| "grad_norm": 2.529232978820801, |
| "learning_rate": 1.0833694005626489e-05, |
| "loss": 0.2924, |
| "step": 16950 |
| }, |
| { |
| "epoch": 1.8394286950876433, |
| "grad_norm": 2.5550222396850586, |
| "learning_rate": 1.0806643583639906e-05, |
| "loss": 0.3036, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.84483877948496, |
| "grad_norm": 2.1273419857025146, |
| "learning_rate": 1.0779593161653323e-05, |
| "loss": 0.2959, |
| "step": 17050 |
| }, |
| { |
| "epoch": 1.8502488638822765, |
| "grad_norm": 2.560131788253784, |
| "learning_rate": 1.075254273966674e-05, |
| "loss": 0.2903, |
| "step": 17100 |
| }, |
| { |
| "epoch": 1.855658948279593, |
| "grad_norm": 2.8354437351226807, |
| "learning_rate": 1.0725492317680156e-05, |
| "loss": 0.2945, |
| "step": 17150 |
| }, |
| { |
| "epoch": 1.8610690326769097, |
| "grad_norm": 1.8256155252456665, |
| "learning_rate": 1.0698441895693575e-05, |
| "loss": 0.3005, |
| "step": 17200 |
| }, |
| { |
| "epoch": 1.8664791170742263, |
| "grad_norm": 2.123786211013794, |
| "learning_rate": 1.0671391473706992e-05, |
| "loss": 0.2916, |
| "step": 17250 |
| }, |
| { |
| "epoch": 1.871889201471543, |
| "grad_norm": 2.604180097579956, |
| "learning_rate": 1.0644341051720409e-05, |
| "loss": 0.2676, |
| "step": 17300 |
| }, |
| { |
| "epoch": 1.8772992858688595, |
| "grad_norm": 2.563234806060791, |
| "learning_rate": 1.0617290629733826e-05, |
| "loss": 0.2913, |
| "step": 17350 |
| }, |
| { |
| "epoch": 1.8827093702661761, |
| "grad_norm": 1.5203748941421509, |
| "learning_rate": 1.0590240207747243e-05, |
| "loss": 0.2963, |
| "step": 17400 |
| }, |
| { |
| "epoch": 1.8881194546634927, |
| "grad_norm": 2.2248854637145996, |
| "learning_rate": 1.056318978576066e-05, |
| "loss": 0.2863, |
| "step": 17450 |
| }, |
| { |
| "epoch": 1.8935295390608093, |
| "grad_norm": 2.470536708831787, |
| "learning_rate": 1.0536139363774077e-05, |
| "loss": 0.2908, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.898939623458126, |
| "grad_norm": 2.862772226333618, |
| "learning_rate": 1.0509088941787494e-05, |
| "loss": 0.2777, |
| "step": 17550 |
| }, |
| { |
| "epoch": 1.9043497078554426, |
| "grad_norm": 1.9825083017349243, |
| "learning_rate": 1.048203851980091e-05, |
| "loss": 0.2876, |
| "step": 17600 |
| }, |
| { |
| "epoch": 1.9097597922527592, |
| "grad_norm": 2.3272595405578613, |
| "learning_rate": 1.0454988097814327e-05, |
| "loss": 0.2976, |
| "step": 17650 |
| }, |
| { |
| "epoch": 1.9151698766500758, |
| "grad_norm": 1.9145753383636475, |
| "learning_rate": 1.0427937675827744e-05, |
| "loss": 0.2906, |
| "step": 17700 |
| }, |
| { |
| "epoch": 1.9205799610473924, |
| "grad_norm": 2.9724373817443848, |
| "learning_rate": 1.0400887253841161e-05, |
| "loss": 0.2955, |
| "step": 17750 |
| }, |
| { |
| "epoch": 1.925990045444709, |
| "grad_norm": 2.3501229286193848, |
| "learning_rate": 1.0373836831854578e-05, |
| "loss": 0.3014, |
| "step": 17800 |
| }, |
| { |
| "epoch": 1.9314001298420256, |
| "grad_norm": 2.1227500438690186, |
| "learning_rate": 1.0346786409867995e-05, |
| "loss": 0.3032, |
| "step": 17850 |
| }, |
| { |
| "epoch": 1.9368102142393422, |
| "grad_norm": 2.2915961742401123, |
| "learning_rate": 1.0319735987881412e-05, |
| "loss": 0.3016, |
| "step": 17900 |
| }, |
| { |
| "epoch": 1.9422202986366588, |
| "grad_norm": 2.912987232208252, |
| "learning_rate": 1.0292685565894829e-05, |
| "loss": 0.3037, |
| "step": 17950 |
| }, |
| { |
| "epoch": 1.9476303830339754, |
| "grad_norm": 2.616238832473755, |
| "learning_rate": 1.0265635143908246e-05, |
| "loss": 0.3021, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.953040467431292, |
| "grad_norm": 2.370420455932617, |
| "learning_rate": 1.0239125730361394e-05, |
| "loss": 0.2975, |
| "step": 18050 |
| }, |
| { |
| "epoch": 1.9584505518286086, |
| "grad_norm": 3.267179012298584, |
| "learning_rate": 1.0212075308374811e-05, |
| "loss": 0.2798, |
| "step": 18100 |
| }, |
| { |
| "epoch": 1.9638606362259252, |
| "grad_norm": 3.6033174991607666, |
| "learning_rate": 1.0185024886388228e-05, |
| "loss": 0.2867, |
| "step": 18150 |
| }, |
| { |
| "epoch": 1.9692707206232418, |
| "grad_norm": 2.4550583362579346, |
| "learning_rate": 1.0157974464401645e-05, |
| "loss": 0.3088, |
| "step": 18200 |
| }, |
| { |
| "epoch": 1.9746808050205584, |
| "grad_norm": 1.8558053970336914, |
| "learning_rate": 1.0130924042415062e-05, |
| "loss": 0.2727, |
| "step": 18250 |
| }, |
| { |
| "epoch": 1.980090889417875, |
| "grad_norm": 2.0998899936676025, |
| "learning_rate": 1.0103873620428479e-05, |
| "loss": 0.2905, |
| "step": 18300 |
| }, |
| { |
| "epoch": 1.9855009738151916, |
| "grad_norm": 2.2105298042297363, |
| "learning_rate": 1.0076823198441896e-05, |
| "loss": 0.2912, |
| "step": 18350 |
| }, |
| { |
| "epoch": 1.9909110582125082, |
| "grad_norm": 1.970096230506897, |
| "learning_rate": 1.0049772776455313e-05, |
| "loss": 0.3149, |
| "step": 18400 |
| }, |
| { |
| "epoch": 1.9963211426098249, |
| "grad_norm": 2.061622381210327, |
| "learning_rate": 1.002272235446873e-05, |
| "loss": 0.2945, |
| "step": 18450 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.8672147876951564, |
| "eval_hamming_loss": 0.13278521230484366, |
| "eval_jaccard_score": 0.5006501704330262, |
| "eval_loss": 0.30577707290649414, |
| "eval_runtime": 64.9813, |
| "eval_samples_per_second": 487.586, |
| "eval_steps_per_second": 30.486, |
| "step": 18484 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 36968, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 2, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.945210424147251e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
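The block above is a Hugging Face Trainer `trainer_state.json` snapshot for a RoBERTa GoEmotions fine-tuning run: 2 of 4 scheduled epochs completed, with the best eval_loss (0.3058) at step 18484 and early stopping armed with patience 2. A minimal sketch of how such a file can be inspected follows; it assumes the JSON is saved as `trainer_state.json` (a placeholder path), and the printed summary is illustrative rather than part of the checkpoint itself.

```python
import json

# Minimal sketch: load the trainer state saved above and summarize it.
# Assumes the JSON is stored as "trainer_state.json" (placeholder path).
with open("trainer_state.json") as f:
    state = json.load(f)

# Entries carrying a "loss" key are training logs (emitted every
# `logging_steps` steps); entries carrying "eval_loss" are the
# per-epoch evaluations.
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

print(f"{len(train_log)} train-loss points, logged every {state['logging_steps']} steps")
print(f"best checkpoint: {state['best_model_checkpoint']}")
print(f"best metric (eval_loss): {state['best_metric']:.4f} at step {state['best_global_step']}")
for e in eval_log:
    print(f"epoch {e['epoch']:.0f}: eval_loss={e['eval_loss']:.4f}  "
          f"jaccard={e['eval_jaccard_score']:.4f}  "
          f"hamming={e['eval_hamming_loss']:.4f}")
```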