| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 10880, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.09191176470588236, |
| "grad_norm": 2.861119031906128, |
| "learning_rate": 4.955882352941177e-05, |
| "loss": 1.678604736328125, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.18382352941176472, |
| "grad_norm": 1.6152201890945435, |
| "learning_rate": 4.9099264705882355e-05, |
| "loss": 0.554380111694336, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2757352941176471, |
| "grad_norm": 2.08105206489563, |
| "learning_rate": 4.863970588235294e-05, |
| "loss": 0.33380359649658203, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.36764705882352944, |
| "grad_norm": 1.5297737121582031, |
| "learning_rate": 4.818014705882353e-05, |
| "loss": 0.26274593353271486, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.45955882352941174, |
| "grad_norm": 1.867387294769287, |
| "learning_rate": 4.7720588235294124e-05, |
| "loss": 0.22210750579833985, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5514705882352942, |
| "grad_norm": 4.296945095062256, |
| "learning_rate": 4.7261029411764704e-05, |
| "loss": 0.1887100601196289, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6433823529411765, |
| "grad_norm": 1.9779675006866455, |
| "learning_rate": 4.68014705882353e-05, |
| "loss": 0.16664567947387696, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7352941176470589, |
| "grad_norm": 1.235213279724121, |
| "learning_rate": 4.6341911764705886e-05, |
| "loss": 0.16922021865844727, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8272058823529411, |
| "grad_norm": 2.484598398208618, |
| "learning_rate": 4.588235294117647e-05, |
| "loss": 0.15672155380249023, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.9191176470588235, |
| "grad_norm": 0.8935145139694214, |
| "learning_rate": 4.542279411764706e-05, |
| "loss": 0.1493326473236084, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.14647769927978516, |
| "eval_runtime": 4.2967, |
| "eval_samples_per_second": 2025.035, |
| "eval_steps_per_second": 63.304, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.0110294117647058, |
| "grad_norm": 0.8300992250442505, |
| "learning_rate": 4.496323529411765e-05, |
| "loss": 0.14999670028686524, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.1029411764705883, |
| "grad_norm": 0.6350510716438293, |
| "learning_rate": 4.4503676470588236e-05, |
| "loss": 0.12446197509765625, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.1948529411764706, |
| "grad_norm": 1.469220519065857, |
| "learning_rate": 4.404411764705882e-05, |
| "loss": 0.11504798889160156, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.2867647058823528, |
| "grad_norm": 0.9885977506637573, |
| "learning_rate": 4.358455882352942e-05, |
| "loss": 0.12503914833068847, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.3786764705882353, |
| "grad_norm": 1.2623215913772583, |
| "learning_rate": 4.3125000000000005e-05, |
| "loss": 0.1185552978515625, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.4705882352941178, |
| "grad_norm": 1.181009292602539, |
| "learning_rate": 4.2665441176470585e-05, |
| "loss": 0.1074635124206543, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.5625, |
| "grad_norm": 0.7625616788864136, |
| "learning_rate": 4.220588235294118e-05, |
| "loss": 0.11462491989135742, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.6544117647058822, |
| "grad_norm": 2.7165768146514893, |
| "learning_rate": 4.174632352941177e-05, |
| "loss": 0.11170839309692383, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.7463235294117647, |
| "grad_norm": 1.0306402444839478, |
| "learning_rate": 4.1286764705882354e-05, |
| "loss": 0.10664710998535157, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.8382352941176472, |
| "grad_norm": 1.0458590984344482, |
| "learning_rate": 4.082720588235294e-05, |
| "loss": 0.10655851364135742, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.9301470588235294, |
| "grad_norm": 1.751387357711792, |
| "learning_rate": 4.036764705882353e-05, |
| "loss": 0.11276634216308594, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.11876623332500458, |
| "eval_runtime": 3.7959, |
| "eval_samples_per_second": 2292.186, |
| "eval_steps_per_second": 71.656, |
| "step": 2176 |
| }, |
| { |
| "epoch": 2.0220588235294117, |
| "grad_norm": 0.9640232920646667, |
| "learning_rate": 3.9908088235294123e-05, |
| "loss": 0.10104022979736328, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.113970588235294, |
| "grad_norm": 1.449666976928711, |
| "learning_rate": 3.9448529411764704e-05, |
| "loss": 0.08359379768371582, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.2058823529411766, |
| "grad_norm": 0.781505286693573, |
| "learning_rate": 3.89889705882353e-05, |
| "loss": 0.08938695907592774, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.297794117647059, |
| "grad_norm": 0.9161350131034851, |
| "learning_rate": 3.8529411764705886e-05, |
| "loss": 0.09331055641174317, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.389705882352941, |
| "grad_norm": 0.34266597032546997, |
| "learning_rate": 3.806985294117647e-05, |
| "loss": 0.0942567253112793, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.4816176470588234, |
| "grad_norm": 0.3938254714012146, |
| "learning_rate": 3.761029411764706e-05, |
| "loss": 0.08378758430480956, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.5735294117647056, |
| "grad_norm": 0.8159363865852356, |
| "learning_rate": 3.715073529411765e-05, |
| "loss": 0.08920242309570313, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.6654411764705883, |
| "grad_norm": 0.7843156456947327, |
| "learning_rate": 3.6691176470588235e-05, |
| "loss": 0.09552728652954101, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.7573529411764706, |
| "grad_norm": 1.678454041481018, |
| "learning_rate": 3.623161764705882e-05, |
| "loss": 0.0881564712524414, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.849264705882353, |
| "grad_norm": 1.567854642868042, |
| "learning_rate": 3.577205882352942e-05, |
| "loss": 0.09041579246520996, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.9411764705882355, |
| "grad_norm": 0.587993860244751, |
| "learning_rate": 3.5312500000000005e-05, |
| "loss": 0.08352569580078124, |
| "step": 3200 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.11157828569412231, |
| "eval_runtime": 3.9136, |
| "eval_samples_per_second": 2223.252, |
| "eval_steps_per_second": 69.501, |
| "step": 3264 |
| }, |
| { |
| "epoch": 3.0330882352941178, |
| "grad_norm": 0.7346888184547424, |
| "learning_rate": 3.4852941176470585e-05, |
| "loss": 0.08925918579101562, |
| "step": 3300 |
| }, |
| { |
| "epoch": 3.125, |
| "grad_norm": 0.6136897206306458, |
| "learning_rate": 3.439338235294118e-05, |
| "loss": 0.07912126064300537, |
| "step": 3400 |
| }, |
| { |
| "epoch": 3.2169117647058822, |
| "grad_norm": 0.47108200192451477, |
| "learning_rate": 3.393382352941177e-05, |
| "loss": 0.07419106960296631, |
| "step": 3500 |
| }, |
| { |
| "epoch": 3.3088235294117645, |
| "grad_norm": 0.8382533192634583, |
| "learning_rate": 3.3474264705882354e-05, |
| "loss": 0.0670989227294922, |
| "step": 3600 |
| }, |
| { |
| "epoch": 3.400735294117647, |
| "grad_norm": 0.6706309914588928, |
| "learning_rate": 3.301470588235294e-05, |
| "loss": 0.06986721515655518, |
| "step": 3700 |
| }, |
| { |
| "epoch": 3.4926470588235294, |
| "grad_norm": 0.5485235452651978, |
| "learning_rate": 3.255514705882353e-05, |
| "loss": 0.07686973571777343, |
| "step": 3800 |
| }, |
| { |
| "epoch": 3.5845588235294117, |
| "grad_norm": 0.8460040092468262, |
| "learning_rate": 3.209558823529412e-05, |
| "loss": 0.07120684623718261, |
| "step": 3900 |
| }, |
| { |
| "epoch": 3.6764705882352944, |
| "grad_norm": 0.9563305974006653, |
| "learning_rate": 3.1636029411764704e-05, |
| "loss": 0.07464917659759522, |
| "step": 4000 |
| }, |
| { |
| "epoch": 3.7683823529411766, |
| "grad_norm": 0.6851525902748108, |
| "learning_rate": 3.11764705882353e-05, |
| "loss": 0.07348180770874023, |
| "step": 4100 |
| }, |
| { |
| "epoch": 3.860294117647059, |
| "grad_norm": 0.46768584847450256, |
| "learning_rate": 3.0716911764705886e-05, |
| "loss": 0.08051628112792969, |
| "step": 4200 |
| }, |
| { |
| "epoch": 3.952205882352941, |
| "grad_norm": 0.8145326375961304, |
| "learning_rate": 3.025735294117647e-05, |
| "loss": 0.0793468189239502, |
| "step": 4300 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 0.11373896896839142, |
| "eval_runtime": 3.8395, |
| "eval_samples_per_second": 2266.171, |
| "eval_steps_per_second": 70.842, |
| "step": 4352 |
| }, |
| { |
| "epoch": 4.044117647058823, |
| "grad_norm": 4.8013997077941895, |
| "learning_rate": 2.979779411764706e-05, |
| "loss": 0.06481593132019042, |
| "step": 4400 |
| }, |
| { |
| "epoch": 4.136029411764706, |
| "grad_norm": 1.51911199092865, |
| "learning_rate": 2.933823529411765e-05, |
| "loss": 0.05568636417388916, |
| "step": 4500 |
| }, |
| { |
| "epoch": 4.227941176470588, |
| "grad_norm": 1.1331921815872192, |
| "learning_rate": 2.8878676470588235e-05, |
| "loss": 0.06395863056182861, |
| "step": 4600 |
| }, |
| { |
| "epoch": 4.319852941176471, |
| "grad_norm": 1.9773746728897095, |
| "learning_rate": 2.8419117647058823e-05, |
| "loss": 0.05171878814697266, |
| "step": 4700 |
| }, |
| { |
| "epoch": 4.411764705882353, |
| "grad_norm": 0.720111608505249, |
| "learning_rate": 2.7959558823529414e-05, |
| "loss": 0.06219084739685059, |
| "step": 4800 |
| }, |
| { |
| "epoch": 4.5036764705882355, |
| "grad_norm": 1.243735671043396, |
| "learning_rate": 2.7500000000000004e-05, |
| "loss": 0.06258386135101318, |
| "step": 4900 |
| }, |
| { |
| "epoch": 4.595588235294118, |
| "grad_norm": 0.7214698195457458, |
| "learning_rate": 2.704044117647059e-05, |
| "loss": 0.057212424278259275, |
| "step": 5000 |
| }, |
| { |
| "epoch": 4.6875, |
| "grad_norm": 2.4246177673339844, |
| "learning_rate": 2.658088235294118e-05, |
| "loss": 0.05896786212921143, |
| "step": 5100 |
| }, |
| { |
| "epoch": 4.779411764705882, |
| "grad_norm": 0.3699852228164673, |
| "learning_rate": 2.6121323529411767e-05, |
| "loss": 0.0625047254562378, |
| "step": 5200 |
| }, |
| { |
| "epoch": 4.8713235294117645, |
| "grad_norm": 0.8965820670127869, |
| "learning_rate": 2.566176470588235e-05, |
| "loss": 0.06457361221313476, |
| "step": 5300 |
| }, |
| { |
| "epoch": 4.963235294117647, |
| "grad_norm": 0.6348599791526794, |
| "learning_rate": 2.520220588235294e-05, |
| "loss": 0.05709341049194336, |
| "step": 5400 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 0.13100895285606384, |
| "eval_runtime": 3.7913, |
| "eval_samples_per_second": 2294.964, |
| "eval_steps_per_second": 71.742, |
| "step": 5440 |
| }, |
| { |
| "epoch": 5.055147058823529, |
| "grad_norm": 0.30243951082229614, |
| "learning_rate": 2.4742647058823532e-05, |
| "loss": 0.0436199951171875, |
| "step": 5500 |
| }, |
| { |
| "epoch": 5.147058823529412, |
| "grad_norm": 1.0563251972198486, |
| "learning_rate": 2.428308823529412e-05, |
| "loss": 0.03898259401321411, |
| "step": 5600 |
| }, |
| { |
| "epoch": 5.238970588235294, |
| "grad_norm": 0.3339505195617676, |
| "learning_rate": 2.3823529411764707e-05, |
| "loss": 0.03611770153045654, |
| "step": 5700 |
| }, |
| { |
| "epoch": 5.330882352941177, |
| "grad_norm": 3.47481107711792, |
| "learning_rate": 2.3363970588235295e-05, |
| "loss": 0.03747700929641724, |
| "step": 5800 |
| }, |
| { |
| "epoch": 5.422794117647059, |
| "grad_norm": 0.49956804513931274, |
| "learning_rate": 2.2904411764705882e-05, |
| "loss": 0.034790968894958495, |
| "step": 5900 |
| }, |
| { |
| "epoch": 5.514705882352941, |
| "grad_norm": 0.2137073427438736, |
| "learning_rate": 2.2444852941176473e-05, |
| "loss": 0.042610764503479004, |
| "step": 6000 |
| }, |
| { |
| "epoch": 5.606617647058823, |
| "grad_norm": 4.238280296325684, |
| "learning_rate": 2.198529411764706e-05, |
| "loss": 0.041733989715576174, |
| "step": 6100 |
| }, |
| { |
| "epoch": 5.698529411764706, |
| "grad_norm": 0.7751985192298889, |
| "learning_rate": 2.1525735294117648e-05, |
| "loss": 0.042198920249938966, |
| "step": 6200 |
| }, |
| { |
| "epoch": 5.790441176470588, |
| "grad_norm": 6.274240493774414, |
| "learning_rate": 2.1066176470588235e-05, |
| "loss": 0.035168659687042234, |
| "step": 6300 |
| }, |
| { |
| "epoch": 5.882352941176471, |
| "grad_norm": 0.8700118064880371, |
| "learning_rate": 2.0606617647058823e-05, |
| "loss": 0.04176306247711182, |
| "step": 6400 |
| }, |
| { |
| "epoch": 5.974264705882353, |
| "grad_norm": 0.40382614731788635, |
| "learning_rate": 2.0151654411764708e-05, |
| "loss": 0.03780954122543335, |
| "step": 6500 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 0.1548856645822525, |
| "eval_runtime": 3.8561, |
| "eval_samples_per_second": 2256.438, |
| "eval_steps_per_second": 70.538, |
| "step": 6528 |
| }, |
| { |
| "epoch": 6.0661764705882355, |
| "grad_norm": 0.35931289196014404, |
| "learning_rate": 1.9692095588235295e-05, |
| "loss": 0.026753320693969726, |
| "step": 6600 |
| }, |
| { |
| "epoch": 6.158088235294118, |
| "grad_norm": 1.0790654420852661, |
| "learning_rate": 1.9232536764705883e-05, |
| "loss": 0.022549192905426025, |
| "step": 6700 |
| }, |
| { |
| "epoch": 6.25, |
| "grad_norm": 0.39832767844200134, |
| "learning_rate": 1.8777573529411764e-05, |
| "loss": 0.02674192190170288, |
| "step": 6800 |
| }, |
| { |
| "epoch": 6.341911764705882, |
| "grad_norm": 0.38946613669395447, |
| "learning_rate": 1.8318014705882352e-05, |
| "loss": 0.024337658882141112, |
| "step": 6900 |
| }, |
| { |
| "epoch": 6.4338235294117645, |
| "grad_norm": 6.687967300415039, |
| "learning_rate": 1.7858455882352943e-05, |
| "loss": 0.02405022144317627, |
| "step": 7000 |
| }, |
| { |
| "epoch": 6.525735294117647, |
| "grad_norm": 1.1742165088653564, |
| "learning_rate": 1.739889705882353e-05, |
| "loss": 0.023499369621276855, |
| "step": 7100 |
| }, |
| { |
| "epoch": 6.617647058823529, |
| "grad_norm": 0.728435754776001, |
| "learning_rate": 1.693933823529412e-05, |
| "loss": 0.01860466957092285, |
| "step": 7200 |
| }, |
| { |
| "epoch": 6.709558823529412, |
| "grad_norm": 0.18539367616176605, |
| "learning_rate": 1.6479779411764705e-05, |
| "loss": 0.024487736225128173, |
| "step": 7300 |
| }, |
| { |
| "epoch": 6.801470588235294, |
| "grad_norm": 2.0757601261138916, |
| "learning_rate": 1.6020220588235296e-05, |
| "loss": 0.027930150032043456, |
| "step": 7400 |
| }, |
| { |
| "epoch": 6.893382352941177, |
| "grad_norm": 0.6962282657623291, |
| "learning_rate": 1.5560661764705883e-05, |
| "loss": 0.019371466636657717, |
| "step": 7500 |
| }, |
| { |
| "epoch": 6.985294117647059, |
| "grad_norm": 0.3877858519554138, |
| "learning_rate": 1.510110294117647e-05, |
| "loss": 0.023068771362304688, |
| "step": 7600 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 0.19883336126804352, |
| "eval_runtime": 3.8871, |
| "eval_samples_per_second": 2238.423, |
| "eval_steps_per_second": 69.975, |
| "step": 7616 |
| }, |
| { |
| "epoch": 7.077205882352941, |
| "grad_norm": 0.32462701201438904, |
| "learning_rate": 1.464154411764706e-05, |
| "loss": 0.01416821002960205, |
| "step": 7700 |
| }, |
| { |
| "epoch": 7.169117647058823, |
| "grad_norm": 0.70732182264328, |
| "learning_rate": 1.4181985294117647e-05, |
| "loss": 0.013301538228988647, |
| "step": 7800 |
| }, |
| { |
| "epoch": 7.261029411764706, |
| "grad_norm": 12.949718475341797, |
| "learning_rate": 1.3722426470588238e-05, |
| "loss": 0.01586754560470581, |
| "step": 7900 |
| }, |
| { |
| "epoch": 7.352941176470588, |
| "grad_norm": 1.7924553155899048, |
| "learning_rate": 1.3262867647058824e-05, |
| "loss": 0.019125467538833617, |
| "step": 8000 |
| }, |
| { |
| "epoch": 7.444852941176471, |
| "grad_norm": 0.45370689034461975, |
| "learning_rate": 1.2803308823529411e-05, |
| "loss": 0.017261466979980468, |
| "step": 8100 |
| }, |
| { |
| "epoch": 7.536764705882353, |
| "grad_norm": 0.24471713602542877, |
| "learning_rate": 1.2343750000000002e-05, |
| "loss": 0.016836028099060058, |
| "step": 8200 |
| }, |
| { |
| "epoch": 7.6286764705882355, |
| "grad_norm": 0.273219496011734, |
| "learning_rate": 1.1884191176470588e-05, |
| "loss": 0.014804782867431641, |
| "step": 8300 |
| }, |
| { |
| "epoch": 7.720588235294118, |
| "grad_norm": 0.27901849150657654, |
| "learning_rate": 1.1424632352941177e-05, |
| "loss": 0.017638254165649413, |
| "step": 8400 |
| }, |
| { |
| "epoch": 7.8125, |
| "grad_norm": 0.41847002506256104, |
| "learning_rate": 1.0965073529411766e-05, |
| "loss": 0.014013255834579469, |
| "step": 8500 |
| }, |
| { |
| "epoch": 7.904411764705882, |
| "grad_norm": 0.3298964500427246, |
| "learning_rate": 1.0505514705882353e-05, |
| "loss": 0.015006015300750733, |
| "step": 8600 |
| }, |
| { |
| "epoch": 7.9963235294117645, |
| "grad_norm": 0.5094680786132812, |
| "learning_rate": 1.0045955882352942e-05, |
| "loss": 0.016840940713882445, |
| "step": 8700 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 0.22593119740486145, |
| "eval_runtime": 3.8624, |
| "eval_samples_per_second": 2252.772, |
| "eval_steps_per_second": 70.423, |
| "step": 8704 |
| }, |
| { |
| "epoch": 8.088235294117647, |
| "grad_norm": 0.23575666546821594, |
| "learning_rate": 9.58639705882353e-06, |
| "loss": 0.011906511783599853, |
| "step": 8800 |
| }, |
| { |
| "epoch": 8.180147058823529, |
| "grad_norm": 0.39200881123542786, |
| "learning_rate": 9.126838235294117e-06, |
| "loss": 0.009779441356658935, |
| "step": 8900 |
| }, |
| { |
| "epoch": 8.272058823529411, |
| "grad_norm": 0.2954489588737488, |
| "learning_rate": 8.667279411764706e-06, |
| "loss": 0.011333670616149902, |
| "step": 9000 |
| }, |
| { |
| "epoch": 8.363970588235293, |
| "grad_norm": 0.1555805653333664, |
| "learning_rate": 8.207720588235294e-06, |
| "loss": 0.011691917181015015, |
| "step": 9100 |
| }, |
| { |
| "epoch": 8.455882352941176, |
| "grad_norm": 0.6293551921844482, |
| "learning_rate": 7.748161764705883e-06, |
| "loss": 0.010650770664215088, |
| "step": 9200 |
| }, |
| { |
| "epoch": 8.547794117647058, |
| "grad_norm": 0.47241711616516113, |
| "learning_rate": 7.288602941176471e-06, |
| "loss": 0.008451443314552307, |
| "step": 9300 |
| }, |
| { |
| "epoch": 8.639705882352942, |
| "grad_norm": 0.39692994952201843, |
| "learning_rate": 6.829044117647059e-06, |
| "loss": 0.01287778615951538, |
| "step": 9400 |
| }, |
| { |
| "epoch": 8.731617647058824, |
| "grad_norm": 4.867070198059082, |
| "learning_rate": 6.374080882352941e-06, |
| "loss": 0.012674452066421508, |
| "step": 9500 |
| }, |
| { |
| "epoch": 8.823529411764707, |
| "grad_norm": 0.20786941051483154, |
| "learning_rate": 5.9145220588235295e-06, |
| "loss": 0.009658980965614319, |
| "step": 9600 |
| }, |
| { |
| "epoch": 8.915441176470589, |
| "grad_norm": 0.24980570375919342, |
| "learning_rate": 5.454963235294118e-06, |
| "loss": 0.012473410367965699, |
| "step": 9700 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_loss": 0.2505253851413727, |
| "eval_runtime": 3.8579, |
| "eval_samples_per_second": 2255.375, |
| "eval_steps_per_second": 70.505, |
| "step": 9792 |
| }, |
| { |
| "epoch": 9.007352941176471, |
| "grad_norm": 0.34893837571144104, |
| "learning_rate": 4.995404411764706e-06, |
| "loss": 0.01137054443359375, |
| "step": 9800 |
| }, |
| { |
| "epoch": 9.099264705882353, |
| "grad_norm": 0.1509261131286621, |
| "learning_rate": 4.535845588235294e-06, |
| "loss": 0.009393535852432251, |
| "step": 9900 |
| }, |
| { |
| "epoch": 9.191176470588236, |
| "grad_norm": 0.4828801453113556, |
| "learning_rate": 4.076286764705883e-06, |
| "loss": 0.011060981750488282, |
| "step": 10000 |
| }, |
| { |
| "epoch": 9.283088235294118, |
| "grad_norm": 0.10140291601419449, |
| "learning_rate": 3.616727941176471e-06, |
| "loss": 0.008189416527748107, |
| "step": 10100 |
| }, |
| { |
| "epoch": 9.375, |
| "grad_norm": 0.13836342096328735, |
| "learning_rate": 3.1571691176470588e-06, |
| "loss": 0.00860303282737732, |
| "step": 10200 |
| }, |
| { |
| "epoch": 9.466911764705882, |
| "grad_norm": 0.3119546175003052, |
| "learning_rate": 2.6976102941176475e-06, |
| "loss": 0.008158923387527465, |
| "step": 10300 |
| }, |
| { |
| "epoch": 9.558823529411764, |
| "grad_norm": 0.7189019918441772, |
| "learning_rate": 2.2380514705882353e-06, |
| "loss": 0.007920079231262207, |
| "step": 10400 |
| }, |
| { |
| "epoch": 9.650735294117647, |
| "grad_norm": 0.28993985056877136, |
| "learning_rate": 1.7784926470588236e-06, |
| "loss": 0.00800090193748474, |
| "step": 10500 |
| }, |
| { |
| "epoch": 9.742647058823529, |
| "grad_norm": 0.6550254225730896, |
| "learning_rate": 1.3189338235294119e-06, |
| "loss": 0.008071759939193726, |
| "step": 10600 |
| }, |
| { |
| "epoch": 9.834558823529411, |
| "grad_norm": 0.47817108035087585, |
| "learning_rate": 8.593750000000001e-07, |
| "loss": 0.008685371279716492, |
| "step": 10700 |
| }, |
| { |
| "epoch": 9.926470588235293, |
| "grad_norm": 0.5251961350440979, |
| "learning_rate": 3.998161764705882e-07, |
| "loss": 0.007078754305839538, |
| "step": 10800 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 0.24180535972118378, |
| "eval_runtime": 3.9524, |
| "eval_samples_per_second": 2201.471, |
| "eval_steps_per_second": 68.82, |
| "step": 10880 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 10880, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.138963309056e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|