| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.9241877256317688, |
| "eval_steps": 25, |
| "global_step": 204, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.019253910950661854, |
| "grad_norm": 23.54662322998047, |
| "learning_rate": 0.00019901960784313727, |
| "loss": 9.4209, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.03850782190132371, |
| "grad_norm": 22.151025772094727, |
| "learning_rate": 0.00019803921568627454, |
| "loss": 9.3584, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.05776173285198556, |
| "grad_norm": 32.229759216308594, |
| "learning_rate": 0.00019705882352941177, |
| "loss": 9.1469, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.07701564380264742, |
| "grad_norm": 42.96324920654297, |
| "learning_rate": 0.000196078431372549, |
| "loss": 8.5595, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.09626955475330927, |
| "grad_norm": 32.40974044799805, |
| "learning_rate": 0.00019509803921568628, |
| "loss": 8.3043, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.11552346570397112, |
| "grad_norm": 32.838134765625, |
| "learning_rate": 0.00019411764705882354, |
| "loss": 8.1422, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.13477737665463296, |
| "grad_norm": 34.38292694091797, |
| "learning_rate": 0.0001931372549019608, |
| "loss": 7.7643, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.15403128760529483, |
| "grad_norm": 31.947425842285156, |
| "learning_rate": 0.00019215686274509807, |
| "loss": 7.4565, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.17328519855595667, |
| "grad_norm": 242.39166259765625, |
| "learning_rate": 0.0001911764705882353, |
| "loss": 7.436, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.19253910950661854, |
| "grad_norm": 25.68425750732422, |
| "learning_rate": 0.00019019607843137254, |
| "loss": 7.1307, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.21179302045728038, |
| "grad_norm": 24.717641830444336, |
| "learning_rate": 0.0001892156862745098, |
| "loss": 7.1206, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.23104693140794225, |
| "grad_norm": 36.47980880737305, |
| "learning_rate": 0.00018823529411764707, |
| "loss": 6.6912, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.2503008423586041, |
| "grad_norm": 28.181612014770508, |
| "learning_rate": 0.00018725490196078433, |
| "loss": 6.6547, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.2695547533092659, |
| "grad_norm": 24.55516242980957, |
| "learning_rate": 0.00018627450980392157, |
| "loss": 6.9486, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.2888086642599278, |
| "grad_norm": 32.426963806152344, |
| "learning_rate": 0.00018529411764705883, |
| "loss": 7.1069, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.30806257521058966, |
| "grad_norm": 20.413976669311523, |
| "learning_rate": 0.00018431372549019607, |
| "loss": 6.6628, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.32731648616125153, |
| "grad_norm": 28.58907699584961, |
| "learning_rate": 0.00018333333333333334, |
| "loss": 6.5333, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.34657039711191334, |
| "grad_norm": 24.02996253967285, |
| "learning_rate": 0.0001823529411764706, |
| "loss": 6.5981, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.3658243080625752, |
| "grad_norm": 23.250669479370117, |
| "learning_rate": 0.00018137254901960786, |
| "loss": 6.4779, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.3850782190132371, |
| "grad_norm": 15.006091117858887, |
| "learning_rate": 0.0001803921568627451, |
| "loss": 6.6096, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.4043321299638989, |
| "grad_norm": 16.560985565185547, |
| "learning_rate": 0.00017941176470588236, |
| "loss": 6.6496, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.42358604091456076, |
| "grad_norm": 31.329875946044922, |
| "learning_rate": 0.00017843137254901963, |
| "loss": 6.9627, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.4428399518652226, |
| "grad_norm": 12.381958961486816, |
| "learning_rate": 0.00017745098039215687, |
| "loss": 6.398, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.4620938628158845, |
| "grad_norm": 9.271923065185547, |
| "learning_rate": 0.00017647058823529413, |
| "loss": 6.6, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.4813477737665463, |
| "grad_norm": 12.544185638427734, |
| "learning_rate": 0.00017549019607843137, |
| "loss": 6.4684, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.4813477737665463, |
| "eval_clap": 0.09883298724889755, |
| "eval_loss": 6.00625467300415, |
| "eval_runtime": 166.3531, |
| "eval_samples_per_second": 0.096, |
| "eval_steps_per_second": 0.096, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.5006016847172082, |
| "grad_norm": 11.769013404846191, |
| "learning_rate": 0.00017450980392156863, |
| "loss": 6.5248, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.51985559566787, |
| "grad_norm": 11.039627075195312, |
| "learning_rate": 0.0001735294117647059, |
| "loss": 6.6403, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.5391095066185319, |
| "grad_norm": 17.4042911529541, |
| "learning_rate": 0.00017254901960784316, |
| "loss": 6.8092, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.5583634175691937, |
| "grad_norm": 12.926351547241211, |
| "learning_rate": 0.0001715686274509804, |
| "loss": 6.5886, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.5776173285198556, |
| "grad_norm": 12.865156173706055, |
| "learning_rate": 0.00017058823529411766, |
| "loss": 6.6176, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.5968712394705175, |
| "grad_norm": 15.517515182495117, |
| "learning_rate": 0.0001696078431372549, |
| "loss": 6.4096, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.6161251504211793, |
| "grad_norm": 12.356785774230957, |
| "learning_rate": 0.00016862745098039216, |
| "loss": 6.4528, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.6353790613718412, |
| "grad_norm": 15.226251602172852, |
| "learning_rate": 0.00016764705882352942, |
| "loss": 6.3188, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.6546329723225031, |
| "grad_norm": 13.221582412719727, |
| "learning_rate": 0.0001666666666666667, |
| "loss": 6.542, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.6738868832731648, |
| "grad_norm": 13.414304733276367, |
| "learning_rate": 0.00016568627450980395, |
| "loss": 6.4272, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.6931407942238267, |
| "grad_norm": 27.81321907043457, |
| "learning_rate": 0.0001647058823529412, |
| "loss": 6.7035, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.7123947051744886, |
| "grad_norm": 17.882911682128906, |
| "learning_rate": 0.00016372549019607843, |
| "loss": 6.6117, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.7316486161251504, |
| "grad_norm": 10.675613403320312, |
| "learning_rate": 0.0001627450980392157, |
| "loss": 6.4818, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.7509025270758123, |
| "grad_norm": 11.32511043548584, |
| "learning_rate": 0.00016176470588235295, |
| "loss": 6.4717, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.7701564380264742, |
| "grad_norm": 13.292048454284668, |
| "learning_rate": 0.00016078431372549022, |
| "loss": 6.4119, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.789410348977136, |
| "grad_norm": 9.824177742004395, |
| "learning_rate": 0.00015980392156862746, |
| "loss": 6.6399, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.8086642599277978, |
| "grad_norm": 18.48476791381836, |
| "learning_rate": 0.0001588235294117647, |
| "loss": 6.4116, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.8279181708784596, |
| "grad_norm": 10.409250259399414, |
| "learning_rate": 0.00015784313725490196, |
| "loss": 6.4832, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.8471720818291215, |
| "grad_norm": 18.297466278076172, |
| "learning_rate": 0.00015686274509803922, |
| "loss": 6.308, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.8664259927797834, |
| "grad_norm": 12.408952713012695, |
| "learning_rate": 0.00015588235294117648, |
| "loss": 6.3373, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.8856799037304453, |
| "grad_norm": 12.280571937561035, |
| "learning_rate": 0.00015490196078431375, |
| "loss": 6.3173, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.9049338146811071, |
| "grad_norm": 12.348167419433594, |
| "learning_rate": 0.00015392156862745098, |
| "loss": 6.2873, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.924187725631769, |
| "grad_norm": 28.005126953125, |
| "learning_rate": 0.00015294117647058822, |
| "loss": 6.7117, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.9434416365824309, |
| "grad_norm": 16.248571395874023, |
| "learning_rate": 0.00015196078431372549, |
| "loss": 6.3493, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.9626955475330926, |
| "grad_norm": 19.102869033813477, |
| "learning_rate": 0.00015098039215686275, |
| "loss": 6.4209, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.9626955475330926, |
| "eval_clap": 0.13957397639751434, |
| "eval_loss": 6.070012092590332, |
| "eval_runtime": 165.6113, |
| "eval_samples_per_second": 0.097, |
| "eval_steps_per_second": 0.097, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.9819494584837545, |
| "grad_norm": 6.675487995147705, |
| "learning_rate": 0.00015000000000000001, |
| "loss": 6.1695, |
| "step": 51 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 14.88092041015625, |
| "learning_rate": 0.00014901960784313728, |
| "loss": 5.6169, |
| "step": 52 |
| }, |
| { |
| "epoch": 1.0192539109506618, |
| "grad_norm": 19.78269386291504, |
| "learning_rate": 0.00014803921568627451, |
| "loss": 6.5455, |
| "step": 53 |
| }, |
| { |
| "epoch": 1.0385078219013237, |
| "grad_norm": 7.873740196228027, |
| "learning_rate": 0.00014705882352941178, |
| "loss": 6.3154, |
| "step": 54 |
| }, |
| { |
| "epoch": 1.0577617328519855, |
| "grad_norm": 10.514632225036621, |
| "learning_rate": 0.00014607843137254902, |
| "loss": 6.5085, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.0770156438026475, |
| "grad_norm": 10.021757125854492, |
| "learning_rate": 0.00014509803921568628, |
| "loss": 6.5109, |
| "step": 56 |
| }, |
| { |
| "epoch": 1.0962695547533092, |
| "grad_norm": 8.690667152404785, |
| "learning_rate": 0.00014411764705882354, |
| "loss": 6.5515, |
| "step": 57 |
| }, |
| { |
| "epoch": 1.1155234657039712, |
| "grad_norm": 12.78662109375, |
| "learning_rate": 0.00014313725490196078, |
| "loss": 6.5425, |
| "step": 58 |
| }, |
| { |
| "epoch": 1.134777376654633, |
| "grad_norm": 10.592965126037598, |
| "learning_rate": 0.00014215686274509804, |
| "loss": 6.5105, |
| "step": 59 |
| }, |
| { |
| "epoch": 1.154031287605295, |
| "grad_norm": 7.947122573852539, |
| "learning_rate": 0.0001411764705882353, |
| "loss": 6.6142, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.1732851985559567, |
| "grad_norm": 6.823319911956787, |
| "learning_rate": 0.00014019607843137255, |
| "loss": 6.5339, |
| "step": 61 |
| }, |
| { |
| "epoch": 1.1925391095066185, |
| "grad_norm": 16.670989990234375, |
| "learning_rate": 0.0001392156862745098, |
| "loss": 6.3022, |
| "step": 62 |
| }, |
| { |
| "epoch": 1.2117930204572804, |
| "grad_norm": 20.09317398071289, |
| "learning_rate": 0.00013823529411764707, |
| "loss": 6.0779, |
| "step": 63 |
| }, |
| { |
| "epoch": 1.2310469314079422, |
| "grad_norm": 8.030014991760254, |
| "learning_rate": 0.0001372549019607843, |
| "loss": 6.3284, |
| "step": 64 |
| }, |
| { |
| "epoch": 1.2503008423586042, |
| "grad_norm": 10.324827194213867, |
| "learning_rate": 0.00013627450980392157, |
| "loss": 6.4022, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.269554753309266, |
| "grad_norm": 29.070960998535156, |
| "learning_rate": 0.00013529411764705884, |
| "loss": 6.7835, |
| "step": 66 |
| }, |
| { |
| "epoch": 1.288808664259928, |
| "grad_norm": 17.838394165039062, |
| "learning_rate": 0.00013431372549019608, |
| "loss": 6.5344, |
| "step": 67 |
| }, |
| { |
| "epoch": 1.3080625752105897, |
| "grad_norm": 10.388354301452637, |
| "learning_rate": 0.00013333333333333334, |
| "loss": 6.3438, |
| "step": 68 |
| }, |
| { |
| "epoch": 1.3273164861612514, |
| "grad_norm": 9.607653617858887, |
| "learning_rate": 0.0001323529411764706, |
| "loss": 6.4325, |
| "step": 69 |
| }, |
| { |
| "epoch": 1.3465703971119134, |
| "grad_norm": 9.639688491821289, |
| "learning_rate": 0.00013137254901960784, |
| "loss": 6.3907, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.3658243080625752, |
| "grad_norm": 9.424043655395508, |
| "learning_rate": 0.0001303921568627451, |
| "loss": 6.605, |
| "step": 71 |
| }, |
| { |
| "epoch": 1.3850782190132371, |
| "grad_norm": 8.21303653717041, |
| "learning_rate": 0.00012941176470588237, |
| "loss": 6.6275, |
| "step": 72 |
| }, |
| { |
| "epoch": 1.404332129963899, |
| "grad_norm": 10.479741096496582, |
| "learning_rate": 0.00012843137254901963, |
| "loss": 6.4801, |
| "step": 73 |
| }, |
| { |
| "epoch": 1.4235860409145609, |
| "grad_norm": 21.424253463745117, |
| "learning_rate": 0.00012745098039215687, |
| "loss": 6.3391, |
| "step": 74 |
| }, |
| { |
| "epoch": 1.4428399518652226, |
| "grad_norm": 6.5513224601745605, |
| "learning_rate": 0.0001264705882352941, |
| "loss": 6.7252, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.4428399518652226, |
| "eval_clap": 0.10309316217899323, |
| "eval_loss": 6.036521911621094, |
| "eval_runtime": 165.4554, |
| "eval_samples_per_second": 0.097, |
| "eval_steps_per_second": 0.097, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.4620938628158844, |
| "grad_norm": 32.52528762817383, |
| "learning_rate": 0.00012549019607843137, |
| "loss": 6.1922, |
| "step": 76 |
| }, |
| { |
| "epoch": 1.4813477737665464, |
| "grad_norm": 23.51795196533203, |
| "learning_rate": 0.00012450980392156863, |
| "loss": 6.3506, |
| "step": 77 |
| }, |
| { |
| "epoch": 1.5006016847172083, |
| "grad_norm": 10.925686836242676, |
| "learning_rate": 0.0001235294117647059, |
| "loss": 6.4783, |
| "step": 78 |
| }, |
| { |
| "epoch": 1.5198555956678699, |
| "grad_norm": 7.924820899963379, |
| "learning_rate": 0.00012254901960784316, |
| "loss": 6.6288, |
| "step": 79 |
| }, |
| { |
| "epoch": 1.5391095066185319, |
| "grad_norm": 6.946601390838623, |
| "learning_rate": 0.00012156862745098039, |
| "loss": 6.4085, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.5583634175691938, |
| "grad_norm": 10.120043754577637, |
| "learning_rate": 0.00012058823529411765, |
| "loss": 6.4667, |
| "step": 81 |
| }, |
| { |
| "epoch": 1.5776173285198556, |
| "grad_norm": 9.635017395019531, |
| "learning_rate": 0.0001196078431372549, |
| "loss": 6.3742, |
| "step": 82 |
| }, |
| { |
| "epoch": 1.5968712394705173, |
| "grad_norm": 6.578627586364746, |
| "learning_rate": 0.00011862745098039216, |
| "loss": 6.1956, |
| "step": 83 |
| }, |
| { |
| "epoch": 1.6161251504211793, |
| "grad_norm": 18.30640983581543, |
| "learning_rate": 0.00011764705882352942, |
| "loss": 6.4804, |
| "step": 84 |
| }, |
| { |
| "epoch": 1.6353790613718413, |
| "grad_norm": 11.166876792907715, |
| "learning_rate": 0.00011666666666666668, |
| "loss": 6.4495, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.654632972322503, |
| "grad_norm": 8.15738582611084, |
| "learning_rate": 0.00011568627450980394, |
| "loss": 6.1371, |
| "step": 86 |
| }, |
| { |
| "epoch": 1.6738868832731648, |
| "grad_norm": 9.473989486694336, |
| "learning_rate": 0.00011470588235294118, |
| "loss": 6.366, |
| "step": 87 |
| }, |
| { |
| "epoch": 1.6931407942238268, |
| "grad_norm": 16.634380340576172, |
| "learning_rate": 0.00011372549019607843, |
| "loss": 6.1748, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.7123947051744886, |
| "grad_norm": 20.92518424987793, |
| "learning_rate": 0.0001127450980392157, |
| "loss": 6.0918, |
| "step": 89 |
| }, |
| { |
| "epoch": 1.7316486161251503, |
| "grad_norm": 10.186667442321777, |
| "learning_rate": 0.00011176470588235294, |
| "loss": 6.1072, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.7509025270758123, |
| "grad_norm": 21.300180435180664, |
| "learning_rate": 0.00011078431372549021, |
| "loss": 6.724, |
| "step": 91 |
| }, |
| { |
| "epoch": 1.7701564380264743, |
| "grad_norm": 17.833845138549805, |
| "learning_rate": 0.00010980392156862746, |
| "loss": 6.2231, |
| "step": 92 |
| }, |
| { |
| "epoch": 1.789410348977136, |
| "grad_norm": 12.850127220153809, |
| "learning_rate": 0.0001088235294117647, |
| "loss": 6.4846, |
| "step": 93 |
| }, |
| { |
| "epoch": 1.8086642599277978, |
| "grad_norm": 16.229764938354492, |
| "learning_rate": 0.00010784313725490196, |
| "loss": 6.6046, |
| "step": 94 |
| }, |
| { |
| "epoch": 1.8279181708784598, |
| "grad_norm": 41.6049690246582, |
| "learning_rate": 0.00010686274509803922, |
| "loss": 6.5044, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.8471720818291215, |
| "grad_norm": 8.0320463180542, |
| "learning_rate": 0.00010588235294117647, |
| "loss": 6.4836, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.8664259927797833, |
| "grad_norm": 19.129127502441406, |
| "learning_rate": 0.00010490196078431374, |
| "loss": 6.1962, |
| "step": 97 |
| }, |
| { |
| "epoch": 1.8856799037304453, |
| "grad_norm": 14.464997291564941, |
| "learning_rate": 0.00010392156862745099, |
| "loss": 6.2694, |
| "step": 98 |
| }, |
| { |
| "epoch": 1.9049338146811072, |
| "grad_norm": 25.245752334594727, |
| "learning_rate": 0.00010294117647058823, |
| "loss": 6.0148, |
| "step": 99 |
| }, |
| { |
| "epoch": 1.924187725631769, |
| "grad_norm": 12.66399097442627, |
| "learning_rate": 0.00010196078431372549, |
| "loss": 6.1879, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.924187725631769, |
| "eval_clap": 0.12328307330608368, |
| "eval_loss": 5.896579742431641, |
| "eval_runtime": 165.5834, |
| "eval_samples_per_second": 0.097, |
| "eval_steps_per_second": 0.097, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.9434416365824307, |
| "grad_norm": 12.162952423095703, |
| "learning_rate": 0.00010098039215686274, |
| "loss": 6.1875, |
| "step": 101 |
| }, |
| { |
| "epoch": 1.9626955475330927, |
| "grad_norm": 16.754629135131836, |
| "learning_rate": 0.0001, |
| "loss": 6.5483, |
| "step": 102 |
| }, |
| { |
| "epoch": 1.9819494584837545, |
| "grad_norm": 9.804841995239258, |
| "learning_rate": 9.901960784313727e-05, |
| "loss": 6.0631, |
| "step": 103 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 26.169551849365234, |
| "learning_rate": 9.80392156862745e-05, |
| "loss": 6.3384, |
| "step": 104 |
| }, |
| { |
| "epoch": 2.019253910950662, |
| "grad_norm": 22.054380416870117, |
| "learning_rate": 9.705882352941177e-05, |
| "loss": 6.5192, |
| "step": 105 |
| }, |
| { |
| "epoch": 2.0385078219013235, |
| "grad_norm": 13.319371223449707, |
| "learning_rate": 9.607843137254903e-05, |
| "loss": 6.1904, |
| "step": 106 |
| }, |
| { |
| "epoch": 2.0577617328519855, |
| "grad_norm": 13.158707618713379, |
| "learning_rate": 9.509803921568627e-05, |
| "loss": 6.4906, |
| "step": 107 |
| }, |
| { |
| "epoch": 2.0770156438026475, |
| "grad_norm": 7.972289562225342, |
| "learning_rate": 9.411764705882353e-05, |
| "loss": 6.4551, |
| "step": 108 |
| }, |
| { |
| "epoch": 2.0962695547533094, |
| "grad_norm": 14.052528381347656, |
| "learning_rate": 9.313725490196079e-05, |
| "loss": 6.2028, |
| "step": 109 |
| }, |
| { |
| "epoch": 2.115523465703971, |
| "grad_norm": 21.128631591796875, |
| "learning_rate": 9.215686274509804e-05, |
| "loss": 6.121, |
| "step": 110 |
| }, |
| { |
| "epoch": 2.134777376654633, |
| "grad_norm": 9.11488151550293, |
| "learning_rate": 9.11764705882353e-05, |
| "loss": 6.559, |
| "step": 111 |
| }, |
| { |
| "epoch": 2.154031287605295, |
| "grad_norm": 10.081767082214355, |
| "learning_rate": 9.019607843137255e-05, |
| "loss": 6.4236, |
| "step": 112 |
| }, |
| { |
| "epoch": 2.1732851985559565, |
| "grad_norm": 7.397235870361328, |
| "learning_rate": 8.921568627450981e-05, |
| "loss": 6.5415, |
| "step": 113 |
| }, |
| { |
| "epoch": 2.1925391095066185, |
| "grad_norm": 9.652939796447754, |
| "learning_rate": 8.823529411764706e-05, |
| "loss": 6.3744, |
| "step": 114 |
| }, |
| { |
| "epoch": 2.2117930204572804, |
| "grad_norm": 12.823005676269531, |
| "learning_rate": 8.725490196078432e-05, |
| "loss": 5.9683, |
| "step": 115 |
| }, |
| { |
| "epoch": 2.2310469314079424, |
| "grad_norm": 9.981169700622559, |
| "learning_rate": 8.627450980392158e-05, |
| "loss": 6.2714, |
| "step": 116 |
| }, |
| { |
| "epoch": 2.250300842358604, |
| "grad_norm": 11.026590347290039, |
| "learning_rate": 8.529411764705883e-05, |
| "loss": 6.1287, |
| "step": 117 |
| }, |
| { |
| "epoch": 2.269554753309266, |
| "grad_norm": 14.469505310058594, |
| "learning_rate": 8.431372549019608e-05, |
| "loss": 6.2634, |
| "step": 118 |
| }, |
| { |
| "epoch": 2.288808664259928, |
| "grad_norm": 10.639300346374512, |
| "learning_rate": 8.333333333333334e-05, |
| "loss": 6.1014, |
| "step": 119 |
| }, |
| { |
| "epoch": 2.30806257521059, |
| "grad_norm": 10.407938003540039, |
| "learning_rate": 8.23529411764706e-05, |
| "loss": 6.2487, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.3273164861612514, |
| "grad_norm": 18.310867309570312, |
| "learning_rate": 8.137254901960785e-05, |
| "loss": 6.025, |
| "step": 121 |
| }, |
| { |
| "epoch": 2.3465703971119134, |
| "grad_norm": 13.314108848571777, |
| "learning_rate": 8.039215686274511e-05, |
| "loss": 6.1319, |
| "step": 122 |
| }, |
| { |
| "epoch": 2.3658243080625754, |
| "grad_norm": 12.528412818908691, |
| "learning_rate": 7.941176470588235e-05, |
| "loss": 6.27, |
| "step": 123 |
| }, |
| { |
| "epoch": 2.385078219013237, |
| "grad_norm": 10.71603775024414, |
| "learning_rate": 7.843137254901961e-05, |
| "loss": 6.4118, |
| "step": 124 |
| }, |
| { |
| "epoch": 2.404332129963899, |
| "grad_norm": 8.234016418457031, |
| "learning_rate": 7.745098039215687e-05, |
| "loss": 6.3642, |
| "step": 125 |
| }, |
| { |
| "epoch": 2.404332129963899, |
| "eval_clap": 0.10650094598531723, |
| "eval_loss": 6.806448936462402, |
| "eval_runtime": 165.8182, |
| "eval_samples_per_second": 0.096, |
| "eval_steps_per_second": 0.096, |
| "step": 125 |
| }, |
| { |
| "epoch": 2.423586040914561, |
| "grad_norm": 13.84628963470459, |
| "learning_rate": 7.647058823529411e-05, |
| "loss": 6.0872, |
| "step": 126 |
| }, |
| { |
| "epoch": 2.4428399518652224, |
| "grad_norm": 7.576101779937744, |
| "learning_rate": 7.549019607843137e-05, |
| "loss": 6.3515, |
| "step": 127 |
| }, |
| { |
| "epoch": 2.4620938628158844, |
| "grad_norm": 9.205301284790039, |
| "learning_rate": 7.450980392156864e-05, |
| "loss": 6.0883, |
| "step": 128 |
| }, |
| { |
| "epoch": 2.4813477737665464, |
| "grad_norm": 8.85059928894043, |
| "learning_rate": 7.352941176470589e-05, |
| "loss": 5.824, |
| "step": 129 |
| }, |
| { |
| "epoch": 2.5006016847172083, |
| "grad_norm": 6.963297367095947, |
| "learning_rate": 7.254901960784314e-05, |
| "loss": 6.4633, |
| "step": 130 |
| }, |
| { |
| "epoch": 2.51985559566787, |
| "grad_norm": 6.612102508544922, |
| "learning_rate": 7.156862745098039e-05, |
| "loss": 6.3979, |
| "step": 131 |
| }, |
| { |
| "epoch": 2.539109506618532, |
| "grad_norm": 11.322911262512207, |
| "learning_rate": 7.058823529411765e-05, |
| "loss": 6.2103, |
| "step": 132 |
| }, |
| { |
| "epoch": 2.558363417569194, |
| "grad_norm": 21.0396671295166, |
| "learning_rate": 6.96078431372549e-05, |
| "loss": 5.6772, |
| "step": 133 |
| }, |
| { |
| "epoch": 2.577617328519856, |
| "grad_norm": 13.040122985839844, |
| "learning_rate": 6.862745098039216e-05, |
| "loss": 6.0072, |
| "step": 134 |
| }, |
| { |
| "epoch": 2.5968712394705173, |
| "grad_norm": 13.392056465148926, |
| "learning_rate": 6.764705882352942e-05, |
| "loss": 6.0408, |
| "step": 135 |
| }, |
| { |
| "epoch": 2.6161251504211793, |
| "grad_norm": 9.345407485961914, |
| "learning_rate": 6.666666666666667e-05, |
| "loss": 6.345, |
| "step": 136 |
| }, |
| { |
| "epoch": 2.6353790613718413, |
| "grad_norm": 9.068965911865234, |
| "learning_rate": 6.568627450980392e-05, |
| "loss": 6.0518, |
| "step": 137 |
| }, |
| { |
| "epoch": 2.654632972322503, |
| "grad_norm": 9.924796104431152, |
| "learning_rate": 6.470588235294118e-05, |
| "loss": 6.404, |
| "step": 138 |
| }, |
| { |
| "epoch": 2.673886883273165, |
| "grad_norm": 11.512860298156738, |
| "learning_rate": 6.372549019607843e-05, |
| "loss": 5.849, |
| "step": 139 |
| }, |
| { |
| "epoch": 2.693140794223827, |
| "grad_norm": 9.558600425720215, |
| "learning_rate": 6.274509803921569e-05, |
| "loss": 6.0751, |
| "step": 140 |
| }, |
| { |
| "epoch": 2.7123947051744883, |
| "grad_norm": 14.465291976928711, |
| "learning_rate": 6.176470588235295e-05, |
| "loss": 5.5432, |
| "step": 141 |
| }, |
| { |
| "epoch": 2.7316486161251503, |
| "grad_norm": 14.843960762023926, |
| "learning_rate": 6.078431372549019e-05, |
| "loss": 5.8858, |
| "step": 142 |
| }, |
| { |
| "epoch": 2.7509025270758123, |
| "grad_norm": 8.04920768737793, |
| "learning_rate": 5.980392156862745e-05, |
| "loss": 5.8131, |
| "step": 143 |
| }, |
| { |
| "epoch": 2.7701564380264743, |
| "grad_norm": 9.71105670928955, |
| "learning_rate": 5.882352941176471e-05, |
| "loss": 5.9374, |
| "step": 144 |
| }, |
| { |
| "epoch": 2.7894103489771362, |
| "grad_norm": 5.949017524719238, |
| "learning_rate": 5.784313725490197e-05, |
| "loss": 6.4545, |
| "step": 145 |
| }, |
| { |
| "epoch": 2.808664259927798, |
| "grad_norm": 7.233414649963379, |
| "learning_rate": 5.6862745098039215e-05, |
| "loss": 6.1215, |
| "step": 146 |
| }, |
| { |
| "epoch": 2.8279181708784598, |
| "grad_norm": 9.445034980773926, |
| "learning_rate": 5.588235294117647e-05, |
| "loss": 5.7711, |
| "step": 147 |
| }, |
| { |
| "epoch": 2.8471720818291217, |
| "grad_norm": 6.351881980895996, |
| "learning_rate": 5.490196078431373e-05, |
| "loss": 6.3073, |
| "step": 148 |
| }, |
| { |
| "epoch": 2.8664259927797833, |
| "grad_norm": 5.955877304077148, |
| "learning_rate": 5.392156862745098e-05, |
| "loss": 6.2675, |
| "step": 149 |
| }, |
| { |
| "epoch": 2.8856799037304453, |
| "grad_norm": 7.2687764167785645, |
| "learning_rate": 5.294117647058824e-05, |
| "loss": 6.2382, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.8856799037304453, |
| "eval_clap": 0.07656023651361465, |
| "eval_loss": 6.118464469909668, |
| "eval_runtime": 165.7635, |
| "eval_samples_per_second": 0.097, |
| "eval_steps_per_second": 0.097, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.9049338146811072, |
| "grad_norm": 7.581653594970703, |
| "learning_rate": 5.1960784313725495e-05, |
| "loss": 6.1951, |
| "step": 151 |
| }, |
| { |
| "epoch": 2.9241877256317688, |
| "grad_norm": 5.309889793395996, |
| "learning_rate": 5.0980392156862745e-05, |
| "loss": 6.1416, |
| "step": 152 |
| }, |
| { |
| "epoch": 2.9434416365824307, |
| "grad_norm": 10.804561614990234, |
| "learning_rate": 5e-05, |
| "loss": 6.4203, |
| "step": 153 |
| }, |
| { |
| "epoch": 2.9626955475330927, |
| "grad_norm": 7.452890872955322, |
| "learning_rate": 4.901960784313725e-05, |
| "loss": 6.3695, |
| "step": 154 |
| }, |
| { |
| "epoch": 2.9819494584837543, |
| "grad_norm": 7.373142719268799, |
| "learning_rate": 4.803921568627452e-05, |
| "loss": 6.0469, |
| "step": 155 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 6.503188610076904, |
| "learning_rate": 4.705882352941177e-05, |
| "loss": 5.5774, |
| "step": 156 |
| }, |
| { |
| "epoch": 3.019253910950662, |
| "grad_norm": 6.571235656738281, |
| "learning_rate": 4.607843137254902e-05, |
| "loss": 6.3784, |
| "step": 157 |
| }, |
| { |
| "epoch": 3.0385078219013235, |
| "grad_norm": 6.059790134429932, |
| "learning_rate": 4.5098039215686275e-05, |
| "loss": 6.2638, |
| "step": 158 |
| }, |
| { |
| "epoch": 3.0577617328519855, |
| "grad_norm": 7.978560447692871, |
| "learning_rate": 4.411764705882353e-05, |
| "loss": 6.2388, |
| "step": 159 |
| }, |
| { |
| "epoch": 3.0770156438026475, |
| "grad_norm": 4.5174479484558105, |
| "learning_rate": 4.313725490196079e-05, |
| "loss": 6.1811, |
| "step": 160 |
| }, |
| { |
| "epoch": 3.0962695547533094, |
| "grad_norm": 16.497093200683594, |
| "learning_rate": 4.215686274509804e-05, |
| "loss": 5.8567, |
| "step": 161 |
| }, |
| { |
| "epoch": 3.115523465703971, |
| "grad_norm": 10.036762237548828, |
| "learning_rate": 4.11764705882353e-05, |
| "loss": 5.7851, |
| "step": 162 |
| }, |
| { |
| "epoch": 3.134777376654633, |
| "grad_norm": 8.312905311584473, |
| "learning_rate": 4.0196078431372555e-05, |
| "loss": 6.3701, |
| "step": 163 |
| }, |
| { |
| "epoch": 3.154031287605295, |
| "grad_norm": 6.305182456970215, |
| "learning_rate": 3.9215686274509805e-05, |
| "loss": 6.2461, |
| "step": 164 |
| }, |
| { |
| "epoch": 3.1732851985559565, |
| "grad_norm": 6.297240257263184, |
| "learning_rate": 3.8235294117647055e-05, |
| "loss": 6.1583, |
| "step": 165 |
| }, |
| { |
| "epoch": 3.1925391095066185, |
| "grad_norm": 6.377700328826904, |
| "learning_rate": 3.725490196078432e-05, |
| "loss": 5.8368, |
| "step": 166 |
| }, |
| { |
| "epoch": 3.2117930204572804, |
| "grad_norm": 6.20255708694458, |
| "learning_rate": 3.627450980392157e-05, |
| "loss": 6.1394, |
| "step": 167 |
| }, |
| { |
| "epoch": 3.2310469314079424, |
| "grad_norm": 10.172269821166992, |
| "learning_rate": 3.529411764705883e-05, |
| "loss": 5.99, |
| "step": 168 |
| }, |
| { |
| "epoch": 3.250300842358604, |
| "grad_norm": 12.56449031829834, |
| "learning_rate": 3.431372549019608e-05, |
| "loss": 6.2823, |
| "step": 169 |
| }, |
| { |
| "epoch": 3.269554753309266, |
| "grad_norm": 6.517347812652588, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 6.4417, |
| "step": 170 |
| }, |
| { |
| "epoch": 3.288808664259928, |
| "grad_norm": 7.165337085723877, |
| "learning_rate": 3.235294117647059e-05, |
| "loss": 6.1048, |
| "step": 171 |
| }, |
| { |
| "epoch": 3.30806257521059, |
| "grad_norm": 14.79480266571045, |
| "learning_rate": 3.137254901960784e-05, |
| "loss": 5.9012, |
| "step": 172 |
| }, |
| { |
| "epoch": 3.3273164861612514, |
| "grad_norm": 10.55307388305664, |
| "learning_rate": 3.0392156862745097e-05, |
| "loss": 6.0419, |
| "step": 173 |
| }, |
| { |
| "epoch": 3.3465703971119134, |
| "grad_norm": 7.354953289031982, |
| "learning_rate": 2.9411764705882354e-05, |
| "loss": 5.9871, |
| "step": 174 |
| }, |
| { |
| "epoch": 3.3658243080625754, |
| "grad_norm": 7.013256549835205, |
| "learning_rate": 2.8431372549019608e-05, |
| "loss": 6.3169, |
| "step": 175 |
| }, |
| { |
| "epoch": 3.3658243080625754, |
| "eval_clap": 0.09689466655254364, |
| "eval_loss": 6.116217613220215, |
| "eval_runtime": 165.7689, |
| "eval_samples_per_second": 0.097, |
| "eval_steps_per_second": 0.097, |
| "step": 175 |
| }, |
| { |
| "epoch": 3.385078219013237, |
| "grad_norm": 8.007953643798828, |
| "learning_rate": 2.7450980392156865e-05, |
| "loss": 6.0573, |
| "step": 176 |
| }, |
| { |
| "epoch": 3.404332129963899, |
| "grad_norm": 7.166982173919678, |
| "learning_rate": 2.647058823529412e-05, |
| "loss": 6.3097, |
| "step": 177 |
| }, |
| { |
| "epoch": 3.423586040914561, |
| "grad_norm": 5.868830680847168, |
| "learning_rate": 2.5490196078431373e-05, |
| "loss": 6.1856, |
| "step": 178 |
| }, |
| { |
| "epoch": 3.4428399518652224, |
| "grad_norm": 7.172518253326416, |
| "learning_rate": 2.4509803921568626e-05, |
| "loss": 6.284, |
| "step": 179 |
| }, |
| { |
| "epoch": 3.4620938628158844, |
| "grad_norm": 5.972955226898193, |
| "learning_rate": 2.3529411764705884e-05, |
| "loss": 6.1067, |
| "step": 180 |
| }, |
| { |
| "epoch": 3.4813477737665464, |
| "grad_norm": 5.716938495635986, |
| "learning_rate": 2.2549019607843138e-05, |
| "loss": 6.2792, |
| "step": 181 |
| }, |
| { |
| "epoch": 3.5006016847172083, |
| "grad_norm": 5.647866249084473, |
| "learning_rate": 2.1568627450980395e-05, |
| "loss": 6.336, |
| "step": 182 |
| }, |
| { |
| "epoch": 3.51985559566787, |
| "grad_norm": 7.596288204193115, |
| "learning_rate": 2.058823529411765e-05, |
| "loss": 6.1188, |
| "step": 183 |
| }, |
| { |
| "epoch": 3.539109506618532, |
| "grad_norm": 9.767680168151855, |
| "learning_rate": 1.9607843137254903e-05, |
| "loss": 6.3607, |
| "step": 184 |
| }, |
| { |
| "epoch": 3.558363417569194, |
| "grad_norm": 5.301209926605225, |
| "learning_rate": 1.862745098039216e-05, |
| "loss": 6.0671, |
| "step": 185 |
| }, |
| { |
| "epoch": 3.577617328519856, |
| "grad_norm": 6.347781658172607, |
| "learning_rate": 1.7647058823529414e-05, |
| "loss": 6.1538, |
| "step": 186 |
| }, |
| { |
| "epoch": 3.5968712394705173, |
| "grad_norm": 6.653684139251709, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 6.1422, |
| "step": 187 |
| }, |
| { |
| "epoch": 3.6161251504211793, |
| "grad_norm": 9.340754508972168, |
| "learning_rate": 1.568627450980392e-05, |
| "loss": 5.6681, |
| "step": 188 |
| }, |
| { |
| "epoch": 3.6353790613718413, |
| "grad_norm": 6.159310340881348, |
| "learning_rate": 1.4705882352941177e-05, |
| "loss": 5.8408, |
| "step": 189 |
| }, |
| { |
| "epoch": 3.654632972322503, |
| "grad_norm": 7.5495195388793945, |
| "learning_rate": 1.3725490196078432e-05, |
| "loss": 6.1853, |
| "step": 190 |
| }, |
| { |
| "epoch": 3.673886883273165, |
| "grad_norm": 6.215287208557129, |
| "learning_rate": 1.2745098039215686e-05, |
| "loss": 6.082, |
| "step": 191 |
| }, |
| { |
| "epoch": 3.693140794223827, |
| "grad_norm": 5.863905906677246, |
| "learning_rate": 1.1764705882352942e-05, |
| "loss": 6.0772, |
| "step": 192 |
| }, |
| { |
| "epoch": 3.7123947051744883, |
| "grad_norm": 5.785052299499512, |
| "learning_rate": 1.0784313725490197e-05, |
| "loss": 6.2809, |
| "step": 193 |
| }, |
| { |
| "epoch": 3.7316486161251503, |
| "grad_norm": 8.62579345703125, |
| "learning_rate": 9.803921568627451e-06, |
| "loss": 5.9173, |
| "step": 194 |
| }, |
| { |
| "epoch": 3.7509025270758123, |
| "grad_norm": 8.095368385314941, |
| "learning_rate": 8.823529411764707e-06, |
| "loss": 6.2614, |
| "step": 195 |
| }, |
| { |
| "epoch": 3.7701564380264743, |
| "grad_norm": 6.416041851043701, |
| "learning_rate": 7.84313725490196e-06, |
| "loss": 5.7276, |
| "step": 196 |
| }, |
| { |
| "epoch": 3.7894103489771362, |
| "grad_norm": 6.0362868309021, |
| "learning_rate": 6.862745098039216e-06, |
| "loss": 6.1875, |
| "step": 197 |
| }, |
| { |
| "epoch": 3.808664259927798, |
| "grad_norm": 6.641626834869385, |
| "learning_rate": 5.882352941176471e-06, |
| "loss": 6.0641, |
| "step": 198 |
| }, |
| { |
| "epoch": 3.8279181708784598, |
| "grad_norm": 6.249925136566162, |
| "learning_rate": 4.901960784313726e-06, |
| "loss": 6.4255, |
| "step": 199 |
| }, |
| { |
| "epoch": 3.8471720818291217, |
| "grad_norm": 7.856912136077881, |
| "learning_rate": 3.92156862745098e-06, |
| "loss": 5.7667, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.8471720818291217, |
| "eval_clap": 0.11432015895843506, |
| "eval_loss": 6.130455017089844, |
| "eval_runtime": 165.7823, |
| "eval_samples_per_second": 0.097, |
| "eval_steps_per_second": 0.097, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.8664259927797833, |
| "grad_norm": 8.209946632385254, |
| "learning_rate": 2.9411764705882355e-06, |
| "loss": 6.1598, |
| "step": 201 |
| }, |
| { |
| "epoch": 3.8856799037304453, |
| "grad_norm": 7.541530609130859, |
| "learning_rate": 1.96078431372549e-06, |
| "loss": 5.7201, |
| "step": 202 |
| }, |
| { |
| "epoch": 3.9049338146811072, |
| "grad_norm": 36.531105041503906, |
| "learning_rate": 9.80392156862745e-07, |
| "loss": 6.0873, |
| "step": 203 |
| }, |
| { |
| "epoch": 3.9241877256317688, |
| "grad_norm": 6.220560073852539, |
| "learning_rate": 0.0, |
| "loss": 6.0892, |
| "step": 204 |
| }, |
| { |
| "epoch": 3.9241877256317688, |
| "step": 204, |
| "total_flos": 784195045500888.0, |
| "train_loss": 6.39456293629665, |
| "train_runtime": 14405.0011, |
| "train_samples_per_second": 0.231, |
| "train_steps_per_second": 0.014 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 204, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 784195045500888.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|