{ "best_metric": 0.017690911889076233, "best_model_checkpoint": "grey-multilabel-classification-3/checkpoint-10720", "epoch": 18.0, "eval_steps": 500, "global_step": 19296, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4664179104477612, "grad_norm": 0.3367554545402527, "learning_rate": 1.953358208955224e-05, "loss": 0.0801, "step": 500 }, { "epoch": 0.9328358208955224, "grad_norm": 0.2535526752471924, "learning_rate": 1.9067164179104477e-05, "loss": 0.0452, "step": 1000 }, { "epoch": 1.0, "eval_accuracy": 0.9954128111840032, "eval_f1": 0.0, "eval_loss": 0.021581802517175674, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 13.6235, "eval_samples_per_second": 39.344, "eval_steps_per_second": 39.344, "step": 1072 }, { "epoch": 1.3992537313432836, "grad_norm": 0.39555880427360535, "learning_rate": 1.860074626865672e-05, "loss": 0.0429, "step": 1500 }, { "epoch": 1.8656716417910446, "grad_norm": 0.2884560227394104, "learning_rate": 1.8134328358208956e-05, "loss": 0.0433, "step": 2000 }, { "epoch": 2.0, "eval_accuracy": 0.9955238204371616, "eval_f1": 0.0713224368499257, "eval_loss": 0.020623503252863884, "eval_precision": 0.7384615384615385, "eval_recall": 0.03747072599531616, "eval_runtime": 13.1268, "eval_samples_per_second": 40.832, "eval_steps_per_second": 40.832, "step": 2144 }, { "epoch": 2.332089552238806, "grad_norm": 0.31138914823532104, "learning_rate": 1.7667910447761197e-05, "loss": 0.0414, "step": 2500 }, { "epoch": 2.798507462686567, "grad_norm": 0.28297460079193115, "learning_rate": 1.7201492537313432e-05, "loss": 0.0401, "step": 3000 }, { "epoch": 3.0, "eval_accuracy": 0.9957243532815767, "eval_f1": 0.19973190348525469, "eval_loss": 0.019770797342061996, "eval_precision": 0.7061611374407583, "eval_recall": 0.11631537861046058, "eval_runtime": 13.0974, "eval_samples_per_second": 40.924, "eval_steps_per_second": 40.924, "step": 3216 }, { "epoch": 3.264925373134328, "grad_norm": 0.17224232852458954, "learning_rate": 1.6735074626865673e-05, "loss": 0.0388, "step": 3500 }, { "epoch": 3.7313432835820897, "grad_norm": 0.23225364089012146, "learning_rate": 1.626865671641791e-05, "loss": 0.0381, "step": 4000 }, { "epoch": 4.0, "eval_accuracy": 0.9958532672529865, "eval_f1": 0.28606658446362515, "eval_loss": 0.01913285069167614, "eval_precision": 0.6803519061583577, "eval_recall": 0.18110850897736144, "eval_runtime": 13.086, "eval_samples_per_second": 40.96, "eval_steps_per_second": 40.96, "step": 4288 }, { "epoch": 4.197761194029851, "grad_norm": 0.2774255573749542, "learning_rate": 1.5802238805970152e-05, "loss": 0.0362, "step": 4500 }, { "epoch": 4.664179104477612, "grad_norm": 0.34336161613464355, "learning_rate": 1.533582089552239e-05, "loss": 0.0348, "step": 5000 }, { "epoch": 5.0, "eval_accuracy": 0.9959105623513909, "eval_f1": 0.29244114002478316, "eval_loss": 0.01866454817354679, "eval_precision": 0.7087087087087087, "eval_recall": 0.18423106947697113, "eval_runtime": 13.0537, "eval_samples_per_second": 41.061, "eval_steps_per_second": 41.061, "step": 5360 }, { "epoch": 5.130597014925373, "grad_norm": 0.2382318079471588, "learning_rate": 1.4869402985074627e-05, "loss": 0.0349, "step": 5500 }, { "epoch": 5.597014925373134, "grad_norm": 0.20123334228992462, "learning_rate": 1.4402985074626867e-05, "loss": 0.0334, "step": 6000 }, { "epoch": 6.0, "eval_accuracy": 0.9959177242386914, "eval_f1": 0.30317848410757947, "eval_loss": 0.018318546935915947, "eval_precision": 0.6985915492957746, "eval_recall": 0.19359875097580015, "eval_runtime": 13.2299, "eval_samples_per_second": 40.514, "eval_steps_per_second": 40.514, "step": 6432 }, { "epoch": 6.063432835820896, "grad_norm": 0.2717812657356262, "learning_rate": 1.3936567164179106e-05, "loss": 0.0334, "step": 6500 }, { "epoch": 6.529850746268656, "grad_norm": 0.2352936565876007, "learning_rate": 1.3470149253731344e-05, "loss": 0.0314, "step": 7000 }, { "epoch": 6.996268656716418, "grad_norm": 0.3104737401008606, "learning_rate": 1.3003731343283584e-05, "loss": 0.0314, "step": 7500 }, { "epoch": 7.0, "eval_accuracy": 0.9959606955624947, "eval_f1": 0.3079754601226994, "eval_loss": 0.018033917993307114, "eval_precision": 0.7191977077363897, "eval_recall": 0.1959406713505074, "eval_runtime": 13.8378, "eval_samples_per_second": 38.734, "eval_steps_per_second": 38.734, "step": 7504 }, { "epoch": 7.462686567164179, "grad_norm": 0.23265156149864197, "learning_rate": 1.2537313432835823e-05, "loss": 0.0295, "step": 8000 }, { "epoch": 7.92910447761194, "grad_norm": 0.2608203887939453, "learning_rate": 1.207089552238806e-05, "loss": 0.0292, "step": 8500 }, { "epoch": 8.0, "eval_accuracy": 0.9960358953791503, "eval_f1": 0.351493848857645, "eval_loss": 0.0177922360599041, "eval_precision": 0.704225352112676, "eval_recall": 0.234192037470726, "eval_runtime": 12.9508, "eval_samples_per_second": 41.387, "eval_steps_per_second": 41.387, "step": 8576 }, { "epoch": 8.395522388059701, "grad_norm": 0.28066638112068176, "learning_rate": 1.1604477611940299e-05, "loss": 0.0285, "step": 9000 }, { "epoch": 8.861940298507463, "grad_norm": 0.3340807259082794, "learning_rate": 1.1138059701492538e-05, "loss": 0.0284, "step": 9500 }, { "epoch": 9.0, "eval_accuracy": 0.9960573810410519, "eval_f1": 0.3697767601602748, "eval_loss": 0.017844857648015022, "eval_precision": 0.6931330472103004, "eval_recall": 0.25214676034348166, "eval_runtime": 13.5821, "eval_samples_per_second": 39.464, "eval_steps_per_second": 39.464, "step": 9648 }, { "epoch": 9.328358208955224, "grad_norm": 0.1388923078775406, "learning_rate": 1.0671641791044778e-05, "loss": 0.0275, "step": 10000 }, { "epoch": 9.794776119402986, "grad_norm": 0.200577512383461, "learning_rate": 1.0205223880597015e-05, "loss": 0.0265, "step": 10500 }, { "epoch": 10.0, "eval_accuracy": 0.9961039333085054, "eval_f1": 0.3652275379229872, "eval_loss": 0.017690911889076233, "eval_precision": 0.7228637413394919, "eval_recall": 0.24434035909445745, "eval_runtime": 13.8518, "eval_samples_per_second": 38.695, "eval_steps_per_second": 38.695, "step": 10720 }, { "epoch": 10.261194029850746, "grad_norm": 0.1815371811389923, "learning_rate": 9.738805970149255e-06, "loss": 0.0261, "step": 11000 }, { "epoch": 10.727611940298507, "grad_norm": 0.21693415939807892, "learning_rate": 9.272388059701494e-06, "loss": 0.0251, "step": 11500 }, { "epoch": 11.0, "eval_accuracy": 0.9959857621680466, "eval_f1": 0.37825845812534664, "eval_loss": 0.018003830686211586, "eval_precision": 0.6532567049808429, "eval_recall": 0.26619828259172523, "eval_runtime": 13.2669, "eval_samples_per_second": 40.401, "eval_steps_per_second": 40.401, "step": 11792 }, { "epoch": 11.194029850746269, "grad_norm": 0.2108106017112732, "learning_rate": 8.805970149253732e-06, "loss": 0.024, "step": 12000 }, { "epoch": 11.66044776119403, "grad_norm": 0.3812670409679413, "learning_rate": 8.339552238805972e-06, "loss": 0.0245, "step": 12500 }, { "epoch": 12.0, "eval_accuracy": 0.9960215716045492, "eval_f1": 0.3984840281537629, "eval_loss": 0.017995502799749374, "eval_precision": 0.6501766784452296, "eval_recall": 0.28727556596409054, "eval_runtime": 14.2572, "eval_samples_per_second": 37.595, "eval_steps_per_second": 37.595, "step": 12864 }, { "epoch": 12.126865671641792, "grad_norm": 0.1880696564912796, "learning_rate": 7.87313432835821e-06, "loss": 0.0235, "step": 13000 }, { "epoch": 12.593283582089553, "grad_norm": 0.1416264921426773, "learning_rate": 7.406716417910448e-06, "loss": 0.0227, "step": 13500 }, { "epoch": 13.0, "eval_accuracy": 0.996071704815653, "eval_f1": 0.3989041095890411, "eval_loss": 0.018045414239168167, "eval_precision": 0.6691176470588235, "eval_recall": 0.28415300546448086, "eval_runtime": 14.1382, "eval_samples_per_second": 37.912, "eval_steps_per_second": 37.912, "step": 13936 }, { "epoch": 13.059701492537313, "grad_norm": 0.10282892733812332, "learning_rate": 6.9402985074626876e-06, "loss": 0.0231, "step": 14000 }, { "epoch": 13.526119402985074, "grad_norm": 0.22511619329452515, "learning_rate": 6.473880597014925e-06, "loss": 0.0225, "step": 14500 }, { "epoch": 13.992537313432836, "grad_norm": 0.2805801331996918, "learning_rate": 6.007462686567165e-06, "loss": 0.0223, "step": 15000 }, { "epoch": 14.0, "eval_accuracy": 0.9960108287735984, "eval_f1": 0.40171858216971, "eval_loss": 0.018198266625404358, "eval_precision": 0.6437177280550774, "eval_recall": 0.29195940671350507, "eval_runtime": 13.2494, "eval_samples_per_second": 40.455, "eval_steps_per_second": 40.455, "step": 15008 }, { "epoch": 14.458955223880597, "grad_norm": 0.2002527117729187, "learning_rate": 5.5410447761194035e-06, "loss": 0.0211, "step": 15500 }, { "epoch": 14.925373134328359, "grad_norm": 0.050380606204271317, "learning_rate": 5.074626865671642e-06, "loss": 0.0214, "step": 16000 }, { "epoch": 15.0, "eval_accuracy": 0.9960860285902541, "eval_f1": 0.40630092341118956, "eval_loss": 0.018185345456004143, "eval_precision": 0.6678571428571428, "eval_recall": 0.29195940671350507, "eval_runtime": 13.1752, "eval_samples_per_second": 40.683, "eval_steps_per_second": 40.683, "step": 16080 }, { "epoch": 15.39179104477612, "grad_norm": 0.29372188448905945, "learning_rate": 4.6082089552238816e-06, "loss": 0.0205, "step": 16500 }, { "epoch": 15.85820895522388, "grad_norm": 0.145236998796463, "learning_rate": 4.141791044776119e-06, "loss": 0.0213, "step": 17000 }, { "epoch": 16.0, "eval_accuracy": 0.9960573810410519, "eval_f1": 0.40582838640043173, "eval_loss": 0.018394598737359047, "eval_precision": 0.6573426573426573, "eval_recall": 0.2935206869633099, "eval_runtime": 13.885, "eval_samples_per_second": 38.603, "eval_steps_per_second": 38.603, "step": 17152 }, { "epoch": 16.324626865671643, "grad_norm": 0.3503226637840271, "learning_rate": 3.6753731343283584e-06, "loss": 0.0205, "step": 17500 }, { "epoch": 16.791044776119403, "grad_norm": 0.22795088589191437, "learning_rate": 3.208955223880597e-06, "loss": 0.0198, "step": 18000 }, { "epoch": 17.0, "eval_accuracy": 0.9960108287735984, "eval_f1": 0.40807651434643993, "eval_loss": 0.018566885963082314, "eval_precision": 0.6389351081530782, "eval_recall": 0.2997658079625293, "eval_runtime": 13.6531, "eval_samples_per_second": 39.259, "eval_steps_per_second": 39.259, "step": 18224 }, { "epoch": 17.257462686567163, "grad_norm": 0.09603070467710495, "learning_rate": 2.742537313432836e-06, "loss": 0.0198, "step": 18500 }, { "epoch": 17.723880597014926, "grad_norm": 0.3136885166168213, "learning_rate": 2.2761194029850747e-06, "loss": 0.0197, "step": 19000 }, { "epoch": 18.0, "eval_accuracy": 0.9960538000974016, "eval_f1": 0.42, "eval_loss": 0.018600279465317726, "eval_precision": 0.6445880452342488, "eval_recall": 0.3114754098360656, "eval_runtime": 12.9973, "eval_samples_per_second": 41.239, "eval_steps_per_second": 41.239, "step": 19296 } ], "logging_steps": 500, "max_steps": 21440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2579840201687040.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }