| { | |
| "best_metric": 0.017690911889076233, | |
| "best_model_checkpoint": "grey-multilabel-classification-3/checkpoint-10720", | |
| "epoch": 18.0, | |
| "eval_steps": 500, | |
| "global_step": 19296, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.4664179104477612, | |
| "grad_norm": 0.3367554545402527, | |
| "learning_rate": 1.953358208955224e-05, | |
| "loss": 0.0801, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9328358208955224, | |
| "grad_norm": 0.2535526752471924, | |
| "learning_rate": 1.9067164179104477e-05, | |
| "loss": 0.0452, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9954128111840032, | |
| "eval_f1": 0.0, | |
| "eval_loss": 0.021581802517175674, | |
| "eval_precision": 0.0, | |
| "eval_recall": 0.0, | |
| "eval_runtime": 13.6235, | |
| "eval_samples_per_second": 39.344, | |
| "eval_steps_per_second": 39.344, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.3992537313432836, | |
| "grad_norm": 0.39555880427360535, | |
| "learning_rate": 1.860074626865672e-05, | |
| "loss": 0.0429, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.8656716417910446, | |
| "grad_norm": 0.2884560227394104, | |
| "learning_rate": 1.8134328358208956e-05, | |
| "loss": 0.0433, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9955238204371616, | |
| "eval_f1": 0.0713224368499257, | |
| "eval_loss": 0.020623503252863884, | |
| "eval_precision": 0.7384615384615385, | |
| "eval_recall": 0.03747072599531616, | |
| "eval_runtime": 13.1268, | |
| "eval_samples_per_second": 40.832, | |
| "eval_steps_per_second": 40.832, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 2.332089552238806, | |
| "grad_norm": 0.31138914823532104, | |
| "learning_rate": 1.7667910447761197e-05, | |
| "loss": 0.0414, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.798507462686567, | |
| "grad_norm": 0.28297460079193115, | |
| "learning_rate": 1.7201492537313432e-05, | |
| "loss": 0.0401, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9957243532815767, | |
| "eval_f1": 0.19973190348525469, | |
| "eval_loss": 0.019770797342061996, | |
| "eval_precision": 0.7061611374407583, | |
| "eval_recall": 0.11631537861046058, | |
| "eval_runtime": 13.0974, | |
| "eval_samples_per_second": 40.924, | |
| "eval_steps_per_second": 40.924, | |
| "step": 3216 | |
| }, | |
| { | |
| "epoch": 3.264925373134328, | |
| "grad_norm": 0.17224232852458954, | |
| "learning_rate": 1.6735074626865673e-05, | |
| "loss": 0.0388, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.7313432835820897, | |
| "grad_norm": 0.23225364089012146, | |
| "learning_rate": 1.626865671641791e-05, | |
| "loss": 0.0381, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9958532672529865, | |
| "eval_f1": 0.28606658446362515, | |
| "eval_loss": 0.01913285069167614, | |
| "eval_precision": 0.6803519061583577, | |
| "eval_recall": 0.18110850897736144, | |
| "eval_runtime": 13.086, | |
| "eval_samples_per_second": 40.96, | |
| "eval_steps_per_second": 40.96, | |
| "step": 4288 | |
| }, | |
| { | |
| "epoch": 4.197761194029851, | |
| "grad_norm": 0.2774255573749542, | |
| "learning_rate": 1.5802238805970152e-05, | |
| "loss": 0.0362, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.664179104477612, | |
| "grad_norm": 0.34336161613464355, | |
| "learning_rate": 1.533582089552239e-05, | |
| "loss": 0.0348, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9959105623513909, | |
| "eval_f1": 0.29244114002478316, | |
| "eval_loss": 0.01866454817354679, | |
| "eval_precision": 0.7087087087087087, | |
| "eval_recall": 0.18423106947697113, | |
| "eval_runtime": 13.0537, | |
| "eval_samples_per_second": 41.061, | |
| "eval_steps_per_second": 41.061, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 5.130597014925373, | |
| "grad_norm": 0.2382318079471588, | |
| "learning_rate": 1.4869402985074627e-05, | |
| "loss": 0.0349, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 5.597014925373134, | |
| "grad_norm": 0.20123334228992462, | |
| "learning_rate": 1.4402985074626867e-05, | |
| "loss": 0.0334, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.9959177242386914, | |
| "eval_f1": 0.30317848410757947, | |
| "eval_loss": 0.018318546935915947, | |
| "eval_precision": 0.6985915492957746, | |
| "eval_recall": 0.19359875097580015, | |
| "eval_runtime": 13.2299, | |
| "eval_samples_per_second": 40.514, | |
| "eval_steps_per_second": 40.514, | |
| "step": 6432 | |
| }, | |
| { | |
| "epoch": 6.063432835820896, | |
| "grad_norm": 0.2717812657356262, | |
| "learning_rate": 1.3936567164179106e-05, | |
| "loss": 0.0334, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 6.529850746268656, | |
| "grad_norm": 0.2352936565876007, | |
| "learning_rate": 1.3470149253731344e-05, | |
| "loss": 0.0314, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 6.996268656716418, | |
| "grad_norm": 0.3104737401008606, | |
| "learning_rate": 1.3003731343283584e-05, | |
| "loss": 0.0314, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.9959606955624947, | |
| "eval_f1": 0.3079754601226994, | |
| "eval_loss": 0.018033917993307114, | |
| "eval_precision": 0.7191977077363897, | |
| "eval_recall": 0.1959406713505074, | |
| "eval_runtime": 13.8378, | |
| "eval_samples_per_second": 38.734, | |
| "eval_steps_per_second": 38.734, | |
| "step": 7504 | |
| }, | |
| { | |
| "epoch": 7.462686567164179, | |
| "grad_norm": 0.23265156149864197, | |
| "learning_rate": 1.2537313432835823e-05, | |
| "loss": 0.0295, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 7.92910447761194, | |
| "grad_norm": 0.2608203887939453, | |
| "learning_rate": 1.207089552238806e-05, | |
| "loss": 0.0292, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.9960358953791503, | |
| "eval_f1": 0.351493848857645, | |
| "eval_loss": 0.0177922360599041, | |
| "eval_precision": 0.704225352112676, | |
| "eval_recall": 0.234192037470726, | |
| "eval_runtime": 12.9508, | |
| "eval_samples_per_second": 41.387, | |
| "eval_steps_per_second": 41.387, | |
| "step": 8576 | |
| }, | |
| { | |
| "epoch": 8.395522388059701, | |
| "grad_norm": 0.28066638112068176, | |
| "learning_rate": 1.1604477611940299e-05, | |
| "loss": 0.0285, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 8.861940298507463, | |
| "grad_norm": 0.3340807259082794, | |
| "learning_rate": 1.1138059701492538e-05, | |
| "loss": 0.0284, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.9960573810410519, | |
| "eval_f1": 0.3697767601602748, | |
| "eval_loss": 0.017844857648015022, | |
| "eval_precision": 0.6931330472103004, | |
| "eval_recall": 0.25214676034348166, | |
| "eval_runtime": 13.5821, | |
| "eval_samples_per_second": 39.464, | |
| "eval_steps_per_second": 39.464, | |
| "step": 9648 | |
| }, | |
| { | |
| "epoch": 9.328358208955224, | |
| "grad_norm": 0.1388923078775406, | |
| "learning_rate": 1.0671641791044778e-05, | |
| "loss": 0.0275, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 9.794776119402986, | |
| "grad_norm": 0.200577512383461, | |
| "learning_rate": 1.0205223880597015e-05, | |
| "loss": 0.0265, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.9961039333085054, | |
| "eval_f1": 0.3652275379229872, | |
| "eval_loss": 0.017690911889076233, | |
| "eval_precision": 0.7228637413394919, | |
| "eval_recall": 0.24434035909445745, | |
| "eval_runtime": 13.8518, | |
| "eval_samples_per_second": 38.695, | |
| "eval_steps_per_second": 38.695, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 10.261194029850746, | |
| "grad_norm": 0.1815371811389923, | |
| "learning_rate": 9.738805970149255e-06, | |
| "loss": 0.0261, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 10.727611940298507, | |
| "grad_norm": 0.21693415939807892, | |
| "learning_rate": 9.272388059701494e-06, | |
| "loss": 0.0251, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.9959857621680466, | |
| "eval_f1": 0.37825845812534664, | |
| "eval_loss": 0.018003830686211586, | |
| "eval_precision": 0.6532567049808429, | |
| "eval_recall": 0.26619828259172523, | |
| "eval_runtime": 13.2669, | |
| "eval_samples_per_second": 40.401, | |
| "eval_steps_per_second": 40.401, | |
| "step": 11792 | |
| }, | |
| { | |
| "epoch": 11.194029850746269, | |
| "grad_norm": 0.2108106017112732, | |
| "learning_rate": 8.805970149253732e-06, | |
| "loss": 0.024, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 11.66044776119403, | |
| "grad_norm": 0.3812670409679413, | |
| "learning_rate": 8.339552238805972e-06, | |
| "loss": 0.0245, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.9960215716045492, | |
| "eval_f1": 0.3984840281537629, | |
| "eval_loss": 0.017995502799749374, | |
| "eval_precision": 0.6501766784452296, | |
| "eval_recall": 0.28727556596409054, | |
| "eval_runtime": 14.2572, | |
| "eval_samples_per_second": 37.595, | |
| "eval_steps_per_second": 37.595, | |
| "step": 12864 | |
| }, | |
| { | |
| "epoch": 12.126865671641792, | |
| "grad_norm": 0.1880696564912796, | |
| "learning_rate": 7.87313432835821e-06, | |
| "loss": 0.0235, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 12.593283582089553, | |
| "grad_norm": 0.1416264921426773, | |
| "learning_rate": 7.406716417910448e-06, | |
| "loss": 0.0227, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.996071704815653, | |
| "eval_f1": 0.3989041095890411, | |
| "eval_loss": 0.018045414239168167, | |
| "eval_precision": 0.6691176470588235, | |
| "eval_recall": 0.28415300546448086, | |
| "eval_runtime": 14.1382, | |
| "eval_samples_per_second": 37.912, | |
| "eval_steps_per_second": 37.912, | |
| "step": 13936 | |
| }, | |
| { | |
| "epoch": 13.059701492537313, | |
| "grad_norm": 0.10282892733812332, | |
| "learning_rate": 6.9402985074626876e-06, | |
| "loss": 0.0231, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 13.526119402985074, | |
| "grad_norm": 0.22511619329452515, | |
| "learning_rate": 6.473880597014925e-06, | |
| "loss": 0.0225, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 13.992537313432836, | |
| "grad_norm": 0.2805801331996918, | |
| "learning_rate": 6.007462686567165e-06, | |
| "loss": 0.0223, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.9960108287735984, | |
| "eval_f1": 0.40171858216971, | |
| "eval_loss": 0.018198266625404358, | |
| "eval_precision": 0.6437177280550774, | |
| "eval_recall": 0.29195940671350507, | |
| "eval_runtime": 13.2494, | |
| "eval_samples_per_second": 40.455, | |
| "eval_steps_per_second": 40.455, | |
| "step": 15008 | |
| }, | |
| { | |
| "epoch": 14.458955223880597, | |
| "grad_norm": 0.2002527117729187, | |
| "learning_rate": 5.5410447761194035e-06, | |
| "loss": 0.0211, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 14.925373134328359, | |
| "grad_norm": 0.050380606204271317, | |
| "learning_rate": 5.074626865671642e-06, | |
| "loss": 0.0214, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.9960860285902541, | |
| "eval_f1": 0.40630092341118956, | |
| "eval_loss": 0.018185345456004143, | |
| "eval_precision": 0.6678571428571428, | |
| "eval_recall": 0.29195940671350507, | |
| "eval_runtime": 13.1752, | |
| "eval_samples_per_second": 40.683, | |
| "eval_steps_per_second": 40.683, | |
| "step": 16080 | |
| }, | |
| { | |
| "epoch": 15.39179104477612, | |
| "grad_norm": 0.29372188448905945, | |
| "learning_rate": 4.6082089552238816e-06, | |
| "loss": 0.0205, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 15.85820895522388, | |
| "grad_norm": 0.145236998796463, | |
| "learning_rate": 4.141791044776119e-06, | |
| "loss": 0.0213, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.9960573810410519, | |
| "eval_f1": 0.40582838640043173, | |
| "eval_loss": 0.018394598737359047, | |
| "eval_precision": 0.6573426573426573, | |
| "eval_recall": 0.2935206869633099, | |
| "eval_runtime": 13.885, | |
| "eval_samples_per_second": 38.603, | |
| "eval_steps_per_second": 38.603, | |
| "step": 17152 | |
| }, | |
| { | |
| "epoch": 16.324626865671643, | |
| "grad_norm": 0.3503226637840271, | |
| "learning_rate": 3.6753731343283584e-06, | |
| "loss": 0.0205, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 16.791044776119403, | |
| "grad_norm": 0.22795088589191437, | |
| "learning_rate": 3.208955223880597e-06, | |
| "loss": 0.0198, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.9960108287735984, | |
| "eval_f1": 0.40807651434643993, | |
| "eval_loss": 0.018566885963082314, | |
| "eval_precision": 0.6389351081530782, | |
| "eval_recall": 0.2997658079625293, | |
| "eval_runtime": 13.6531, | |
| "eval_samples_per_second": 39.259, | |
| "eval_steps_per_second": 39.259, | |
| "step": 18224 | |
| }, | |
| { | |
| "epoch": 17.257462686567163, | |
| "grad_norm": 0.09603070467710495, | |
| "learning_rate": 2.742537313432836e-06, | |
| "loss": 0.0198, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 17.723880597014926, | |
| "grad_norm": 0.3136885166168213, | |
| "learning_rate": 2.2761194029850747e-06, | |
| "loss": 0.0197, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.9960538000974016, | |
| "eval_f1": 0.42, | |
| "eval_loss": 0.018600279465317726, | |
| "eval_precision": 0.6445880452342488, | |
| "eval_recall": 0.3114754098360656, | |
| "eval_runtime": 12.9973, | |
| "eval_samples_per_second": 41.239, | |
| "eval_steps_per_second": 41.239, | |
| "step": 19296 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 21440, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2579840201687040.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |