| { | |
| "best_metric": 0.6611545464384188, | |
| "best_model_checkpoint": "../saved_model/tibetan-bert_tncc-document_v3/checkpoint-3234", | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 4620, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.6271739130434782, | |
| "eval_loss": 1.15811288356781, | |
| "eval_macro-f1": 0.5321491435605941, | |
| "eval_macro-precision": 0.5499509034708364, | |
| "eval_macro-recall": 0.5513303846525741, | |
| "eval_runtime": 7.2664, | |
| "eval_samples_per_second": 126.61, | |
| "eval_steps_per_second": 3.991, | |
| "eval_weighted-f1": 0.6286606311348418, | |
| "eval_weighted-precision": 0.6570526714130747, | |
| "eval_weighted-recall": 0.6271739130434782, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.6467391304347826, | |
| "eval_loss": 1.068244218826294, | |
| "eval_macro-f1": 0.5647362494157037, | |
| "eval_macro-precision": 0.6183747770237591, | |
| "eval_macro-recall": 0.5557985823725112, | |
| "eval_runtime": 7.286, | |
| "eval_samples_per_second": 126.269, | |
| "eval_steps_per_second": 3.98, | |
| "eval_weighted-f1": 0.6340133102826743, | |
| "eval_weighted-precision": 0.661646599688849, | |
| "eval_weighted-recall": 0.6467391304347826, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 4.458874458874459e-05, | |
| "loss": 1.1735, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.6760869565217391, | |
| "eval_loss": 0.9934693574905396, | |
| "eval_macro-f1": 0.5945571067918397, | |
| "eval_macro-precision": 0.6219488737703451, | |
| "eval_macro-recall": 0.5845017075090547, | |
| "eval_runtime": 7.2737, | |
| "eval_samples_per_second": 126.483, | |
| "eval_steps_per_second": 3.987, | |
| "eval_weighted-f1": 0.6693675078151828, | |
| "eval_weighted-precision": 0.6716730940106624, | |
| "eval_weighted-recall": 0.6760869565217391, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.6760869565217391, | |
| "eval_loss": 1.0614756345748901, | |
| "eval_macro-f1": 0.6069076519660962, | |
| "eval_macro-precision": 0.6513367891333143, | |
| "eval_macro-recall": 0.5927988194005301, | |
| "eval_runtime": 7.33, | |
| "eval_samples_per_second": 125.511, | |
| "eval_steps_per_second": 3.956, | |
| "eval_weighted-f1": 0.668192819938365, | |
| "eval_weighted-precision": 0.6913129775105903, | |
| "eval_weighted-recall": 0.6760869565217391, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 3.917748917748918e-05, | |
| "loss": 0.6662, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.6608695652173913, | |
| "eval_loss": 1.1701490879058838, | |
| "eval_macro-f1": 0.594957375773476, | |
| "eval_macro-precision": 0.6170952632020997, | |
| "eval_macro-recall": 0.6011833102671116, | |
| "eval_runtime": 7.2756, | |
| "eval_samples_per_second": 126.45, | |
| "eval_steps_per_second": 3.986, | |
| "eval_weighted-f1": 0.6654696189795196, | |
| "eval_weighted-precision": 0.6851333084994409, | |
| "eval_weighted-recall": 0.6608695652173913, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.6510869565217391, | |
| "eval_loss": 1.292517066001892, | |
| "eval_macro-f1": 0.607937784808065, | |
| "eval_macro-precision": 0.630719857369284, | |
| "eval_macro-recall": 0.6225829586625629, | |
| "eval_runtime": 7.2833, | |
| "eval_samples_per_second": 126.317, | |
| "eval_steps_per_second": 3.982, | |
| "eval_weighted-f1": 0.6579657588719497, | |
| "eval_weighted-precision": 0.68649865311892, | |
| "eval_weighted-recall": 0.6510869565217391, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 3.376623376623377e-05, | |
| "loss": 0.3247, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.6695652173913044, | |
| "eval_loss": 1.3797581195831299, | |
| "eval_macro-f1": 0.6129092280782845, | |
| "eval_macro-precision": 0.6387021571282967, | |
| "eval_macro-recall": 0.609094564757661, | |
| "eval_runtime": 7.2749, | |
| "eval_samples_per_second": 126.461, | |
| "eval_steps_per_second": 3.986, | |
| "eval_weighted-f1": 0.6640159054651581, | |
| "eval_weighted-precision": 0.6700483998986603, | |
| "eval_weighted-recall": 0.6695652173913044, | |
| "step": 1617 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.6706521739130434, | |
| "eval_loss": 1.4838347434997559, | |
| "eval_macro-f1": 0.6061542359667785, | |
| "eval_macro-precision": 0.611272244576803, | |
| "eval_macro-recall": 0.6146650230118652, | |
| "eval_runtime": 7.2825, | |
| "eval_samples_per_second": 126.33, | |
| "eval_steps_per_second": 3.982, | |
| "eval_weighted-f1": 0.6720993101087696, | |
| "eval_weighted-precision": 0.6820628069048843, | |
| "eval_weighted-recall": 0.6706521739130434, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "learning_rate": 2.8354978354978357e-05, | |
| "loss": 0.1507, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.6880434782608695, | |
| "eval_loss": 1.5808299779891968, | |
| "eval_macro-f1": 0.6506642163845188, | |
| "eval_macro-precision": 0.6540683372674448, | |
| "eval_macro-recall": 0.6603270904104771, | |
| "eval_runtime": 7.2595, | |
| "eval_samples_per_second": 126.731, | |
| "eval_steps_per_second": 3.995, | |
| "eval_weighted-f1": 0.6915267384661025, | |
| "eval_weighted-precision": 0.7075992309158492, | |
| "eval_weighted-recall": 0.6880434782608695, | |
| "step": 2079 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.6717391304347826, | |
| "eval_loss": 1.650195837020874, | |
| "eval_macro-f1": 0.6034609083187376, | |
| "eval_macro-precision": 0.6321546665662738, | |
| "eval_macro-recall": 0.5917026419660609, | |
| "eval_runtime": 7.2865, | |
| "eval_samples_per_second": 126.261, | |
| "eval_steps_per_second": 3.98, | |
| "eval_weighted-f1": 0.6684456866047149, | |
| "eval_weighted-precision": 0.6745276629705688, | |
| "eval_weighted-recall": 0.6717391304347826, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 10.82, | |
| "learning_rate": 2.2943722943722946e-05, | |
| "loss": 0.0896, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.6804347826086956, | |
| "eval_loss": 1.738294005393982, | |
| "eval_macro-f1": 0.6302114432029545, | |
| "eval_macro-precision": 0.64659326690522, | |
| "eval_macro-recall": 0.6353590685660309, | |
| "eval_runtime": 7.3402, | |
| "eval_samples_per_second": 125.337, | |
| "eval_steps_per_second": 3.951, | |
| "eval_weighted-f1": 0.6820100289567567, | |
| "eval_weighted-precision": 0.6975783236550734, | |
| "eval_weighted-recall": 0.6804347826086956, | |
| "step": 2541 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.6989130434782609, | |
| "eval_loss": 1.7147595882415771, | |
| "eval_macro-f1": 0.6515414811628367, | |
| "eval_macro-precision": 0.6658442974988088, | |
| "eval_macro-recall": 0.6496260625897462, | |
| "eval_runtime": 7.2787, | |
| "eval_samples_per_second": 126.396, | |
| "eval_steps_per_second": 3.984, | |
| "eval_weighted-f1": 0.6976782715450106, | |
| "eval_weighted-precision": 0.7017023034717548, | |
| "eval_weighted-recall": 0.6989130434782609, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 12.99, | |
| "learning_rate": 1.7532467532467535e-05, | |
| "loss": 0.0646, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.6891304347826087, | |
| "eval_loss": 1.7946357727050781, | |
| "eval_macro-f1": 0.648332711071471, | |
| "eval_macro-precision": 0.6479765490771864, | |
| "eval_macro-recall": 0.6515217848664077, | |
| "eval_runtime": 7.3066, | |
| "eval_samples_per_second": 125.913, | |
| "eval_steps_per_second": 3.969, | |
| "eval_weighted-f1": 0.6915983518325557, | |
| "eval_weighted-precision": 0.6986485129748002, | |
| "eval_weighted-recall": 0.6891304347826087, | |
| "step": 3003 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7, | |
| "eval_loss": 1.7724699974060059, | |
| "eval_macro-f1": 0.6611545464384188, | |
| "eval_macro-precision": 0.667409141168159, | |
| "eval_macro-recall": 0.6627804433172214, | |
| "eval_runtime": 7.2494, | |
| "eval_samples_per_second": 126.906, | |
| "eval_steps_per_second": 4.0, | |
| "eval_weighted-f1": 0.7033455944346818, | |
| "eval_weighted-precision": 0.7140252489602517, | |
| "eval_weighted-recall": 0.7, | |
| "step": 3234 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.6923913043478261, | |
| "eval_loss": 1.819846510887146, | |
| "eval_macro-f1": 0.6556012492821643, | |
| "eval_macro-precision": 0.6602686382879858, | |
| "eval_macro-recall": 0.6668664107682606, | |
| "eval_runtime": 7.2775, | |
| "eval_samples_per_second": 126.418, | |
| "eval_steps_per_second": 3.985, | |
| "eval_weighted-f1": 0.6965952163097968, | |
| "eval_weighted-precision": 0.7083445248462037, | |
| "eval_weighted-recall": 0.6923913043478261, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 15.15, | |
| "learning_rate": 1.2121212121212122e-05, | |
| "loss": 0.042, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.6945652173913044, | |
| "eval_loss": 1.784122109413147, | |
| "eval_macro-f1": 0.6563585998978742, | |
| "eval_macro-precision": 0.6655291218706761, | |
| "eval_macro-recall": 0.6534120619783158, | |
| "eval_runtime": 7.297, | |
| "eval_samples_per_second": 126.08, | |
| "eval_steps_per_second": 3.974, | |
| "eval_weighted-f1": 0.6949462526633576, | |
| "eval_weighted-precision": 0.6993999521302994, | |
| "eval_weighted-recall": 0.6945652173913044, | |
| "step": 3696 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.6945652173913044, | |
| "eval_loss": 1.7921020984649658, | |
| "eval_macro-f1": 0.654978142271046, | |
| "eval_macro-precision": 0.6614421486999998, | |
| "eval_macro-recall": 0.657140253465508, | |
| "eval_runtime": 7.359, | |
| "eval_samples_per_second": 125.018, | |
| "eval_steps_per_second": 3.941, | |
| "eval_weighted-f1": 0.6972072799287201, | |
| "eval_weighted-precision": 0.705999712282921, | |
| "eval_weighted-recall": 0.6945652173913044, | |
| "step": 3927 | |
| }, | |
| { | |
| "epoch": 17.32, | |
| "learning_rate": 6.709956709956711e-06, | |
| "loss": 0.0314, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.6945652173913044, | |
| "eval_loss": 1.824020266532898, | |
| "eval_macro-f1": 0.6548614235086001, | |
| "eval_macro-precision": 0.6544996322588115, | |
| "eval_macro-recall": 0.660921203092836, | |
| "eval_runtime": 7.3304, | |
| "eval_samples_per_second": 125.504, | |
| "eval_steps_per_second": 3.956, | |
| "eval_weighted-f1": 0.6960752624759597, | |
| "eval_weighted-precision": 0.7018632204313372, | |
| "eval_weighted-recall": 0.6945652173913044, | |
| "step": 4158 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.6923913043478261, | |
| "eval_loss": 1.8412573337554932, | |
| "eval_macro-f1": 0.6506506908222951, | |
| "eval_macro-precision": 0.6468541951851238, | |
| "eval_macro-recall": 0.6600299174294355, | |
| "eval_runtime": 7.2351, | |
| "eval_samples_per_second": 127.157, | |
| "eval_steps_per_second": 4.008, | |
| "eval_weighted-f1": 0.69621624934211, | |
| "eval_weighted-precision": 0.7054368084525598, | |
| "eval_weighted-recall": 0.6923913043478261, | |
| "step": 4389 | |
| }, | |
| { | |
| "epoch": 19.48, | |
| "learning_rate": 1.2987012987012988e-06, | |
| "loss": 0.0233, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.691304347826087, | |
| "eval_loss": 1.8325966596603394, | |
| "eval_macro-f1": 0.6463398836192601, | |
| "eval_macro-precision": 0.6403230244612891, | |
| "eval_macro-recall": 0.6567581821874694, | |
| "eval_runtime": 7.2741, | |
| "eval_samples_per_second": 126.476, | |
| "eval_steps_per_second": 3.987, | |
| "eval_weighted-f1": 0.6940975925997279, | |
| "eval_weighted-precision": 0.7008420722361637, | |
| "eval_weighted-recall": 0.691304347826087, | |
| "step": 4620 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 4620, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 3.87544755290112e+16, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |