diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/config.json b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a424582b16776120730808f810ad9081375d580e --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/config.json @@ -0,0 +1,52 @@ +{ + "architectures": [ + "GloMeModelForTokenClassification" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "bos_token_id": 28, + "cdr_weight": 0.0, + "class_weights": [ + 0.1, + 0.9 + ], + "classifier_activation": "gelu", + "classifier_bias": false, + "classifier_dropout": 0.1, + "classifier_pooling": "cls", + "cls_token_id": 28, + "compress_block_size": 16, + "compress_block_sliding_stride": 16, + "decoder_bias": true, + "dice_weight": 0.1, + "embedding_dropout": 0.1, + "eos_token_id": 29, + "hidden_activation": "gelu", + "hidden_size": 320, + "inner_rank": 32, + "intermediate_size": 1280, + "kv_heads": 10, + "mask_token_id": 31, + "mlp_bias": false, + "mlp_dropout": 0.1, + "model_size": "tiny", + "model_type": "glome", + "norm_bias": false, + "norm_eps": 1e-05, + "num_attention_heads": 20, + "num_hidden_layers": 6, + "num_selected_blocks": 8, + "num_slots": 64, + "pad_token_id": 30, + "reference_compile": null, + "selection_block_size": 16, + "sep_token_id": 29, + "sliding_window_size": 0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.52.3", + "unk_token_id": 27, + "use_glome": true, + "use_nsa": true, + "vocab_size": 36 +} diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/model.safetensors b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9a41ccd9e1f4850445f9b65c983d85f83b449b4 --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30c034c905b9a0eea1b7cc3c4c77fbdf290e876b5a00c00f09397921c11013c3 +size 61385376 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/optimizer.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..04baeb1ad55086d386c983224a159983ed885fb2 --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:725b67bba8a7c8b0286053847c3081378277810c7a29c183c24fbe0871520125 +size 122881658 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/rng_state.pth b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c4421ebb7cc7c9f31acec4968a8f80ff6c4bcc06 --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85c805248eed07d14f2eedb8b706397ce25315fc3b96df0bb8479411b871bd7b +size 14244 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/scaler.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363 +size 988 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/scheduler.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..848ca8d531d2184c668c91c1e4f715a3aa39b2d8 --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0f85b1b4566ce3af61937d9c59a498414651a1e667865d284be6f03114163df +size 1064 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/trainer_state.json b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..98d3db68475d3e0e8cd2f3ba63f29287ef20fbfb --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/trainer_state.json @@ -0,0 +1,11961 @@ +{ + "best_global_step": 8856, + "best_metric": 0.7849709563979171, + "best_model_checkpoint": "./results/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856", + "epoch": 492.0, + "eval_steps": 500, + "global_step": 8856, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy": 0.3401972747610332, + "eval_auc": 0.39064302367564674, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25900759435847653, + "eval_f1_macro": 0.3321798728791878, + "eval_loss": 1.0617570877075195, + "eval_pr_auc": 0.1212308124824295, + "eval_precision": 0.15736885928393005, + "eval_precision_macro": 0.49944165947453734, + "eval_pred_class_0": 5256, + "eval_pred_class_1": 14412, + "eval_predicted_binding_ratio": 0.7327638804148872, + "eval_recall": 0.7313769751693002, + "eval_recall_macro": 0.4991767473782156, + "eval_runtime": 0.304, + "eval_samples_per_second": 536.239, + "eval_steps_per_second": 3.29, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 18 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.341010778930242, + "eval_auc": 0.39081343973238586, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2590750585948665, + "eval_f1_macro": 0.33285209231533375, + "eval_loss": 1.0604556798934937, + "eval_pr_auc": 0.12126612292918731, + "eval_precision": 0.1574485825458588, + "eval_precision_macro": 0.4995923731531417, + "eval_pred_class_0": 5276, + "eval_pred_class_1": 14392, + "eval_predicted_binding_ratio": 0.731747000203376, + "eval_recall": 0.7307320219284101, + "eval_recall_macro": 0.4993975193845588, + "eval_runtime": 0.2793, + "eval_samples_per_second": 583.516, + "eval_steps_per_second": 3.58, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 36 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.34223103518405534, + "eval_auc": 0.3911369382652214, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2591765446944969, + "eval_f1_macro": 0.33385837704253485, + "eval_loss": 1.058252215385437, + "eval_pr_auc": 0.12133107613942488, + "eval_precision": 0.15756858376270713, + "eval_precision_macro": 0.4998170849458089, + "eval_pred_class_0": 5306, + "eval_pred_class_1": 14362, + "eval_predicted_binding_ratio": 0.7302216798861094, + "eval_recall": 0.7297645920670751, + "eval_recall_macro": 0.49972867739407356, + "eval_runtime": 0.2676, + "eval_samples_per_second": 609.165, + "eval_steps_per_second": 3.737, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 54 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.3441122635753508, + "eval_auc": 0.3915867840995182, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.259556882103088, + "eval_f1_macro": 0.33544605079873757, + "eval_loss": 1.0551481246948242, + "eval_pr_auc": 0.12142208631760734, + "eval_precision": 0.15788003631031353, + "eval_precision_macro": 0.500391299247358, + "eval_pred_class_0": 5347, + "eval_pred_class_1": 14321, + "eval_predicted_binding_ratio": 0.7281370754525117, + "eval_recall": 0.7291196388261851, + "eval_recall_macro": 0.5005832394650029, + "eval_runtime": 0.2336, + "eval_samples_per_second": 697.796, + "eval_steps_per_second": 4.281, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 72 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.3457392719137686, + "eval_auc": 0.39218283153314865, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2597791072250345, + "eval_f1_macro": 0.3367955302889021, + "eval_loss": 1.0511513948440552, + "eval_pr_auc": 0.12154600341235242, + "eval_precision": 0.15809003710705033, + "eval_precision_macro": 0.5007720380521324, + "eval_pred_class_0": 5385, + "eval_pred_class_1": 14283, + "eval_predicted_binding_ratio": 0.7262050030506406, + "eval_recall": 0.72815220896485, + "eval_recall_macro": 0.5011558413086458, + "eval_runtime": 0.2629, + "eval_samples_per_second": 620.078, + "eval_steps_per_second": 3.804, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 90 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.34873906853772624, + "eval_auc": 0.39291782012189097, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.259894840238054, + "eval_f1_macro": 0.33921701928376435, + "eval_loss": 1.0462485551834106, + "eval_pr_auc": 0.1216940029412557, + "eval_precision": 0.1583133887089962, + "eval_precision_macro": 0.5011632853468087, + "eval_pred_class_0": 5462, + "eval_pred_class_1": 14206, + "eval_predicted_binding_ratio": 0.722290014236323, + "eval_recall": 0.7252499193808449, + "eval_recall_macro": 0.5017569691067321, + "eval_runtime": 0.2393, + "eval_samples_per_second": 681.219, + "eval_steps_per_second": 4.179, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 108 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.3517897091722595, + "eval_auc": 0.3937714770704174, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2599407906193766, + "eval_f1_macro": 0.3416488972772128, + "eval_loss": 1.0405118465423584, + "eval_pr_auc": 0.12187498322705145, + "eval_precision": 0.1585020529520034, + "eval_precision_macro": 0.5014812682659693, + "eval_pred_class_0": 5542, + "eval_pred_class_1": 14126, + "eval_predicted_binding_ratio": 0.7182224933902787, + "eval_recall": 0.7220251531763947, + "eval_recall_macro": 0.5022572195531276, + "eval_runtime": 0.2765, + "eval_samples_per_second": 589.571, + "eval_steps_per_second": 3.617, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 126 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.354586129753915, + "eval_auc": 0.3947741191129793, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2591338858410179, + "eval_f1_macro": 0.3436917965373002, + "eval_loss": 1.0338975191116333, + "eval_pr_auc": 0.12208733120990471, + "eval_precision": 0.1581985320316397, + "eval_precision_macro": 0.5009271275952343, + "eval_pred_class_0": 5635, + "eval_pred_class_1": 14033, + "eval_predicted_binding_ratio": 0.713494000406752, + "eval_recall": 0.7158980973879394, + "eval_recall_macro": 0.5014270471245847, + "eval_runtime": 0.2385, + "eval_samples_per_second": 683.567, + "eval_steps_per_second": 4.194, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 144 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.3598230628431971, + "eval_auc": 0.39592411118975185, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25913503971756396, + "eval_f1_macro": 0.3477761944928628, + "eval_loss": 1.0263975858688354, + "eval_pr_auc": 0.12233214426039367, + "eval_precision": 0.15848567727076435, + "eval_precision_macro": 0.5013938604573427, + "eval_pred_class_0": 5774, + "eval_pred_class_1": 13894, + "eval_predicted_binding_ratio": 0.70642668293675, + "eval_recall": 0.710093518219929, + "eval_recall_macro": 0.502176595531767, + "eval_runtime": 0.2745, + "eval_samples_per_second": 593.873, + "eval_steps_per_second": 3.643, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 162 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.3636872076469392, + "eval_auc": 0.3972021050928084, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25801861623288075, + "eval_f1_macro": 0.35051449303788773, + "eval_loss": 1.0180495977401733, + "eval_pr_auc": 0.12260540333611444, + "eval_precision": 0.15807060874618625, + "eval_precision_macro": 0.5006720376838353, + "eval_pred_class_0": 5902, + "eval_pred_class_1": 13766, + "eval_predicted_binding_ratio": 0.6999186495830791, + "eval_recall": 0.7017091260883586, + "eval_recall_macro": 0.5010628083511147, + "eval_runtime": 0.2708, + "eval_samples_per_second": 601.838, + "eval_steps_per_second": 3.692, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 180 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.3691275167785235, + "eval_auc": 0.39866621357342186, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.258604206500956, + "eval_f1_macro": 0.35478882891419483, + "eval_loss": 1.0087939500808716, + "eval_pr_auc": 0.12292033936081492, + "eval_precision": 0.1587092042537587, + "eval_precision_macro": 0.5016983780261003, + "eval_pred_class_0": 6033, + "eval_pred_class_1": 13635, + "eval_predicted_binding_ratio": 0.6932580841976815, + "eval_recall": 0.6978394066430184, + "eval_recall_macro": 0.5027194256611, + "eval_runtime": 0.2616, + "eval_samples_per_second": 623.126, + "eval_steps_per_second": 3.823, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 198 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.3740593858043523, + "eval_auc": 0.4002664991794433, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25823944086280654, + "eval_f1_macro": 0.35841740282728696, + "eval_loss": 0.9987770318984985, + "eval_pr_auc": 0.12326823892822446, + "eval_precision": 0.15878778897451096, + "eval_precision_macro": 0.5017853397238077, + "eval_pred_class_0": 6172, + "eval_pred_class_1": 13496, + "eval_predicted_binding_ratio": 0.6861907667276794, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.5028947176998165, + "eval_runtime": 0.258, + "eval_samples_per_second": 631.708, + "eval_steps_per_second": 3.876, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 216 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.37980475899938987, + "eval_auc": 0.40207323055334293, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25811945018854154, + "eval_f1_macro": 0.36265804779890953, + "eval_loss": 0.987876832485199, + "eval_pr_auc": 0.12366119818610516, + "eval_precision": 0.15905854133873024, + "eval_precision_macro": 0.5021624301446299, + "eval_pred_class_0": 6327, + "eval_pred_class_1": 13341, + "eval_predicted_binding_ratio": 0.6783099450884685, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.5035528974067893, + "eval_runtime": 0.254, + "eval_samples_per_second": 641.68, + "eval_steps_per_second": 3.937, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 234 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.3867703884482408, + "eval_auc": 0.40404318566725245, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2582866982350409, + "eval_f1_macro": 0.36779990383041317, + "eval_loss": 0.9760332107543945, + "eval_pr_auc": 0.12409453800524387, + "eval_precision": 0.1595744680851064, + "eval_precision_macro": 0.5028818867776484, + "eval_pred_class_0": 6508, + "eval_pred_class_1": 13160, + "eval_predicted_binding_ratio": 0.6691071791742933, + "eval_recall": 0.6772009029345373, + "eval_recall_macro": 0.5048043507851898, + "eval_runtime": 0.2717, + "eval_samples_per_second": 599.919, + "eval_steps_per_second": 3.68, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 252 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.39429530201342283, + "eval_auc": 0.40629005957398867, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2577107607950651, + "eval_f1_macro": 0.37306889008104693, + "eval_loss": 0.9632152915000916, + "eval_pr_auc": 0.12458802431940903, + "eval_precision": 0.15971578622181032, + "eval_precision_macro": 0.5029977740632862, + "eval_pred_class_0": 6720, + "eval_pred_class_1": 12948, + "eval_predicted_binding_ratio": 0.6583282489322758, + "eval_recall": 0.6668816510802967, + "eval_recall_macro": 0.5050772111259515, + "eval_runtime": 0.253, + "eval_samples_per_second": 644.248, + "eval_steps_per_second": 3.952, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 270 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.4020744356314826, + "eval_auc": 0.408681470822737, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25738822935084615, + "eval_f1_macro": 0.37848134871797623, + "eval_loss": 0.9496278166770935, + "eval_pr_auc": 0.12511500176534787, + "eval_precision": 0.16003140950137418, + "eval_precision_macro": 0.5033533652151325, + "eval_pred_class_0": 6933, + "eval_pred_class_1": 12735, + "eval_predicted_binding_ratio": 0.6474984746796827, + "eval_recall": 0.6572073524669462, + "eval_recall_macro": 0.5057630895249562, + "eval_runtime": 0.269, + "eval_samples_per_second": 606.014, + "eval_steps_per_second": 3.718, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 288 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.408989221069758, + "eval_auc": 0.4113766625614338, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.255349135169763, + "eval_f1_macro": 0.38271123621844805, + "eval_loss": 0.9350630640983582, + "eval_pr_auc": 0.1257149536327416, + "eval_precision": 0.15932528579422817, + "eval_precision_macro": 0.5022775332449281, + "eval_pred_class_0": 7159, + "eval_pred_class_1": 12509, + "eval_predicted_binding_ratio": 0.6360077282896075, + "eval_recall": 0.6426959045469204, + "eval_recall_macro": 0.5039700323120913, + "eval_runtime": 0.2687, + "eval_samples_per_second": 606.61, + "eval_steps_per_second": 3.722, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 306 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.4161582265609111, + "eval_auc": 0.41440477389195646, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2538176619663396, + "eval_f1_macro": 0.3871501973338609, + "eval_loss": 0.9196970462799072, + "eval_pr_auc": 0.12640556118775828, + "eval_precision": 0.158935546875, + "eval_precision_macro": 0.5016899956597223, + "eval_pred_class_0": 7380, + "eval_pred_class_1": 12288, + "eval_predicted_binding_ratio": 0.62477120195241, + "eval_recall": 0.6297968397291196, + "eval_recall_macro": 0.5029831666503388, + "eval_runtime": 0.2554, + "eval_samples_per_second": 638.105, + "eval_steps_per_second": 3.915, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 324 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.4237848281472443, + "eval_auc": 0.4176888499450513, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25100786464873437, + "eval_f1_macro": 0.3913994084656603, + "eval_loss": 0.9033117294311523, + "eval_pr_auc": 0.12714405405007598, + "eval_precision": 0.15785536159600996, + "eval_precision_macro": 0.5002421610284318, + "eval_pred_class_0": 7638, + "eval_pred_class_1": 12030, + "eval_predicted_binding_ratio": 0.611653447223917, + "eval_recall": 0.6123831022250886, + "eval_recall_macro": 0.5004331156685895, + "eval_runtime": 0.2637, + "eval_samples_per_second": 618.078, + "eval_steps_per_second": 3.792, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 342 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.4312080536912752, + "eval_auc": 0.4212995000006521, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24904343156340203, + "eval_f1_macro": 0.39564573885956833, + "eval_loss": 0.8857852220535278, + "eval_pr_auc": 0.12799421494868934, + "eval_precision": 0.1572566971854866, + "eval_precision_macro": 0.49948708843014167, + "eval_pred_class_0": 7872, + "eval_pred_class_1": 11796, + "eval_predicted_binding_ratio": 0.5997559487492373, + "eval_recall": 0.5981941309255079, + "eval_recall_macro": 0.4990729210793411, + "eval_runtime": 0.2709, + "eval_samples_per_second": 601.789, + "eval_steps_per_second": 3.692, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 360 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.4394956274150905, + "eval_auc": 0.42538333442304876, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24493150684931506, + "eval_f1_macro": 0.399632635701501, + "eval_loss": 0.8671084642410278, + "eval_pr_auc": 0.12894871744717554, + "eval_precision": 0.15549178189407775, + "eval_precision_macro": 0.4973810972146359, + "eval_pred_class_0": 8169, + "eval_pred_class_1": 11499, + "eval_predicted_binding_ratio": 0.5846552776082977, + "eval_recall": 0.5765881973556917, + "eval_recall_macro": 0.4952114645256155, + "eval_runtime": 0.2681, + "eval_samples_per_second": 607.95, + "eval_steps_per_second": 3.73, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 378 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.44824079723408583, + "eval_auc": 0.42976391273864795, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24080033580523297, + "eval_f1_macro": 0.4037241835563183, + "eval_loss": 0.8476783633232117, + "eval_pr_auc": 0.13001972671009082, + "eval_precision": 0.15375681229339766, + "eval_precision_macro": 0.495462476943159, + "eval_pred_class_0": 8475, + "eval_pred_class_1": 11193, + "eval_predicted_binding_ratio": 0.5690970103721782, + "eval_recall": 0.5549822637858756, + "eval_recall_macro": 0.491621632285284, + "eval_runtime": 0.262, + "eval_samples_per_second": 622.096, + "eval_steps_per_second": 3.817, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 396 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.46044335977221884, + "eval_auc": 0.4345819960798662, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.23819095477386934, + "eval_f1_macro": 0.4102471738365922, + "eval_loss": 0.8271914720535278, + "eval_pr_auc": 0.1312038077210987, + "eval_precision": 0.15319974143503556, + "eval_precision_macro": 0.49502955733365084, + "eval_pred_class_0": 8839, + "eval_pred_class_1": 10829, + "eval_predicted_binding_ratio": 0.5505897905226764, + "eval_recall": 0.5349887133182845, + "eval_recall_macro": 0.4907393617898237, + "eval_runtime": 0.2699, + "eval_samples_per_second": 603.817, + "eval_steps_per_second": 3.704, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 414 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.4719849501728696, + "eval_auc": 0.4399397854182523, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2337489854644728, + "eval_f1_macro": 0.4154821023975197, + "eval_loss": 0.8054794669151306, + "eval_pr_auc": 0.13253606290408437, + "eval_precision": 0.1515499425947187, + "eval_precision_macro": 0.4934724539362483, + "eval_pred_class_0": 9216, + "eval_pred_class_1": 10452, + "eval_predicted_binding_ratio": 0.5314215985356925, + "eval_recall": 0.5108029667849081, + "eval_recall_macro": 0.48776099326147077, + "eval_runtime": 0.2459, + "eval_samples_per_second": 662.968, + "eval_steps_per_second": 4.067, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 432 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.48713646532438476, + "eval_auc": 0.4457222328836341, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.23099794160250056, + "eval_f1_macro": 0.42313846887516615, + "eval_loss": 0.7826969027519226, + "eval_pr_auc": 0.13403779679155806, + "eval_precision": 0.15125798722044728, + "eval_precision_macro": 0.4934698556077371, + "eval_pred_class_0": 9652, + "eval_pred_class_1": 10016, + "eval_predicted_binding_ratio": 0.5092536099247509, + "eval_recall": 0.48855207997420186, + "eval_recall_macro": 0.48771178574674356, + "eval_runtime": 0.2651, + "eval_samples_per_second": 614.772, + "eval_steps_per_second": 3.772, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 450 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.506152125279642, + "eval_auc": 0.452125351005008, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22648721828462212, + "eval_f1_macro": 0.43188881620754876, + "eval_loss": 0.7587484121322632, + "eval_pr_auc": 0.13570124162691763, + "eval_precision": 0.15038071065989847, + "eval_precision_macro": 0.492983148122742, + "eval_pred_class_0": 10212, + "eval_pred_class_1": 9456, + "eval_predicted_binding_ratio": 0.4807809640024405, + "eval_recall": 0.4585617542728152, + "eval_recall_macro": 0.48681090671327726, + "eval_runtime": 0.2665, + "eval_samples_per_second": 611.566, + "eval_steps_per_second": 3.752, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 468 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.529997966239577, + "eval_auc": 0.4588746151842906, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22371514947934162, + "eval_f1_macro": 0.4433436473661838, + "eval_loss": 0.7342172861099243, + "eval_pr_auc": 0.13749280910612846, + "eval_precision": 0.15124332916997843, + "eval_precision_macro": 0.4941834913044441, + "eval_pred_class_0": 10861, + "eval_pred_class_1": 8807, + "eval_predicted_binding_ratio": 0.44778320113890585, + "eval_recall": 0.4295388584327636, + "eval_recall_macro": 0.4891703467029515, + "eval_runtime": 0.2591, + "eval_samples_per_second": 629.173, + "eval_steps_per_second": 3.86, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 486 + }, + { + "epoch": 27.77777777777778, + "grad_norm": 191838.453125, + "learning_rate": 5.544444444444443e-07, + "loss": 0.954, + "step": 500 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.5579621720561317, + "eval_auc": 0.46628288633295734, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22208303507516106, + "eval_f1_macro": 0.4566736198103078, + "eval_loss": 0.7085328102111816, + "eval_pr_auc": 0.1397221721421834, + "eval_precision": 0.15368421052631578, + "eval_precision_macro": 0.4966212823527809, + "eval_pred_class_0": 11593, + "eval_pred_class_1": 8075, + "eval_predicted_binding_ratio": 0.4105653853976002, + "eval_recall": 0.400193485972267, + "eval_recall_macro": 0.49384334768221605, + "eval_runtime": 0.245, + "eval_samples_per_second": 665.264, + "eval_steps_per_second": 4.081, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 504 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.5890278625177954, + "eval_auc": 0.47432292318642716, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21926011784023955, + "eval_f1_macro": 0.4701862470303913, + "eval_loss": 0.6820237636566162, + "eval_pr_auc": 0.1419831923592407, + "eval_precision": 0.15650854936569222, + "eval_precision_macro": 0.4990822386003719, + "eval_pred_class_0": 12416, + "eval_pred_class_1": 7252, + "eval_predicted_binding_ratio": 0.36872076469391907, + "eval_recall": 0.36601096420509516, + "eval_recall_macro": 0.49839149043235986, + "eval_runtime": 0.239, + "eval_samples_per_second": 681.884, + "eval_steps_per_second": 4.183, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 522 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.62385600976205, + "eval_auc": 0.4829859859606367, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21797040169133192, + "eval_f1_macro": 0.4851734455906117, + "eval_loss": 0.6552286148071289, + "eval_pr_auc": 0.144533301478986, + "eval_precision": 0.16213241075640825, + "eval_precision_macro": 0.5032992807407407, + "eval_pred_class_0": 13309, + "eval_pred_class_1": 6359, + "eval_predicted_binding_ratio": 0.32331706324994913, + "eval_recall": 0.3324733956788133, + "eval_recall_macro": 0.5054351043101014, + "eval_runtime": 0.2289, + "eval_samples_per_second": 712.13, + "eval_steps_per_second": 4.369, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 540 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.6593959731543624, + "eval_auc": 0.4923438323703967, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21308586867144366, + "eval_f1_macro": 0.49787408315316334, + "eval_loss": 0.6283431053161621, + "eval_pr_auc": 0.14738118302130468, + "eval_precision": 0.16759053954175906, + "eval_precision_macro": 0.5068452136541568, + "eval_pred_class_0": 14256, + "eval_pred_class_1": 5412, + "eval_predicted_binding_ratio": 0.2751677852348993, + "eval_recall": 0.2924862947436311, + "eval_recall_macro": 0.5102800882784371, + "eval_runtime": 0.2716, + "eval_samples_per_second": 600.185, + "eval_steps_per_second": 3.682, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 558 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.6967154769168192, + "eval_auc": 0.5026651961769109, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21316449017280042, + "eval_f1_macro": 0.5126600281126953, + "eval_loss": 0.6013967990875244, + "eval_pr_auc": 0.15068393162958252, + "eval_precision": 0.18035714285714285, + "eval_precision_macro": 0.5146913446706046, + "eval_pred_class_0": 15188, + "eval_pred_class_1": 4480, + "eval_predicted_binding_ratio": 0.2277811673784828, + "eval_recall": 0.2605611093195743, + "eval_recall_macro": 0.5194578347949956, + "eval_runtime": 0.2705, + "eval_samples_per_second": 602.56, + "eval_steps_per_second": 3.697, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 576 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.7287472035794184, + "eval_auc": 0.5136348320064595, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.20692730786383232, + "eval_f1_macro": 0.5216610840892347, + "eval_loss": 0.575495719909668, + "eval_pr_auc": 0.15441446935423722, + "eval_precision": 0.19194704908990623, + "eval_precision_macro": 0.5210140431835268, + "eval_pred_class_0": 16042, + "eval_pred_class_1": 3626, + "eval_predicted_binding_ratio": 0.18436038234695953, + "eval_recall": 0.22444372782973235, + "eval_recall_macro": 0.5237930596654548, + "eval_runtime": 0.2647, + "eval_samples_per_second": 615.887, + "eval_steps_per_second": 3.778, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 594 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.7577791336180598, + "eval_auc": 0.5256758602512032, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.20013431833445267, + "eval_f1_macro": 0.5287070633014385, + "eval_loss": 0.5502753853797913, + "eval_pr_auc": 0.15881070257620672, + "eval_precision": 0.20875656742556917, + "eval_precision_macro": 0.5298823579410603, + "eval_pred_class_0": 16813, + "eval_pred_class_1": 2855, + "eval_predicted_binding_ratio": 0.14515965019320723, + "eval_recall": 0.19219606578523057, + "eval_recall_macro": 0.5279203302306971, + "eval_runtime": 0.2621, + "eval_samples_per_second": 621.974, + "eval_steps_per_second": 3.816, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 612 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.7844213951596501, + "eval_auc": 0.5388391234856937, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.19575113808801214, + "eval_f1_macro": 0.5356397775926205, + "eval_loss": 0.5265588164329529, + "eval_pr_auc": 0.16395620275178963, + "eval_precision": 0.23767848917549517, + "eval_precision_macro": 0.5449694383352471, + "eval_pred_class_0": 17497, + "eval_pred_class_1": 2171, + "eval_predicted_binding_ratio": 0.11038234695952817, + "eval_recall": 0.16639793614962914, + "eval_recall_macro": 0.5332502748895668, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.866, + "eval_steps_per_second": 3.895, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 630 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.8094366483628228, + "eval_auc": 0.5531888075405533, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.19189305735230702, + "eval_f1_macro": 0.5419376520838427, + "eval_loss": 0.5044229626655579, + "eval_pr_auc": 0.16987983494600534, + "eval_precision": 0.28952504879635654, + "eval_precision_macro": 0.5715178054086024, + "eval_pred_class_0": 18131, + "eval_pred_class_1": 1537, + "eval_predicted_binding_ratio": 0.07814724425462681, + "eval_recall": 0.14350209609803288, + "eval_recall_macro": 0.5387939646905326, + "eval_runtime": 0.2636, + "eval_samples_per_second": 618.446, + "eval_steps_per_second": 3.794, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 648 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.827791336180598, + "eval_auc": 0.5689342779333475, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.18796451690242147, + "eval_f1_macro": 0.5458235779450257, + "eval_loss": 0.4842270016670227, + "eval_pr_auc": 0.177326879876991, + "eval_precision": 0.3663551401869159, + "eval_precision_macro": 0.6103471582212137, + "eval_pred_class_0": 18598, + "eval_pred_class_1": 1070, + "eval_predicted_binding_ratio": 0.05440309131584299, + "eval_recall": 0.12641083521444696, + "eval_recall_macro": 0.5427430526648682, + "eval_runtime": 0.2392, + "eval_samples_per_second": 681.483, + "eval_steps_per_second": 4.181, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 666 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.8386719544437665, + "eval_auc": 0.5868017348062602, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.1820056715648363, + "eval_f1_macro": 0.5462584975699354, + "eval_loss": 0.46564891934394836, + "eval_pr_auc": 0.18685168882837525, + "eval_precision": 0.4537275064267352, + "eval_precision_macro": 0.6541268553838282, + "eval_pred_class_0": 18890, + "eval_pred_class_1": 778, + "eval_predicted_binding_ratio": 0.039556640227781166, + "eval_recall": 0.11383424701709126, + "eval_recall_macro": 0.5440904198204911, + "eval_runtime": 0.2648, + "eval_samples_per_second": 615.584, + "eval_steps_per_second": 3.777, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 684 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.8441631075859264, + "eval_auc": 0.6057814605899876, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.17982338774417983, + "eval_f1_macro": 0.5468627318225942, + "eval_loss": 0.4492926001548767, + "eval_pr_auc": 0.19848375437748741, + "eval_precision": 0.5283018867924528, + "eval_precision_macro": 0.6915101279275421, + "eval_pred_class_0": 19032, + "eval_pred_class_1": 636, + "eval_predicted_binding_ratio": 0.03233679072605247, + "eval_recall": 0.10835214446952596, + "eval_recall_macro": 0.5451219284549598, + "eval_runtime": 0.2567, + "eval_samples_per_second": 635.049, + "eval_steps_per_second": 3.896, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 702 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.8456375838926175, + "eval_auc": 0.6262280880815292, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.1758957654723127, + "eval_f1_macro": 0.5453696262568565, + "eval_loss": 0.43461790680885315, + "eval_pr_auc": 0.21275175506055685, + "eval_precision": 0.5557461406518011, + "eval_precision_macro": 0.7051195990133514, + "eval_pred_class_0": 19085, + "eval_pred_class_1": 583, + "eval_predicted_binding_ratio": 0.0296420581655481, + "eval_recall": 0.10448242502418574, + "eval_recall_macro": 0.544424468382196, + "eval_runtime": 0.2668, + "eval_samples_per_second": 610.873, + "eval_steps_per_second": 3.748, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 720 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.8483323164531218, + "eval_auc": 0.6481986497247736, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.18519530183010108, + "eval_f1_macro": 0.5507896621273841, + "eval_loss": 0.42107364535331726, + "eval_pr_auc": 0.23051421419341214, + "eval_precision": 0.6053571428571428, + "eval_precision_macro": 0.730405178085469, + "eval_pred_class_0": 19108, + "eval_pred_class_1": 560, + "eval_predicted_binding_ratio": 0.02847264592231035, + "eval_recall": 0.10931957433086101, + "eval_recall_macro": 0.5479899012476421, + "eval_runtime": 0.2619, + "eval_samples_per_second": 622.385, + "eval_steps_per_second": 3.818, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 738 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.8502135448444174, + "eval_auc": 0.6709813300109956, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.20292207792207792, + "eval_f1_macro": 0.5601310726310726, + "eval_loss": 0.4084097743034363, + "eval_pr_auc": 0.25093797354762637, + "eval_precision": 0.6302521008403361, + "eval_precision_macro": 0.7436637739036264, + "eval_pred_class_0": 19073, + "eval_pred_class_1": 595, + "eval_predicted_binding_ratio": 0.03025218629245475, + "eval_recall": 0.12092873266688164, + "eval_recall_macro": 0.5538246608949184, + "eval_runtime": 0.2691, + "eval_samples_per_second": 605.746, + "eval_steps_per_second": 3.716, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 756 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.8526540573520439, + "eval_auc": 0.6936772353365158, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22263948497854077, + "eval_f1_macro": 0.5706266398157138, + "eval_loss": 0.39666271209716797, + "eval_pr_auc": 0.2738840395423864, + "eval_precision": 0.6618819776714514, + "eval_precision_macro": 0.7604089789622948, + "eval_pred_class_0": 19041, + "eval_pred_class_1": 627, + "eval_predicted_binding_ratio": 0.031879194630872486, + "eval_recall": 0.13382779748468235, + "eval_recall_macro": 0.5605156371379469, + "eval_runtime": 0.2268, + "eval_samples_per_second": 718.638, + "eval_steps_per_second": 4.409, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 774 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.8544844417327638, + "eval_auc": 0.7158095511124275, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24325753569539926, + "eval_f1_macro": 0.58138013196988, + "eval_loss": 0.38576817512512207, + "eval_pr_auc": 0.29905525248581355, + "eval_precision": 0.6754772393538914, + "eval_precision_macro": 0.7681910344870789, + "eval_pred_class_0": 18987, + "eval_pred_class_1": 681, + "eval_predicted_binding_ratio": 0.03462477120195241, + "eval_recall": 0.14833924540470816, + "eval_recall_macro": 0.5674997367845657, + "eval_runtime": 0.2567, + "eval_samples_per_second": 635.061, + "eval_steps_per_second": 3.896, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 792 + }, + { + "epoch": 45.0, + "eval_accuracy": 0.8575859263778727, + "eval_auc": 0.737084336405228, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2808729139922978, + "eval_f1_macro": 0.60092007766148, + "eval_loss": 0.37560486793518066, + "eval_pr_auc": 0.3260256629572295, + "eval_precision": 0.6889168765743073, + "eval_precision_macro": 0.7767992245539758, + "eval_pred_class_0": 18874, + "eval_pred_class_1": 794, + "eval_predicted_binding_ratio": 0.040370144396990035, + "eval_recall": 0.1763947113834247, + "eval_recall_macro": 0.5807427773130077, + "eval_runtime": 0.2685, + "eval_samples_per_second": 607.1, + "eval_steps_per_second": 3.725, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 810 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.8611450071181614, + "eval_auc": 0.7571642141385685, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.32451150136037593, + "eval_f1_macro": 0.623565358817779, + "eval_loss": 0.36611661314964294, + "eval_pr_auc": 0.3532860869347882, + "eval_precision": 0.6963906581740976, + "eval_precision_macro": 0.7829117661264593, + "eval_pred_class_0": 18726, + "eval_pred_class_1": 942, + "eval_predicted_binding_ratio": 0.047895057962172055, + "eval_recall": 0.21154466301193164, + "eval_recall_macro": 0.5971407144358867, + "eval_runtime": 0.2698, + "eval_samples_per_second": 604.115, + "eval_steps_per_second": 3.706, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 828 + }, + { + "epoch": 47.0, + "eval_accuracy": 0.8646532438478747, + "eval_auc": 0.7759390708192488, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.3694931312174325, + "eval_f1_macro": 0.6468414565354121, + "eval_loss": 0.3574466407299042, + "eval_pr_auc": 0.380672409235741, + "eval_precision": 0.695807314897413, + "eval_precision_macro": 0.7853328912870632, + "eval_pred_class_0": 18547, + "eval_pred_class_1": 1121, + "eval_predicted_binding_ratio": 0.05699613585519626, + "eval_recall": 0.25153176394711385, + "eval_recall_macro": 0.6154743385438473, + "eval_runtime": 0.2501, + "eval_samples_per_second": 651.868, + "eval_steps_per_second": 3.999, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 846 + }, + { + "epoch": 48.0, + "eval_accuracy": 0.8670937563555013, + "eval_auc": 0.7932697219796829, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.4004587155963303, + "eval_f1_macro": 0.6628608765538834, + "eval_loss": 0.34963178634643555, + "eval_pr_auc": 0.40879055918048346, + "eval_precision": 0.69340746624305, + "eval_precision_macro": 0.7861898540406407, + "eval_pred_class_0": 18409, + "eval_pred_class_1": 1259, + "eval_predicted_binding_ratio": 0.06401260931462274, + "eval_recall": 0.2815220896485005, + "eval_recall_macro": 0.6291113798275701, + "eval_runtime": 0.2518, + "eval_samples_per_second": 647.281, + "eval_steps_per_second": 3.971, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 864 + }, + { + "epoch": 49.0, + "eval_accuracy": 0.8693308928208257, + "eval_auc": 0.8090460638591691, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.43491644678979774, + "eval_f1_macro": 0.6805201987887128, + "eval_loss": 0.3424255847930908, + "eval_pr_auc": 0.43548439720530613, + "eval_precision": 0.6834830684174154, + "eval_precision_macro": 0.783786427463743, + "eval_pred_class_0": 18221, + "eval_pred_class_1": 1447, + "eval_predicted_binding_ratio": 0.07357128330282693, + "eval_recall": 0.3189293776201225, + "eval_recall_macro": 0.6456420293062284, + "eval_runtime": 0.2567, + "eval_samples_per_second": 635.097, + "eval_steps_per_second": 3.896, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 882 + }, + { + "epoch": 50.0, + "eval_accuracy": 0.8711612772015457, + "eval_auc": 0.8231584209269593, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.46153846153846156, + "eval_f1_macro": 0.6941824562962304, + "eval_loss": 0.3358187675476074, + "eval_pr_auc": 0.46013674866792464, + "eval_precision": 0.6766355140186916, + "eval_precision_macro": 0.7825407542966181, + "eval_pred_class_0": 18063, + "eval_pred_class_1": 1605, + "eval_predicted_binding_ratio": 0.0816046369737645, + "eval_recall": 0.35020960980328925, + "eval_recall_macro": 0.659441136162585, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.797, + "eval_steps_per_second": 3.87, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 900 + }, + { + "epoch": 51.0, + "eval_accuracy": 0.8745169818995322, + "eval_auc": 0.8357514570475526, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.49363972096840375, + "eval_f1_macro": 0.7110123043354003, + "eval_loss": 0.32985639572143555, + "eval_pr_auc": 0.48277553791567623, + "eval_precision": 0.6785109983079526, + "eval_precision_macro": 0.7862239260888744, + "eval_pred_class_0": 17895, + "eval_pred_class_1": 1773, + "eval_predicted_binding_ratio": 0.0901464307504576, + "eval_recall": 0.38793937439535636, + "eval_recall_macro": 0.6767668140160521, + "eval_runtime": 0.2496, + "eval_samples_per_second": 653.0, + "eval_steps_per_second": 4.006, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 918 + }, + { + "epoch": 52.0, + "eval_accuracy": 0.8772117144600367, + "eval_auc": 0.8463606400457255, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.523574669560071, + "eval_f1_macro": 0.7265493507137326, + "eval_loss": 0.3246362507343292, + "eval_pr_auc": 0.5014434788718165, + "eval_precision": 0.6742886178861789, + "eval_precision_macro": 0.787031314592807, + "eval_pred_class_0": 17700, + "eval_pred_class_1": 1968, + "eval_predicted_binding_ratio": 0.10006101281269067, + "eval_recall": 0.4279264753305385, + "eval_recall_macro": 0.6946175504557563, + "eval_runtime": 0.2301, + "eval_samples_per_second": 708.248, + "eval_steps_per_second": 4.345, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 936 + }, + { + "epoch": 53.0, + "eval_accuracy": 0.878991254830181, + "eval_auc": 0.8556500280578212, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5445847684653655, + "eval_f1_macro": 0.7374052543587455, + "eval_loss": 0.3201504647731781, + "eval_pr_auc": 0.5184804467620471, + "eval_precision": 0.6696470588235294, + "eval_precision_macro": 0.786998185969936, + "eval_pred_class_0": 17543, + "eval_pred_class_1": 2125, + "eval_predicted_binding_ratio": 0.10804352247305267, + "eval_recall": 0.45888423089326025, + "eval_recall_macro": 0.7082554190018906, + "eval_runtime": 0.2666, + "eval_samples_per_second": 611.344, + "eval_steps_per_second": 3.751, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 954 + }, + { + "epoch": 54.0, + "eval_accuracy": 0.8792454748830588, + "eval_auc": 0.8636336358823378, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5594509367464292, + "eval_f1_macro": 0.744742407539513, + "eval_loss": 0.31616976857185364, + "eval_pr_auc": 0.5331175601979875, + "eval_precision": 0.6585152838427948, + "eval_precision_macro": 0.7834238290545543, + "eval_pred_class_0": 17378, + "eval_pred_class_1": 2290, + "eval_predicted_binding_ratio": 0.11643278421801911, + "eval_recall": 0.48629474363108677, + "eval_recall_macro": 0.719546237029523, + "eval_runtime": 0.2694, + "eval_samples_per_second": 604.964, + "eval_steps_per_second": 3.711, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 972 + }, + { + "epoch": 55.0, + "eval_accuracy": 0.8814317673378076, + "eval_auc": 0.8703512791725087, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.578757225433526, + "eval_f1_macro": 0.7548815712966447, + "eval_loss": 0.31280621886253357, + "eval_pr_auc": 0.5453871030590061, + "eval_precision": 0.657905544147844, + "eval_precision_macro": 0.7854606348952531, + "eval_pred_class_0": 17233, + "eval_pred_class_1": 2435, + "eval_predicted_binding_ratio": 0.12380516575147447, + "eval_recall": 0.5166075459529185, + "eval_recall_macro": 0.7331634337478723, + "eval_runtime": 0.3648, + "eval_samples_per_second": 446.874, + "eval_steps_per_second": 2.742, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 990 + }, + { + "epoch": 55.55555555555556, + "grad_norm": 18517.669921875, + "learning_rate": 9.996314582053105e-07, + "loss": 0.4604, + "step": 1000 + }, + { + "epoch": 56.0, + "eval_accuracy": 0.8834146837502542, + "eval_auc": 0.8759527216222862, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5956621407159232, + "eval_f1_macro": 0.7637749289648232, + "eval_loss": 0.3100614845752716, + "eval_pr_auc": 0.5551596710183998, + "eval_precision": 0.6571984435797665, + "eval_precision_macro": 0.7873078426812156, + "eval_pred_class_0": 17098, + "eval_pred_class_1": 2570, + "eval_predicted_binding_ratio": 0.1306691071791743, + "eval_recall": 0.5446630119316349, + "eval_recall_macro": 0.745742503732462, + "eval_runtime": 0.2507, + "eval_samples_per_second": 650.302, + "eval_steps_per_second": 3.99, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1008 + }, + { + "epoch": 57.0, + "eval_accuracy": 0.8840756558877364, + "eval_auc": 0.8809651824326759, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6064894718674491, + "eval_f1_macro": 0.7692574960553631, + "eval_loss": 0.30748215317726135, + "eval_pr_auc": 0.5634298069700459, + "eval_precision": 0.6524322317118455, + "eval_precision_macro": 0.7866284869899434, + "eval_pred_class_0": 16975, + "eval_pred_class_1": 2693, + "eval_predicted_binding_ratio": 0.13692292047996746, + "eval_recall": 0.5665914221218962, + "eval_recall_macro": 0.7550467824679621, + "eval_runtime": 0.265, + "eval_samples_per_second": 615.163, + "eval_steps_per_second": 3.774, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1026 + }, + { + "epoch": 58.0, + "eval_accuracy": 0.8849908480780964, + "eval_auc": 0.8853361353068065, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6121399176954733, + "eval_f1_macro": 0.7723127955239544, + "eval_loss": 0.30511021614074707, + "eval_pr_auc": 0.5712324508006517, + "eval_precision": 0.6536067374588063, + "eval_precision_macro": 0.7879535133831199, + "eval_pred_class_0": 16937, + "eval_pred_class_1": 2731, + "eval_predicted_binding_ratio": 0.13885499288183853, + "eval_recall": 0.5756207674943566, + "eval_recall_macro": 0.7592596503615321, + "eval_runtime": 0.2627, + "eval_samples_per_second": 620.515, + "eval_steps_per_second": 3.807, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1044 + }, + { + "epoch": 59.0, + "eval_accuracy": 0.8853467561521253, + "eval_auc": 0.8892084338643703, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.616822429906542, + "eval_f1_macro": 0.7747051971959543, + "eval_loss": 0.3030014634132385, + "eval_pr_auc": 0.5778370115776272, + "eval_precision": 0.6519396551724138, + "eval_precision_macro": 0.7878864350252024, + "eval_pred_class_0": 16884, + "eval_pred_class_1": 2784, + "eval_predicted_binding_ratio": 0.1415497254423429, + "eval_recall": 0.5852950661077072, + "eval_recall_macro": 0.7634026486450891, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.008, + "eval_steps_per_second": 3.828, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1062 + }, + { + "epoch": 60.0, + "eval_accuracy": 0.884685784014643, + "eval_auc": 0.8924710108272688, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6202277294038848, + "eval_f1_macro": 0.7761251343338811, + "eval_loss": 0.3011925220489502, + "eval_pr_auc": 0.5832812236308141, + "eval_precision": 0.6450714036920934, + "eval_precision_macro": 0.7853564436451774, + "eval_pred_class_0": 16797, + "eval_pred_class_1": 2871, + "eval_predicted_binding_ratio": 0.14597315436241612, + "eval_recall": 0.5972267010641729, + "eval_recall_macro": 0.7678594421600216, + "eval_runtime": 0.2437, + "eval_samples_per_second": 668.98, + "eval_steps_per_second": 4.104, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1080 + }, + { + "epoch": 61.0, + "eval_accuracy": 0.8856518202155786, + "eval_auc": 0.8954398707041407, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6242272347535506, + "eval_f1_macro": 0.7783964874556335, + "eval_loss": 0.29942846298217773, + "eval_pr_auc": 0.5886815510653964, + "eval_precision": 0.6477115117891817, + "eval_precision_macro": 0.7871243450270979, + "eval_pred_class_0": 16784, + "eval_pred_class_1": 2884, + "eval_predicted_binding_ratio": 0.14663412649989832, + "eval_recall": 0.6023863269912931, + "eval_recall_macro": 0.7705297965613797, + "eval_runtime": 0.2695, + "eval_samples_per_second": 604.921, + "eval_steps_per_second": 3.711, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1098 + }, + { + "epoch": 62.0, + "eval_accuracy": 0.8865161683953631, + "eval_auc": 0.8978366542923133, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6288659793814433, + "eval_f1_macro": 0.7809416026191173, + "eval_loss": 0.29785510897636414, + "eval_pr_auc": 0.593021329597711, + "eval_precision": 0.6491589426707861, + "eval_precision_macro": 0.7884708470441367, + "eval_pred_class_0": 16755, + "eval_pred_class_1": 2913, + "eval_predicted_binding_ratio": 0.14810860280658938, + "eval_recall": 0.6098032892615285, + "eval_recall_macro": 0.7740571948209012, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.719, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1116 + }, + { + "epoch": 63.0, + "eval_accuracy": 0.8871262965222697, + "eval_auc": 0.8998632136201572, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6315964155326916, + "eval_f1_macro": 0.7824748814379159, + "eval_loss": 0.2964444160461426, + "eval_pr_auc": 0.5970041919243015, + "eval_precision": 0.6505982905982906, + "eval_precision_macro": 0.789523000044412, + "eval_pred_class_0": 16743, + "eval_pred_class_1": 2925, + "eval_predicted_binding_ratio": 0.14871873093349602, + "eval_recall": 0.6136730087068688, + "eval_recall_macro": 0.7759920545435715, + "eval_runtime": 0.2695, + "eval_samples_per_second": 604.792, + "eval_steps_per_second": 3.71, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1134 + }, + { + "epoch": 64.0, + "eval_accuracy": 0.8876855806386008, + "eval_auc": 0.9017429582012333, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6356589147286822, + "eval_f1_macro": 0.7846343742639293, + "eval_loss": 0.2951850891113281, + "eval_pr_auc": 0.6005268804358049, + "eval_precision": 0.650573936529372, + "eval_precision_macro": 0.7901498917652248, + "eval_pred_class_0": 16706, + "eval_pred_class_1": 2962, + "eval_predicted_binding_ratio": 0.15059995932479153, + "eval_recall": 0.6214124475975492, + "eval_recall_macro": 0.7794694277584535, + "eval_runtime": 0.2675, + "eval_samples_per_second": 609.376, + "eval_steps_per_second": 3.739, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1152 + }, + { + "epoch": 65.0, + "eval_accuracy": 0.8881940207443563, + "eval_auc": 0.9033021142666618, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6377861966727063, + "eval_f1_macro": 0.785840340182137, + "eval_loss": 0.2939698398113251, + "eval_pr_auc": 0.6035211605243039, + "eval_precision": 0.6518518518518519, + "eval_precision_macro": 0.7910415086304414, + "eval_pred_class_0": 16698, + "eval_pred_class_1": 2970, + "eval_predicted_binding_ratio": 0.15100671140939598, + "eval_recall": 0.6243147371815544, + "eval_recall_macro": 0.7809507530297222, + "eval_runtime": 0.2684, + "eval_samples_per_second": 607.415, + "eval_steps_per_second": 3.726, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1170 + }, + { + "epoch": 66.0, + "eval_accuracy": 0.8882448647549319, + "eval_auc": 0.9048048023731414, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6389618922470434, + "eval_f1_macro": 0.7864263262967652, + "eval_loss": 0.29283198714256287, + "eval_pr_auc": 0.6066927627742578, + "eval_precision": 0.6511550050217609, + "eval_precision_macro": 0.7909273016835919, + "eval_pred_class_0": 16681, + "eval_pred_class_1": 2987, + "eval_predicted_binding_ratio": 0.1518710595891804, + "eval_recall": 0.6272170267655595, + "eval_recall_macro": 0.7821604539875966, + "eval_runtime": 0.2764, + "eval_samples_per_second": 589.68, + "eval_steps_per_second": 3.618, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1188 + }, + { + "epoch": 67.0, + "eval_accuracy": 0.888346552776083, + "eval_auc": 0.9061457752769495, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6402359108781127, + "eval_f1_macro": 0.7870775124924988, + "eval_loss": 0.29169291257858276, + "eval_pr_auc": 0.6096183698390041, + "eval_precision": 0.6506826506826506, + "eval_precision_macro": 0.7909278839971909, + "eval_pred_class_0": 16665, + "eval_pred_class_1": 3003, + "eval_predicted_binding_ratio": 0.15268456375838926, + "eval_recall": 0.6301193163495646, + "eval_recall_macro": 0.783400335424737, + "eval_runtime": 0.2635, + "eval_samples_per_second": 618.516, + "eval_steps_per_second": 3.795, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1206 + }, + { + "epoch": 68.0, + "eval_accuracy": 0.8887533048606874, + "eval_auc": 0.9074730837522218, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.641781270464964, + "eval_f1_macro": 0.7879665952661885, + "eval_loss": 0.2904839515686035, + "eval_pr_auc": 0.6127271933864005, + "eval_precision": 0.6518124376454939, + "eval_precision_macro": 0.7916645766644131, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6320541760722348, + "eval_recall_macro": 0.7844281262446042, + "eval_runtime": 0.26, + "eval_samples_per_second": 626.958, + "eval_steps_per_second": 3.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1224 + }, + { + "epoch": 69.0, + "eval_accuracy": 0.889261744966443, + "eval_auc": 0.9085174295528148, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6431847968545217, + "eval_f1_macro": 0.788822778783544, + "eval_loss": 0.28959015011787415, + "eval_pr_auc": 0.6152976575518759, + "eval_precision": 0.6536796536796536, + "eval_precision_macro": 0.7926964124983926, + "eval_pred_class_0": 16665, + "eval_pred_class_1": 3003, + "eval_predicted_binding_ratio": 0.15268456375838926, + "eval_recall": 0.6330216059335698, + "eval_recall_macro": 0.7851231045301337, + "eval_runtime": 0.2452, + "eval_samples_per_second": 664.894, + "eval_steps_per_second": 4.079, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1242 + }, + { + "epoch": 70.0, + "eval_accuracy": 0.889363432987594, + "eval_auc": 0.9095093658465239, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6446766819072501, + "eval_f1_macro": 0.7895790973067505, + "eval_loss": 0.2887136936187744, + "eval_pr_auc": 0.6176593727552148, + "eval_precision": 0.6529937148527952, + "eval_precision_macro": 0.7926428472131204, + "eval_pred_class_0": 16645, + "eval_pred_class_1": 3023, + "eval_predicted_binding_ratio": 0.15370144396990035, + "eval_recall": 0.636568848758465, + "eval_recall_macro": 0.7866251016291872, + "eval_runtime": 0.2634, + "eval_samples_per_second": 618.817, + "eval_steps_per_second": 3.796, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1260 + }, + { + "epoch": 71.0, + "eval_accuracy": 0.889821029082774, + "eval_auc": 0.9104295930879169, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6464349812367434, + "eval_f1_macro": 0.7905888279869988, + "eval_loss": 0.28783899545669556, + "eval_pr_auc": 0.619972501272285, + "eval_precision": 0.6542272126816381, + "eval_precision_macro": 0.7934597601869728, + "eval_pred_class_0": 16640, + "eval_pred_class_1": 3028, + "eval_predicted_binding_ratio": 0.1539556640227781, + "eval_recall": 0.6388261851015802, + "eval_recall_macro": 0.7878141307592769, + "eval_runtime": 0.2615, + "eval_samples_per_second": 623.334, + "eval_steps_per_second": 3.824, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1278 + }, + { + "epoch": 72.0, + "eval_accuracy": 0.8899735611145008, + "eval_auc": 0.911281293804153, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.646289637136319, + "eval_f1_macro": 0.7905721170208057, + "eval_loss": 0.28697267174720764, + "eval_pr_auc": 0.6225153426830469, + "eval_precision": 0.6552867086509778, + "eval_precision_macro": 0.7938916277024632, + "eval_pred_class_0": 16651, + "eval_pred_class_1": 3017, + "eval_predicted_binding_ratio": 0.15339637990644703, + "eval_recall": 0.6375362786198001, + "eval_recall_macro": 0.7873804408732489, + "eval_runtime": 0.2764, + "eval_samples_per_second": 589.762, + "eval_steps_per_second": 3.618, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1296 + }, + { + "epoch": 73.0, + "eval_accuracy": 0.890736221273134, + "eval_auc": 0.911925575502615, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6505122784192552, + "eval_f1_macro": 0.7928790035842321, + "eval_loss": 0.2863345444202423, + "eval_pr_auc": 0.6235765349975187, + "eval_precision": 0.6561679790026247, + "eval_precision_macro": 0.7949612458190018, + "eval_pred_class_0": 16620, + "eval_pred_class_1": 3048, + "eval_predicted_binding_ratio": 0.1549725442342892, + "eval_recall": 0.6449532408900355, + "eval_recall_macro": 0.7908474781742385, + "eval_runtime": 0.2631, + "eval_samples_per_second": 619.574, + "eval_steps_per_second": 3.801, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1314 + }, + { + "epoch": 74.0, + "eval_accuracy": 0.891193817368314, + "eval_auc": 0.9126060328997004, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6506692784851453, + "eval_f1_macro": 0.793115428161573, + "eval_loss": 0.28558436036109924, + "eval_pr_auc": 0.625919370718976, + "eval_precision": 0.6588429752066116, + "eval_precision_macro": 0.7961342196828587, + "eval_pred_class_0": 16643, + "eval_pred_class_1": 3025, + "eval_predicted_binding_ratio": 0.15380313199105144, + "eval_recall": 0.6426959045469204, + "eval_recall_macro": 0.7902016976709372, + "eval_runtime": 0.265, + "eval_samples_per_second": 615.167, + "eval_steps_per_second": 3.774, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1332 + }, + { + "epoch": 75.0, + "eval_accuracy": 0.8916005694529184, + "eval_auc": 0.9132873078266985, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6526555881394591, + "eval_f1_macro": 0.794217425975266, + "eval_loss": 0.28493690490722656, + "eval_pr_auc": 0.6278320531638758, + "eval_precision": 0.6595324333223576, + "eval_precision_macro": 0.7967555738856391, + "eval_pred_class_0": 16631, + "eval_pred_class_1": 3037, + "eval_predicted_binding_ratio": 0.1544132601179581, + "eval_recall": 0.6459206707513705, + "eval_recall_macro": 0.7917537198146302, + "eval_runtime": 0.2564, + "eval_samples_per_second": 635.773, + "eval_steps_per_second": 3.9, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1350 + }, + { + "epoch": 76.0, + "eval_accuracy": 0.8920073215375229, + "eval_auc": 0.9139370494570753, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6551948051948052, + "eval_f1_macro": 0.7955863102414826, + "eval_loss": 0.28430166840553284, + "eval_pr_auc": 0.6292546024902547, + "eval_precision": 0.6596927100359594, + "eval_precision_macro": 0.7972435493102309, + "eval_pred_class_0": 16609, + "eval_pred_class_1": 3059, + "eval_predicted_binding_ratio": 0.1555318283506203, + "eval_recall": 0.6507578200580458, + "eval_recall_macro": 0.7939610311131058, + "eval_runtime": 0.2569, + "eval_samples_per_second": 634.459, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1368 + }, + { + "epoch": 77.0, + "eval_accuracy": 0.8921598535692495, + "eval_auc": 0.9146080371326758, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6542787286063569, + "eval_f1_macro": 0.7951975553215214, + "eval_loss": 0.283497154712677, + "eval_pr_auc": 0.6315022943889131, + "eval_precision": 0.6615029663810151, + "eval_precision_macro": 0.7978670296615908, + "eval_pred_class_0": 16634, + "eval_pred_class_1": 3034, + "eval_predicted_binding_ratio": 0.15426072808623145, + "eval_recall": 0.6472105772331506, + "eval_recall_macro": 0.7926099364103822, + "eval_runtime": 0.2203, + "eval_samples_per_second": 740.067, + "eval_steps_per_second": 4.54, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1386 + }, + { + "epoch": 78.0, + "eval_accuracy": 0.8924140736221273, + "eval_auc": 0.9151027497871649, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.655037495924356, + "eval_f1_macro": 0.7956531976941219, + "eval_loss": 0.2829034626483917, + "eval_pr_auc": 0.6331226154536788, + "eval_precision": 0.6623804813715793, + "eval_precision_macro": 0.7983678781970611, + "eval_pred_class_0": 16635, + "eval_pred_class_1": 3033, + "eval_predicted_binding_ratio": 0.1542098840756559, + "eval_recall": 0.6478555304740407, + "eval_recall_macro": 0.7930229544686254, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.571, + "eval_steps_per_second": 3.899, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1404 + }, + { + "epoch": 79.0, + "eval_accuracy": 0.8925157616432784, + "eval_auc": 0.9156585533376076, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6559244791666666, + "eval_f1_macro": 0.7961172166862497, + "eval_loss": 0.28232645988464355, + "eval_pr_auc": 0.6343326075351273, + "eval_precision": 0.6621754847190273, + "eval_precision_macro": 0.7984260882241754, + "eval_pred_class_0": 16625, + "eval_pred_class_1": 3043, + "eval_predicted_binding_ratio": 0.15471832418141143, + "eval_recall": 0.6497903901967107, + "eval_recall_macro": 0.7938696624128962, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.665, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1422 + }, + { + "epoch": 80.0, + "eval_accuracy": 0.8929733577384584, + "eval_auc": 0.9162826303682348, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6572219508223416, + "eval_f1_macro": 0.7969043930945569, + "eval_loss": 0.28162533044815063, + "eval_pr_auc": 0.6363164977912346, + "eval_precision": 0.6638157894736842, + "eval_precision_macro": 0.7993423426560147, + "eval_pred_class_0": 16628, + "eval_pred_class_1": 3040, + "eval_predicted_binding_ratio": 0.15456579214968477, + "eval_recall": 0.6507578200580458, + "eval_recall_macro": 0.79453446021916, + "eval_runtime": 0.272, + "eval_samples_per_second": 599.266, + "eval_steps_per_second": 3.676, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1440 + }, + { + "epoch": 81.0, + "eval_accuracy": 0.8928208257067317, + "eval_auc": 0.9167705886684476, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.657124268054652, + "eval_f1_macro": 0.7968036671115732, + "eval_loss": 0.2811121940612793, + "eval_pr_auc": 0.6378571407612313, + "eval_precision": 0.6629471611421069, + "eval_precision_macro": 0.7989544782306408, + "eval_pred_class_0": 16621, + "eval_pred_class_1": 3047, + "eval_predicted_binding_ratio": 0.15492170022371365, + "eval_recall": 0.6514027732989358, + "eval_recall_macro": 0.7947060344432748, + "eval_runtime": 0.2602, + "eval_samples_per_second": 626.342, + "eval_steps_per_second": 3.843, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1458 + }, + { + "epoch": 82.0, + "eval_accuracy": 0.893125889770185, + "eval_auc": 0.9172343422437541, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6581001951854262, + "eval_f1_macro": 0.7973820247953165, + "eval_loss": 0.2804972231388092, + "eval_pr_auc": 0.6394259907419034, + "eval_precision": 0.6639317361339022, + "eval_precision_macro": 0.7995370130040789, + "eval_pred_class_0": 16621, + "eval_pred_class_1": 3047, + "eval_predicted_binding_ratio": 0.15492170022371365, + "eval_recall": 0.6523702031602708, + "eval_recall_macro": 0.7952802908117405, + "eval_runtime": 0.2613, + "eval_samples_per_second": 623.86, + "eval_steps_per_second": 3.827, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1476 + }, + { + "epoch": 83.0, + "eval_accuracy": 0.8933801098230628, + "eval_auc": 0.9176662705474707, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6597436313483693, + "eval_f1_macro": 0.7982647858607822, + "eval_loss": 0.28001976013183594, + "eval_pr_auc": 0.6407525699560299, + "eval_precision": 0.6639451338994121, + "eval_precision_macro": 0.7998155152816343, + "eval_pred_class_0": 16606, + "eval_pred_class_1": 3062, + "eval_predicted_binding_ratio": 0.15568436038234695, + "eval_recall": 0.6555949693647211, + "eval_recall_macro": 0.7967417715176355, + "eval_runtime": 0.2563, + "eval_samples_per_second": 635.967, + "eval_steps_per_second": 3.902, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1494 + }, + { + "epoch": 83.33333333333333, + "grad_norm": 11845.0048828125, + "learning_rate": 9.86567120987093e-07, + "loss": 0.2741, + "step": 1500 + }, + { + "epoch": 84.0, + "eval_accuracy": 0.8932275777913362, + "eval_auc": 0.9181069872977458, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6592015579357352, + "eval_f1_macro": 0.7979494857864604, + "eval_loss": 0.2794816195964813, + "eval_pr_auc": 0.6421665674351047, + "eval_precision": 0.6635086573015354, + "eval_precision_macro": 0.799538997766201, + "eval_pred_class_0": 16607, + "eval_pred_class_1": 3061, + "eval_predicted_binding_ratio": 0.15563351637177142, + "eval_recall": 0.654950016123831, + "eval_recall_macro": 0.7963891144179245, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.121, + "eval_steps_per_second": 3.841, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1512 + }, + { + "epoch": 85.0, + "eval_accuracy": 0.8932275777913362, + "eval_auc": 0.9184344761551537, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6595330739299611, + "eval_f1_macro": 0.7981095181516664, + "eval_loss": 0.27904370427131653, + "eval_pr_auc": 0.6429990318434126, + "eval_precision": 0.6631887838278449, + "eval_precision_macro": 0.7994577736379149, + "eval_pred_class_0": 16601, + "eval_pred_class_1": 3067, + "eval_predicted_binding_ratio": 0.15593858043522474, + "eval_recall": 0.6559174459851661, + "eval_recall_macro": 0.796782287910794, + "eval_runtime": 0.2526, + "eval_samples_per_second": 645.288, + "eval_steps_per_second": 3.959, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1530 + }, + { + "epoch": 86.0, + "eval_accuracy": 0.8933292658124873, + "eval_auc": 0.9189190864757253, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6585286458333334, + "eval_f1_macro": 0.797660321952579, + "eval_loss": 0.27837634086608887, + "eval_pr_auc": 0.6447520419594072, + "eval_precision": 0.664804469273743, + "eval_precision_macro": 0.7999811820052926, + "eval_pred_class_0": 16625, + "eval_pred_class_1": 3043, + "eval_predicted_binding_ratio": 0.15471832418141143, + "eval_recall": 0.6523702031602708, + "eval_recall_macro": 0.7954010127288045, + "eval_runtime": 0.2401, + "eval_samples_per_second": 678.873, + "eval_steps_per_second": 4.165, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1548 + }, + { + "epoch": 87.0, + "eval_accuracy": 0.8936851738865161, + "eval_auc": 0.9193183038504471, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6602761982128351, + "eval_f1_macro": 0.7986291029941847, + "eval_loss": 0.27788689732551575, + "eval_pr_auc": 0.6462039154555116, + "eval_precision": 0.6653569089718402, + "eval_precision_macro": 0.8005067920325675, + "eval_pred_class_0": 16614, + "eval_pred_class_1": 3054, + "eval_predicted_binding_ratio": 0.15527760829774254, + "eval_recall": 0.655272492744276, + "eval_recall_macro": 0.7967917965622751, + "eval_runtime": 0.2547, + "eval_samples_per_second": 640.0, + "eval_steps_per_second": 3.926, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1566 + }, + { + "epoch": 88.0, + "eval_accuracy": 0.8941427699816962, + "eval_auc": 0.9197055599839506, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.662012987012987, + "eval_f1_macro": 0.7996283888525269, + "eval_loss": 0.2774609923362732, + "eval_pr_auc": 0.6474319229516793, + "eval_precision": 0.6665576985943119, + "eval_precision_macro": 0.8013082309577014, + "eval_pred_class_0": 16609, + "eval_pred_class_1": 3059, + "eval_predicted_binding_ratio": 0.1555318283506203, + "eval_recall": 0.6575298290873912, + "eval_recall_macro": 0.7979808256923646, + "eval_runtime": 0.2497, + "eval_samples_per_second": 652.753, + "eval_steps_per_second": 4.005, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1584 + }, + { + "epoch": 89.0, + "eval_accuracy": 0.8946003660768761, + "eval_auc": 0.9201319018332661, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6636378387149116, + "eval_f1_macro": 0.8005736295133055, + "eval_loss": 0.27695581316947937, + "eval_pr_auc": 0.6490255599400047, + "eval_precision": 0.6678641410842586, + "eval_precision_macro": 0.8021363340613392, + "eval_pred_class_0": 16606, + "eval_pred_class_1": 3062, + "eval_predicted_binding_ratio": 0.15568436038234695, + "eval_recall": 0.6594646888100613, + "eval_recall_macro": 0.7990387969914977, + "eval_runtime": 0.269, + "eval_samples_per_second": 606.045, + "eval_steps_per_second": 3.718, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1602 + }, + { + "epoch": 90.0, + "eval_accuracy": 0.8950579621720561, + "eval_auc": 0.920403555344157, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6648262422864566, + "eval_f1_macro": 0.8013081720805965, + "eval_loss": 0.27657878398895264, + "eval_pr_auc": 0.6500340451314033, + "eval_precision": 0.6696107294733399, + "eval_precision_macro": 0.8030794000144978, + "eval_pred_class_0": 16611, + "eval_pred_class_1": 3057, + "eval_predicted_binding_ratio": 0.1554301403294692, + "eval_recall": 0.6601096420509514, + "eval_recall_macro": 0.7995725369668047, + "eval_runtime": 0.2609, + "eval_samples_per_second": 624.642, + "eval_steps_per_second": 3.832, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1620 + }, + { + "epoch": 91.0, + "eval_accuracy": 0.8953630262355095, + "eval_auc": 0.9207465675374016, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6654746423927178, + "eval_f1_macro": 0.8017284012349317, + "eval_loss": 0.27611449360847473, + "eval_pr_auc": 0.6512249430736048, + "eval_precision": 0.6709275647328745, + "eval_precision_macro": 0.8037492731289094, + "eval_pred_class_0": 16617, + "eval_pred_class_1": 3051, + "eval_predicted_binding_ratio": 0.15512507626601588, + "eval_recall": 0.6601096420509514, + "eval_recall_macro": 0.7997536198424009, + "eval_runtime": 0.2605, + "eval_samples_per_second": 625.736, + "eval_steps_per_second": 3.839, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1638 + }, + { + "epoch": 92.0, + "eval_accuracy": 0.8954647142566605, + "eval_auc": 0.9211146506479597, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6662337662337663, + "eval_f1_macro": 0.802130627992697, + "eval_loss": 0.275691956281662, + "eval_pr_auc": 0.6524138573777828, + "eval_precision": 0.6708074534161491, + "eval_precision_macro": 0.8038244624537546, + "eval_pred_class_0": 16609, + "eval_pred_class_1": 3059, + "eval_predicted_binding_ratio": 0.1555318283506203, + "eval_recall": 0.6617220251531764, + "eval_recall_macro": 0.8004692699557154, + "eval_runtime": 0.2702, + "eval_samples_per_second": 603.318, + "eval_steps_per_second": 3.701, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1656 + }, + { + "epoch": 93.0, + "eval_accuracy": 0.8956680902989628, + "eval_auc": 0.9214137790034065, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6666666666666666, + "eval_f1_macro": 0.8024110910186859, + "eval_loss": 0.27531710267066956, + "eval_pr_auc": 0.6535015844647576, + "eval_precision": 0.6716857610474631, + "eval_precision_macro": 0.8042712197761243, + "eval_pred_class_0": 16613, + "eval_pred_class_1": 3055, + "eval_predicted_binding_ratio": 0.15532845230831807, + "eval_recall": 0.6617220251531764, + "eval_recall_macro": 0.8005899918727795, + "eval_runtime": 0.2643, + "eval_samples_per_second": 616.638, + "eval_steps_per_second": 3.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1674 + }, + { + "epoch": 94.0, + "eval_accuracy": 0.8956172462883872, + "eval_auc": 0.9217489565349906, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6664500406173842, + "eval_f1_macro": 0.802288641055505, + "eval_loss": 0.27483227849006653, + "eval_pr_auc": 0.6546621751515824, + "eval_precision": 0.6715782580222659, + "eval_precision_macro": 0.8041892734676155, + "eval_pred_class_0": 16614, + "eval_pred_class_1": 3054, + "eval_predicted_binding_ratio": 0.15527760829774254, + "eval_recall": 0.6613995485327314, + "eval_recall_macro": 0.8004287535625569, + "eval_runtime": 0.2382, + "eval_samples_per_second": 684.396, + "eval_steps_per_second": 4.199, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1692 + }, + { + "epoch": 95.0, + "eval_accuracy": 0.8959731543624161, + "eval_auc": 0.9220882801111304, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6672088484059857, + "eval_f1_macro": 0.8027800298435859, + "eval_loss": 0.27437010407447815, + "eval_pr_auc": 0.6561534178394561, + "eval_precision": 0.6731211027239908, + "eval_precision_macro": 0.8049740042228342, + "eval_pred_class_0": 16621, + "eval_pred_class_1": 3047, + "eval_predicted_binding_ratio": 0.15492170022371365, + "eval_recall": 0.6613995485327314, + "eval_recall_macro": 0.800640016917419, + "eval_runtime": 0.2619, + "eval_samples_per_second": 622.273, + "eval_steps_per_second": 3.818, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1710 + }, + { + "epoch": 96.0, + "eval_accuracy": 0.8958206223306895, + "eval_auc": 0.9223952917907324, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6666666666666666, + "eval_f1_macro": 0.8024646720298894, + "eval_loss": 0.27396437525749207, + "eval_pr_auc": 0.6571939169041311, + "eval_precision": 0.6726854891661195, + "eval_precision_macro": 0.8046979364973901, + "eval_pred_class_0": 16622, + "eval_pred_class_1": 3046, + "eval_predicted_binding_ratio": 0.1548708562131381, + "eval_recall": 0.6607545952918413, + "eval_recall_macro": 0.8002873598177079, + "eval_runtime": 0.2627, + "eval_samples_per_second": 620.423, + "eval_steps_per_second": 3.806, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1728 + }, + { + "epoch": 97.0, + "eval_accuracy": 0.8960239983729916, + "eval_auc": 0.9226829844599047, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6679655788277318, + "eval_f1_macro": 0.8031632457541016, + "eval_loss": 0.2735843360424042, + "eval_pr_auc": 0.6581443956834908, + "eval_precision": 0.6726618705035972, + "eval_precision_macro": 0.8049040839573975, + "eval_pred_class_0": 16610, + "eval_pred_class_1": 3058, + "eval_predicted_binding_ratio": 0.15548098434004473, + "eval_recall": 0.6633344082554015, + "eval_recall_macro": 0.801456544382424, + "eval_runtime": 0.2325, + "eval_samples_per_second": 701.163, + "eval_steps_per_second": 4.302, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1746 + }, + { + "epoch": 98.0, + "eval_accuracy": 0.8961765304047183, + "eval_auc": 0.9230679047936586, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.668075422626788, + "eval_f1_macro": 0.8032698713905396, + "eval_loss": 0.2730555534362793, + "eval_pr_auc": 0.6597232694357793, + "eval_precision": 0.6735496558505408, + "eval_precision_macro": 0.8053010360254088, + "eval_pred_class_0": 16617, + "eval_pred_class_1": 3051, + "eval_predicted_binding_ratio": 0.15512507626601588, + "eval_recall": 0.6626894550145115, + "eval_recall_macro": 0.801284970158309, + "eval_runtime": 0.2564, + "eval_samples_per_second": 635.817, + "eval_steps_per_second": 3.901, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1764 + }, + { + "epoch": 99.0, + "eval_accuracy": 0.8961256863941428, + "eval_auc": 0.9233850285396773, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6689353427321342, + "eval_f1_macro": 0.8036671286252258, + "eval_loss": 0.272703617811203, + "eval_pr_auc": 0.6608360801532023, + "eval_precision": 0.6723127035830619, + "eval_precision_macro": 0.8049176483332829, + "eval_pred_class_0": 16598, + "eval_pred_class_1": 3070, + "eval_predicted_binding_ratio": 0.1560911124669514, + "eval_recall": 0.6655917445985166, + "eval_recall_macro": 0.8024343101576514, + "eval_runtime": 0.2609, + "eval_samples_per_second": 624.703, + "eval_steps_per_second": 3.833, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1782 + }, + { + "epoch": 100.0, + "eval_accuracy": 0.8962782184258694, + "eval_auc": 0.9236704438040937, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6696891191709845, + "eval_f1_macro": 0.804084607836397, + "eval_loss": 0.27227067947387695, + "eval_pr_auc": 0.661859432748859, + "eval_precision": 0.672520325203252, + "eval_precision_macro": 0.8051325786806955, + "eval_pred_class_0": 16593, + "eval_pred_class_1": 3075, + "eval_predicted_binding_ratio": 0.15634533251982916, + "eval_recall": 0.6668816510802967, + "eval_recall_macro": 0.8030490829192756, + "eval_runtime": 0.2555, + "eval_samples_per_second": 638.071, + "eval_steps_per_second": 3.915, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1800 + }, + { + "epoch": 101.0, + "eval_accuracy": 0.8962782184258694, + "eval_auc": 0.9239191675474416, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6695821185617103, + "eval_f1_macro": 0.8040329626642457, + "eval_loss": 0.2719270884990692, + "eval_pr_auc": 0.6626733311213273, + "eval_precision": 0.6726326065733811, + "eval_precision_macro": 0.8051623412499325, + "eval_pred_class_0": 16595, + "eval_pred_class_1": 3073, + "eval_predicted_binding_ratio": 0.15624364449867806, + "eval_recall": 0.6665591744598517, + "eval_recall_macro": 0.802918025088319, + "eval_runtime": 0.2736, + "eval_samples_per_second": 595.794, + "eval_steps_per_second": 3.655, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1818 + }, + { + "epoch": 102.0, + "eval_accuracy": 0.896786658531625, + "eval_auc": 0.9242257996595844, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6714146973130463, + "eval_f1_macro": 0.805096334723234, + "eval_loss": 0.271486759185791, + "eval_pr_auc": 0.6637007390734015, + "eval_precision": 0.6740331491712708, + "eval_precision_macro": 0.8060660592459934, + "eval_pred_class_0": 16591, + "eval_pred_class_1": 3077, + "eval_predicted_binding_ratio": 0.15644702054098028, + "eval_recall": 0.6688165108029668, + "eval_recall_macro": 0.8041372346976746, + "eval_runtime": 0.2489, + "eval_samples_per_second": 654.758, + "eval_steps_per_second": 4.017, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1836 + }, + { + "epoch": 103.0, + "eval_accuracy": 0.8973459426479561, + "eval_auc": 0.9244978230054357, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.673248098397799, + "eval_f1_macro": 0.8061779895433214, + "eval_loss": 0.2711206376552582, + "eval_pr_auc": 0.6646142778873767, + "eval_precision": 0.6757634827810266, + "eval_precision_macro": 0.8071101922645338, + "eval_pred_class_0": 16590, + "eval_pred_class_1": 3078, + "eval_predicted_binding_ratio": 0.15649786455155582, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.8052555669553397, + "eval_runtime": 0.2242, + "eval_samples_per_second": 727.107, + "eval_steps_per_second": 4.461, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1854 + }, + { + "epoch": 104.0, + "eval_accuracy": 0.8971934106162294, + "eval_auc": 0.9248346842593396, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6732385261797027, + "eval_f1_macro": 0.8061196854381076, + "eval_loss": 0.2707342207431793, + "eval_pr_auc": 0.6659244139495173, + "eval_precision": 0.6747651441528992, + "eval_precision_macro": 0.8066847854532062, + "eval_pred_class_0": 16581, + "eval_pred_class_1": 3087, + "eval_predicted_binding_ratio": 0.15695546064673582, + "eval_recall": 0.671718800386972, + "eval_recall_macro": 0.8055581990104113, + "eval_runtime": 0.255, + "eval_samples_per_second": 639.197, + "eval_steps_per_second": 3.921, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1872 + }, + { + "epoch": 105.0, + "eval_accuracy": 0.8975493186902583, + "eval_auc": 0.9250955152313902, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6746326497658647, + "eval_f1_macro": 0.8069177490147248, + "eval_loss": 0.2704195976257324, + "eval_pr_auc": 0.666907608407933, + "eval_precision": 0.675614489003881, + "eval_precision_macro": 0.8072811827258788, + "eval_pred_class_0": 16576, + "eval_pred_class_1": 3092, + "eval_predicted_binding_ratio": 0.15720968069961358, + "eval_recall": 0.673653660109642, + "eval_recall_macro": 0.8065558093510122, + "eval_runtime": 0.2579, + "eval_samples_per_second": 631.977, + "eval_steps_per_second": 3.877, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1890 + }, + { + "epoch": 106.0, + "eval_accuracy": 0.8977018507219849, + "eval_auc": 0.9254069649305167, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6749596122778675, + "eval_f1_macro": 0.8071292359343842, + "eval_loss": 0.26996490359306335, + "eval_pr_auc": 0.6681450085536085, + "eval_precision": 0.6762706377468436, + "eval_precision_macro": 0.8076147808433838, + "eval_pred_class_0": 16579, + "eval_pred_class_1": 3089, + "eval_predicted_binding_ratio": 0.15705714866788692, + "eval_recall": 0.673653660109642, + "eval_recall_macro": 0.8066463507888102, + "eval_runtime": 0.2667, + "eval_samples_per_second": 611.086, + "eval_steps_per_second": 3.749, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1908 + }, + { + "epoch": 107.0, + "eval_accuracy": 0.8980577587960138, + "eval_auc": 0.9256414889578861, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6758286176232822, + "eval_f1_macro": 0.8076738937412058, + "eval_loss": 0.2696084678173065, + "eval_pr_auc": 0.6691135839215693, + "eval_precision": 0.6776913099870299, + "eval_precision_macro": 0.8083644683075526, + "eval_pred_class_0": 16584, + "eval_pred_class_1": 3084, + "eval_predicted_binding_ratio": 0.15680292861500916, + "eval_recall": 0.673976136730087, + "eval_recall_macro": 0.8069886719746289, + "eval_runtime": 0.2591, + "eval_samples_per_second": 629.158, + "eval_steps_per_second": 3.86, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1926 + }, + { + "epoch": 108.0, + "eval_accuracy": 0.8980069147854383, + "eval_auc": 0.9258192627838369, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6769726247987118, + "eval_f1_macro": 0.8082079811791663, + "eval_loss": 0.26944610476493835, + "eval_pr_auc": 0.6696389857739906, + "eval_precision": 0.676101640398842, + "eval_precision_macro": 0.8078859551713395, + "eval_pred_class_0": 16559, + "eval_pred_class_1": 3109, + "eval_predicted_binding_ratio": 0.158074028879398, + "eval_recall": 0.6778458561754273, + "eval_recall_macro": 0.8085311854668409, + "eval_runtime": 0.261, + "eval_samples_per_second": 624.512, + "eval_steps_per_second": 3.831, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1944 + }, + { + "epoch": 109.0, + "eval_accuracy": 0.898159446817165, + "eval_auc": 0.9261110139050743, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.676779086654833, + "eval_f1_macro": 0.8081683537924276, + "eval_loss": 0.26896923780441284, + "eval_pr_auc": 0.6708410126864026, + "eval_precision": 0.6773255813953488, + "eval_precision_macro": 0.8083707318031536, + "eval_pred_class_0": 16572, + "eval_pred_class_1": 3096, + "eval_predicted_binding_ratio": 0.1574130567419158, + "eval_recall": 0.6762334730732021, + "eval_recall_macro": 0.8079664377498563, + "eval_runtime": 0.2579, + "eval_samples_per_second": 632.096, + "eval_steps_per_second": 3.878, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1962 + }, + { + "epoch": 110.0, + "eval_accuracy": 0.8985153548911938, + "eval_auc": 0.9263511243868452, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6783757653883339, + "eval_f1_macro": 0.809064127789247, + "eval_loss": 0.26861709356307983, + "eval_pr_auc": 0.6718712574127733, + "eval_precision": 0.677938808373591, + "eval_precision_macro": 0.808902387342021, + "eval_pred_class_0": 16563, + "eval_pred_class_1": 3105, + "eval_predicted_binding_ratio": 0.15787065283709578, + "eval_recall": 0.6788132860367624, + "eval_recall_macro": 0.8092261637523704, + "eval_runtime": 0.2543, + "eval_samples_per_second": 640.936, + "eval_steps_per_second": 3.932, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1980 + }, + { + "epoch": 111.0, + "eval_accuracy": 0.8989729509863738, + "eval_auc": 0.9265832055569767, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6794644297467334, + "eval_f1_macro": 0.8097506232989936, + "eval_loss": 0.2682516574859619, + "eval_pr_auc": 0.6727910026400046, + "eval_precision": 0.6797934151065204, + "eval_precision_macro": 0.8098725675411902, + "eval_pred_class_0": 16570, + "eval_pred_class_1": 3098, + "eval_predicted_binding_ratio": 0.1575147447630669, + "eval_recall": 0.6791357626572073, + "eval_recall_macro": 0.809628845896721, + "eval_runtime": 0.2091, + "eval_samples_per_second": 779.644, + "eval_steps_per_second": 4.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1998 + }, + { + "epoch": 111.11111111111111, + "grad_norm": 13330.4609375, + "learning_rate": 9.552616846852138e-07, + "loss": 0.252, + "step": 2000 + }, + { + "epoch": 112.0, + "eval_accuracy": 0.899176327028676, + "eval_auc": 0.9269119888367457, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6793856103476152, + "eval_f1_macro": 0.8097842051315767, + "eval_loss": 0.2677896022796631, + "eval_pr_auc": 0.6743175064299574, + "eval_precision": 0.6812581063553826, + "eval_precision_macro": 0.8104795114507255, + "eval_pred_class_0": 16584, + "eval_pred_class_1": 3084, + "eval_predicted_binding_ratio": 0.15680292861500916, + "eval_recall": 0.6775233795549823, + "eval_recall_macro": 0.8090942786590025, + "eval_runtime": 0.2454, + "eval_samples_per_second": 664.193, + "eval_steps_per_second": 4.075, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2016 + }, + { + "epoch": 113.0, + "eval_accuracy": 0.8994813910921293, + "eval_auc": 0.927058521341044, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6812832500403031, + "eval_f1_macro": 0.8108073208521016, + "eval_loss": 0.26760444045066833, + "eval_pr_auc": 0.6746134464200654, + "eval_precision": 0.6811734364925854, + "eval_precision_macro": 0.8107666047608406, + "eval_pred_class_0": 16566, + "eval_pred_class_1": 3102, + "eval_predicted_binding_ratio": 0.15771812080536912, + "eval_recall": 0.6813930990003225, + "eval_recall_macro": 0.8108480555060766, + "eval_runtime": 0.2588, + "eval_samples_per_second": 629.901, + "eval_steps_per_second": 3.864, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2034 + }, + { + "epoch": 114.0, + "eval_accuracy": 0.8993797030709783, + "eval_auc": 0.9272620862892311, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6811664250040277, + "eval_f1_macro": 0.81071512110173, + "eval_loss": 0.267299622297287, + "eval_pr_auc": 0.6753372489001316, + "eval_precision": 0.6806181584030908, + "eval_precision_macro": 0.8105119532505733, + "eval_pred_class_0": 16562, + "eval_pred_class_1": 3106, + "eval_predicted_binding_ratio": 0.15792149684767134, + "eval_recall": 0.6817155756207675, + "eval_recall_macro": 0.8109187523785011, + "eval_runtime": 0.2924, + "eval_samples_per_second": 557.453, + "eval_steps_per_second": 3.42, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2052 + }, + { + "epoch": 115.0, + "eval_accuracy": 0.8994813910921293, + "eval_auc": 0.9274648142425077, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6817962337035248, + "eval_f1_macro": 0.8110548055574955, + "eval_loss": 0.2670327126979828, + "eval_pr_auc": 0.6759104665767571, + "eval_precision": 0.6805912596401028, + "eval_precision_macro": 0.8106085073266955, + "eval_pred_class_0": 16556, + "eval_pred_class_1": 3112, + "eval_predicted_binding_ratio": 0.15822656091112466, + "eval_recall": 0.6830054821025475, + "eval_recall_macro": 0.811503344660859, + "eval_runtime": 0.2501, + "eval_samples_per_second": 651.841, + "eval_steps_per_second": 3.999, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2070 + }, + { + "epoch": 116.0, + "eval_accuracy": 0.8995830791132805, + "eval_auc": 0.927707601161492, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6824248271426274, + "eval_f1_macro": 0.8113938913621764, + "eval_loss": 0.2667410373687744, + "eval_pr_auc": 0.676645416517246, + "eval_precision": 0.6805644644002565, + "eval_precision_macro": 0.8107051929252038, + "eval_pred_class_0": 16550, + "eval_pred_class_1": 3118, + "eval_predicted_binding_ratio": 0.15853162497457798, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.812087936943217, + "eval_runtime": 0.2676, + "eval_samples_per_second": 609.094, + "eval_steps_per_second": 3.737, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2088 + }, + { + "epoch": 117.0, + "eval_accuracy": 0.8998881431767338, + "eval_auc": 0.927944061956154, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6828796907714608, + "eval_f1_macro": 0.8117208850210732, + "eval_loss": 0.26635268330574036, + "eval_pr_auc": 0.6777081363329963, + "eval_precision": 0.6821106821106822, + "eval_precision_macro": 0.8114357758379498, + "eval_pred_class_0": 16560, + "eval_pred_class_1": 3108, + "eval_predicted_binding_ratio": 0.15802318486882244, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8120069041569002, + "eval_runtime": 0.2624, + "eval_samples_per_second": 621.137, + "eval_steps_per_second": 3.811, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2106 + }, + { + "epoch": 118.0, + "eval_accuracy": 0.9001932072401871, + "eval_auc": 0.9282739839383012, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6831315577078289, + "eval_f1_macro": 0.8119498952052617, + "eval_loss": 0.2659379541873932, + "eval_pr_auc": 0.6790830976589266, + "eval_precision": 0.6839043309631545, + "eval_precision_macro": 0.8122369488772572, + "eval_pred_class_0": 16574, + "eval_pred_class_1": 3094, + "eval_predicted_binding_ratio": 0.1573113687207647, + "eval_recall": 0.6823605288616575, + "eval_recall_macro": 0.8116637557086703, + "eval_runtime": 0.2592, + "eval_samples_per_second": 628.745, + "eval_steps_per_second": 3.857, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2124 + }, + { + "epoch": 119.0, + "eval_accuracy": 0.9001932072401871, + "eval_auc": 0.9284926050623749, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6838460299565148, + "eval_f1_macro": 0.812294615183528, + "eval_loss": 0.2656570076942444, + "eval_pr_auc": 0.6798372835892805, + "eval_precision": 0.6830759330759331, + "eval_precision_macro": 0.8120089810307202, + "eval_pred_class_0": 16560, + "eval_pred_class_1": 3108, + "eval_predicted_binding_ratio": 0.15802318486882244, + "eval_recall": 0.6846178652047726, + "eval_recall_macro": 0.8125811605253657, + "eval_runtime": 0.2617, + "eval_samples_per_second": 622.824, + "eval_steps_per_second": 3.821, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2142 + }, + { + "epoch": 120.0, + "eval_accuracy": 0.9001932072401871, + "eval_auc": 0.9287595480437707, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6841512469831054, + "eval_f1_macro": 0.8124418563951485, + "eval_loss": 0.26531943678855896, + "eval_pr_auc": 0.6808986584956077, + "eval_precision": 0.682723185613359, + "eval_precision_macro": 0.8119125170545953, + "eval_pred_class_0": 16554, + "eval_pred_class_1": 3114, + "eval_predicted_binding_ratio": 0.1583282489322758, + "eval_recall": 0.6855852950661077, + "eval_recall_macro": 0.8129743340182352, + "eval_runtime": 0.2686, + "eval_samples_per_second": 606.857, + "eval_steps_per_second": 3.723, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2160 + }, + { + "epoch": 121.0, + "eval_accuracy": 0.9005999593247915, + "eval_auc": 0.9289936341086871, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6851344822032533, + "eval_f1_macro": 0.8130595887334677, + "eval_loss": 0.26495063304901123, + "eval_pr_auc": 0.6818525990139518, + "eval_precision": 0.6843629343629344, + "eval_precision_macro": 0.8127732546210807, + "eval_pred_class_0": 16560, + "eval_pred_class_1": 3108, + "eval_predicted_binding_ratio": 0.15802318486882244, + "eval_recall": 0.6859077716865527, + "eval_recall_macro": 0.8133468356833198, + "eval_runtime": 0.262, + "eval_samples_per_second": 622.159, + "eval_steps_per_second": 3.817, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2178 + }, + { + "epoch": 122.0, + "eval_accuracy": 0.9010067114093959, + "eval_auc": 0.9291471740122346, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6880307643005928, + "eval_f1_macro": 0.8146000626156236, + "eval_loss": 0.2648627460002899, + "eval_pr_auc": 0.6821768129155847, + "eval_precision": 0.6837579617834395, + "eval_precision_macro": 0.8130188647252145, + "eval_pred_class_0": 16528, + "eval_pred_class_1": 3140, + "eval_predicted_binding_ratio": 0.1596501932072402, + "eval_recall": 0.692357304095453, + "eval_recall_macro": 0.8162094361365779, + "eval_runtime": 0.2598, + "eval_samples_per_second": 627.373, + "eval_steps_per_second": 3.849, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2196 + }, + { + "epoch": 123.0, + "eval_accuracy": 0.9011083994305471, + "eval_auc": 0.9293803062922532, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.687851067244423, + "eval_f1_macro": 0.8145493064661928, + "eval_loss": 0.26444998383522034, + "eval_pr_auc": 0.6832369783380787, + "eval_precision": 0.6846645367412141, + "eval_precision_macro": 0.8133686693864494, + "eval_pred_class_0": 16538, + "eval_pred_class_1": 3130, + "eval_predicted_binding_ratio": 0.15914175310148465, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.8157455657712839, + "eval_runtime": 0.2527, + "eval_samples_per_second": 645.052, + "eval_steps_per_second": 3.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2214 + }, + { + "epoch": 124.0, + "eval_accuracy": 0.9017185275574537, + "eval_auc": 0.929644952403895, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6885774125986789, + "eval_f1_macro": 0.815114870687036, + "eval_loss": 0.26401567459106445, + "eval_pr_auc": 0.6842971435384069, + "eval_precision": 0.6880231809401159, + "eval_precision_macro": 0.8149088251035563, + "eval_pred_class_0": 16562, + "eval_pred_class_1": 3106, + "eval_predicted_binding_ratio": 0.15792149684767134, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8153213845367371, + "eval_runtime": 0.2233, + "eval_samples_per_second": 729.933, + "eval_steps_per_second": 4.478, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2232 + }, + { + "epoch": 125.0, + "eval_accuracy": 0.9017185275574537, + "eval_auc": 0.9298876614628875, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6892782510850346, + "eval_f1_macro": 0.8154529561328843, + "eval_loss": 0.26374292373657227, + "eval_pr_auc": 0.6852698748697685, + "eval_precision": 0.6871794871794872, + "eval_precision_macro": 0.8146738625165021, + "eval_pred_class_0": 16548, + "eval_pred_class_1": 3120, + "eval_predicted_binding_ratio": 0.1586333129957291, + "eval_recall": 0.691389874234118, + "eval_recall_macro": 0.8162387893534325, + "eval_runtime": 0.2352, + "eval_samples_per_second": 692.981, + "eval_steps_per_second": 4.251, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2250 + }, + { + "epoch": 126.0, + "eval_accuracy": 0.9020235916209071, + "eval_auc": 0.9300761410376911, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6909382518043304, + "eval_f1_macro": 0.8163612439650636, + "eval_loss": 0.2635449767112732, + "eval_pr_auc": 0.685931037905785, + "eval_precision": 0.6873005743458839, + "eval_precision_macro": 0.815012329026093, + "eval_pred_class_0": 16534, + "eval_pred_class_1": 3134, + "eval_predicted_binding_ratio": 0.15934512914378687, + "eval_recall": 0.6946146404385682, + "eval_recall_macro": 0.8177304505385936, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.44, + "eval_steps_per_second": 3.88, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2268 + }, + { + "epoch": 127.0, + "eval_accuracy": 0.902837095790116, + "eval_auc": 0.9303049910181687, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6925181013676589, + "eval_f1_macro": 0.8174102840403102, + "eval_loss": 0.263118714094162, + "eval_pr_auc": 0.6869422132706717, + "eval_precision": 0.691072575465639, + "eval_precision_macro": 0.8168725206674576, + "eval_pred_class_0": 16554, + "eval_pred_class_1": 3114, + "eval_predicted_binding_ratio": 0.1583282489322758, + "eval_recall": 0.6939696871976782, + "eval_recall_macro": 0.8179512225449368, + "eval_runtime": 0.2583, + "eval_samples_per_second": 630.976, + "eval_steps_per_second": 3.871, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2286 + }, + { + "epoch": 128.0, + "eval_accuracy": 0.9027354077689648, + "eval_auc": 0.9304756990498765, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6929866795056974, + "eval_f1_macro": 0.8176004232749752, + "eval_loss": 0.2629205286502838, + "eval_pr_auc": 0.6875064110834537, + "eval_precision": 0.689776357827476, + "eval_precision_macro": 0.8164083143593783, + "eval_pred_class_0": 16538, + "eval_pred_class_1": 3130, + "eval_predicted_binding_ratio": 0.15914175310148465, + "eval_recall": 0.6962270235407932, + "eval_recall_macro": 0.8188082664031002, + "eval_runtime": 0.2527, + "eval_samples_per_second": 644.934, + "eval_steps_per_second": 3.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2304 + }, + { + "epoch": 129.0, + "eval_accuracy": 0.9030404718324181, + "eval_auc": 0.9306633221647718, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6937530110807772, + "eval_f1_macro": 0.818077689508494, + "eval_loss": 0.2625824213027954, + "eval_pr_auc": 0.6881187823465719, + "eval_precision": 0.690978886756238, + "eval_precision_macro": 0.8170466915947796, + "eval_pred_class_0": 16542, + "eval_pred_class_1": 3126, + "eval_predicted_binding_ratio": 0.15893837705918243, + "eval_recall": 0.6965495001612383, + "eval_recall_macro": 0.8191204071096527, + "eval_runtime": 0.2718, + "eval_samples_per_second": 599.717, + "eval_steps_per_second": 3.679, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2322 + }, + { + "epoch": 130.0, + "eval_accuracy": 0.902938783811267, + "eval_auc": 0.9308322783466673, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6933333333333334, + "eval_f1_macro": 0.817839388722781, + "eval_loss": 0.2622954547405243, + "eval_pr_auc": 0.6887035254510873, + "eval_precision": 0.6907810499359796, + "eval_precision_macro": 0.816890766747487, + "eval_pred_class_0": 16544, + "eval_pred_class_1": 3124, + "eval_predicted_binding_ratio": 0.15883668903803133, + "eval_recall": 0.6959045469203483, + "eval_recall_macro": 0.8187979304892078, + "eval_runtime": 0.2604, + "eval_samples_per_second": 625.881, + "eval_steps_per_second": 3.84, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2340 + }, + { + "epoch": 131.0, + "eval_accuracy": 0.9027354077689648, + "eval_auc": 0.9309924460820045, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6929866795056974, + "eval_f1_macro": 0.8176004232749752, + "eval_loss": 0.26203182339668274, + "eval_pr_auc": 0.6893090005690568, + "eval_precision": 0.689776357827476, + "eval_precision_macro": 0.8164083143593783, + "eval_pred_class_0": 16538, + "eval_pred_class_1": 3130, + "eval_predicted_binding_ratio": 0.15914175310148465, + "eval_recall": 0.6962270235407932, + "eval_recall_macro": 0.8188082664031002, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.443, + "eval_steps_per_second": 3.898, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2358 + }, + { + "epoch": 132.0, + "eval_accuracy": 0.902938783811267, + "eval_auc": 0.9312000694822565, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6936286310383566, + "eval_f1_macro": 0.817981812876073, + "eval_loss": 0.26177045702934265, + "eval_pr_auc": 0.6902838377634022, + "eval_precision": 0.6904153354632588, + "eval_precision_macro": 0.8167882699809945, + "eval_pred_class_0": 16538, + "eval_pred_class_1": 3130, + "eval_predicted_binding_ratio": 0.15914175310148465, + "eval_recall": 0.6968719767816833, + "eval_recall_macro": 0.8191911039820772, + "eval_runtime": 0.2756, + "eval_samples_per_second": 591.34, + "eval_steps_per_second": 3.628, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2376 + }, + { + "epoch": 133.0, + "eval_accuracy": 0.902938783811267, + "eval_auc": 0.9314264084780033, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6933333333333334, + "eval_f1_macro": 0.817839388722781, + "eval_loss": 0.2614164650440216, + "eval_pr_auc": 0.6912123921690412, + "eval_precision": 0.6907810499359796, + "eval_precision_macro": 0.816890766747487, + "eval_pred_class_0": 16544, + "eval_pred_class_1": 3124, + "eval_predicted_binding_ratio": 0.15883668903803133, + "eval_recall": 0.6959045469203483, + "eval_recall_macro": 0.8187979304892078, + "eval_runtime": 0.2552, + "eval_samples_per_second": 638.769, + "eval_steps_per_second": 3.919, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2394 + }, + { + "epoch": 134.0, + "eval_accuracy": 0.9030404718324181, + "eval_auc": 0.9316330780933575, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6934576434656807, + "eval_f1_macro": 0.8179352236654993, + "eval_loss": 0.26109954714775085, + "eval_pr_auc": 0.6919150376493911, + "eval_precision": 0.6913461538461538, + "eval_precision_macro": 0.817149992562429, + "eval_pred_class_0": 16548, + "eval_pred_class_1": 3120, + "eval_predicted_binding_ratio": 0.1586333129957291, + "eval_recall": 0.6955820702999033, + "eval_recall_macro": 0.8187272336167832, + "eval_runtime": 0.2602, + "eval_samples_per_second": 626.557, + "eval_steps_per_second": 3.844, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2412 + }, + { + "epoch": 135.0, + "eval_accuracy": 0.9030404718324181, + "eval_auc": 0.9318176062735843, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.694733472066592, + "eval_f1_macro": 0.8185505131193367, + "eval_loss": 0.2609698474407196, + "eval_pr_auc": 0.69244594350898, + "eval_precision": 0.6897647806738716, + "eval_precision_macro": 0.8167078351983328, + "eval_pred_class_0": 16522, + "eval_pred_class_1": 3146, + "eval_predicted_binding_ratio": 0.1599552572706935, + "eval_recall": 0.6997742663656885, + "eval_recall_macro": 0.8204309854192178, + "eval_runtime": 0.2491, + "eval_samples_per_second": 654.333, + "eval_steps_per_second": 4.014, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2430 + }, + { + "epoch": 136.0, + "eval_accuracy": 0.9034472239170226, + "eval_auc": 0.9320372006475538, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6941536479304236, + "eval_f1_macro": 0.8184144035830462, + "eval_loss": 0.2604828178882599, + "eval_pr_auc": 0.6935318303087233, + "eval_precision": 0.6933719433719434, + "eval_precision_macro": 0.8181231697536046, + "eval_pred_class_0": 16560, + "eval_pred_class_1": 3108, + "eval_predicted_binding_ratio": 0.15802318486882244, + "eval_recall": 0.6949371170590132, + "eval_recall_macro": 0.8187065617889984, + "eval_runtime": 0.2485, + "eval_samples_per_second": 655.847, + "eval_steps_per_second": 4.024, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2448 + }, + { + "epoch": 137.0, + "eval_accuracy": 0.9033963799064471, + "eval_auc": 0.9321443165310757, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6960972488803583, + "eval_f1_macro": 0.8193338378363828, + "eval_loss": 0.2604370415210724, + "eval_pr_auc": 0.693643099537468, + "eval_precision": 0.6905744208187877, + "eval_precision_macro": 0.8172857573610195, + "eval_pred_class_0": 16517, + "eval_pred_class_1": 3151, + "eval_predicted_binding_ratio": 0.16020947732357127, + "eval_recall": 0.7017091260883586, + "eval_recall_macro": 0.8214285957598189, + "eval_runtime": 0.2215, + "eval_samples_per_second": 736.024, + "eval_steps_per_second": 4.515, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2466 + }, + { + "epoch": 138.0, + "eval_accuracy": 0.9034980679275981, + "eval_auc": 0.9323432098797633, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.695638229634381, + "eval_f1_macro": 0.8191484199531422, + "eval_loss": 0.2601032257080078, + "eval_pr_auc": 0.694473981068256, + "eval_precision": 0.691866028708134, + "eval_precision_macro": 0.817746962215919, + "eval_pred_class_0": 16533, + "eval_pred_class_1": 3135, + "eval_predicted_binding_ratio": 0.1593959731543624, + "eval_recall": 0.6994517897452435, + "eval_recall_macro": 0.8205715519016554, + "eval_runtime": 0.2592, + "eval_samples_per_second": 628.928, + "eval_steps_per_second": 3.858, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2484 + }, + { + "epoch": 138.88888888888889, + "grad_norm": 12954.3583984375, + "learning_rate": 9.068887706579789e-07, + "loss": 0.2385, + "step": 2500 + }, + { + "epoch": 139.0, + "eval_accuracy": 0.904311572096807, + "eval_auc": 0.9325454122780963, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6969404186795491, + "eval_f1_macro": 0.8200635197304043, + "eval_loss": 0.25969693064689636, + "eval_pr_auc": 0.6954050242581626, + "eval_precision": 0.6960437439691219, + "eval_precision_macro": 0.819729100681946, + "eval_pred_class_0": 16559, + "eval_pred_class_1": 3109, + "eval_predicted_binding_ratio": 0.158074028879398, + "eval_recall": 0.6978394066430184, + "eval_recall_macro": 0.820399150415129, + "eval_runtime": 0.2657, + "eval_samples_per_second": 613.439, + "eval_steps_per_second": 3.763, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2502 + }, + { + "epoch": 140.0, + "eval_accuracy": 0.903853976001627, + "eval_auc": 0.9326487714170208, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6971977582065653, + "eval_f1_macro": 0.8200261554019741, + "eval_loss": 0.2596379518508911, + "eval_pr_auc": 0.6956168134976223, + "eval_precision": 0.6924300254452926, + "eval_precision_macro": 0.8182556808417458, + "eval_pred_class_0": 16524, + "eval_pred_class_1": 3144, + "eval_predicted_binding_ratio": 0.15985356924954242, + "eval_recall": 0.7020316027088036, + "eval_recall_macro": 0.8218312779041694, + "eval_runtime": 0.2604, + "eval_samples_per_second": 626.028, + "eval_steps_per_second": 3.841, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2520 + }, + { + "epoch": 141.0, + "eval_accuracy": 0.904311572096807, + "eval_auc": 0.9329799294265357, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6962556488056811, + "eval_f1_macro": 0.819733135205496, + "eval_loss": 0.25904589891433716, + "eval_pr_auc": 0.6970902220334548, + "eval_precision": 0.6969305331179322, + "eval_precision_macro": 0.8199852086334245, + "eval_pred_class_0": 16573, + "eval_pred_class_1": 3095, + "eval_predicted_binding_ratio": 0.15736221273134024, + "eval_recall": 0.6955820702999033, + "eval_recall_macro": 0.8194817455984336, + "eval_runtime": 0.2524, + "eval_samples_per_second": 645.771, + "eval_steps_per_second": 3.962, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2538 + }, + { + "epoch": 142.0, + "eval_accuracy": 0.9047691681919869, + "eval_auc": 0.9330630060376336, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994061948322902, + "eval_f1_macro": 0.8214143192859533, + "eval_loss": 0.2590080201625824, + "eval_pr_auc": 0.697179333543334, + "eval_precision": 0.6961661341853035, + "eval_precision_macro": 0.8202078705755396, + "eval_pred_class_0": 16538, + "eval_pred_class_1": 3130, + "eval_predicted_binding_ratio": 0.15914175310148465, + "eval_recall": 0.7026765559496937, + "eval_recall_macro": 0.8226366421928706, + "eval_runtime": 0.2495, + "eval_samples_per_second": 653.345, + "eval_steps_per_second": 4.008, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2556 + }, + { + "epoch": 143.0, + "eval_accuracy": 0.9047691681919869, + "eval_auc": 0.9332962551076398, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6991164658634538, + "eval_f1_macro": 0.8212745809731632, + "eval_loss": 0.25865858793258667, + "eval_pr_auc": 0.6981950669404262, + "eval_precision": 0.6965428937259923, + "eval_precision_macro": 0.8203156925109651, + "eval_pred_class_0": 16544, + "eval_pred_class_1": 3124, + "eval_predicted_binding_ratio": 0.15883668903803133, + "eval_recall": 0.7017091260883586, + "eval_recall_macro": 0.8222434687000011, + "eval_runtime": 0.2633, + "eval_samples_per_second": 619.149, + "eval_steps_per_second": 3.798, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2574 + }, + { + "epoch": 144.0, + "eval_accuracy": 0.9050233882448647, + "eval_auc": 0.9334749729859892, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995818591186876, + "eval_f1_macro": 0.8215887434369935, + "eval_loss": 0.2584296464920044, + "eval_pr_auc": 0.6986932637956595, + "eval_precision": 0.697786333012512, + "eval_precision_macro": 0.8209190259709409, + "eval_pred_class_0": 16551, + "eval_pred_class_1": 3117, + "eval_predicted_binding_ratio": 0.15848078096400245, + "eval_recall": 0.7013866494679136, + "eval_recall_macro": 0.8222633132653747, + "eval_runtime": 0.2769, + "eval_samples_per_second": 588.703, + "eval_steps_per_second": 3.612, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2592 + }, + { + "epoch": 145.0, + "eval_accuracy": 0.9050742322554403, + "eval_auc": 0.9336868786857826, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6991136180499598, + "eval_f1_macro": 0.8213807805320277, + "eval_loss": 0.25803840160369873, + "eval_pr_auc": 0.699660351667112, + "eval_precision": 0.6987757731958762, + "eval_precision_macro": 0.8212545854629465, + "eval_pred_class_0": 16564, + "eval_pred_class_1": 3104, + "eval_predicted_binding_ratio": 0.15781980882652025, + "eval_recall": 0.6994517897452435, + "eval_recall_macro": 0.8215071467589017, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.163, + "eval_steps_per_second": 3.891, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2610 + }, + { + "epoch": 146.0, + "eval_accuracy": 0.9047183241814114, + "eval_auc": 0.9337996783486955, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995831997435076, + "eval_f1_macro": 0.8214817322060338, + "eval_loss": 0.2579362094402313, + "eval_pr_auc": 0.6999214829412527, + "eval_precision": 0.6955690149824674, + "eval_precision_macro": 0.8199882459220607, + "eval_pred_class_0": 16531, + "eval_pred_class_1": 3137, + "eval_predicted_binding_ratio": 0.15949766117551353, + "eval_recall": 0.7036439858110287, + "eval_recall_macro": 0.8229996352064741, + "eval_runtime": 0.2538, + "eval_samples_per_second": 642.317, + "eval_steps_per_second": 3.941, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2628 + }, + { + "epoch": 147.0, + "eval_accuracy": 0.9051759202765914, + "eval_auc": 0.9339161666287131, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.701647736362182, + "eval_f1_macro": 0.8226388900943448, + "eval_loss": 0.2578524649143219, + "eval_pr_auc": 0.7001081914566338, + "eval_precision": 0.6961904761904761, + "eval_precision_macro": 0.820610070399391, + "eval_pred_class_0": 16518, + "eval_pred_class_1": 3150, + "eval_predicted_binding_ratio": 0.16015863331299574, + "eval_recall": 0.7071912286359239, + "eval_recall_macro": 0.8247128956603896, + "eval_runtime": 0.2529, + "eval_samples_per_second": 644.43, + "eval_steps_per_second": 3.954, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2646 + }, + { + "epoch": 148.0, + "eval_accuracy": 0.9050233882448647, + "eval_auc": 0.9341703697689739, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995818591186876, + "eval_f1_macro": 0.8215887434369935, + "eval_loss": 0.25731131434440613, + "eval_pr_auc": 0.7012902373057504, + "eval_precision": 0.697786333012512, + "eval_precision_macro": 0.8209190259709409, + "eval_pred_class_0": 16551, + "eval_pred_class_1": 3117, + "eval_predicted_binding_ratio": 0.15848078096400245, + "eval_recall": 0.7013866494679136, + "eval_recall_macro": 0.8222633132653747, + "eval_runtime": 0.2512, + "eval_samples_per_second": 648.858, + "eval_steps_per_second": 3.981, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2664 + }, + { + "epoch": 149.0, + "eval_accuracy": 0.9052776082977425, + "eval_auc": 0.9343408636857047, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003377834968635, + "eval_f1_macro": 0.8220430425380088, + "eval_loss": 0.25706911087036133, + "eval_pr_auc": 0.7018656885391451, + "eval_precision": 0.6986521181001284, + "eval_precision_macro": 0.8214140242506442, + "eval_pred_class_0": 16552, + "eval_pred_class_1": 3116, + "eval_predicted_binding_ratio": 0.1584299369534269, + "eval_recall": 0.7020316027088036, + "eval_recall_macro": 0.8226763313236177, + "eval_runtime": 0.2513, + "eval_samples_per_second": 648.672, + "eval_steps_per_second": 3.98, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2682 + }, + { + "epoch": 150.0, + "eval_accuracy": 0.9054301403294692, + "eval_auc": 0.9345392996069414, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7006758931445124, + "eval_f1_macro": 0.822259931959612, + "eval_loss": 0.2567782402038574, + "eval_pr_auc": 0.7026441101697649, + "eval_precision": 0.6993254095727593, + "eval_precision_macro": 0.8217557280421937, + "eval_pred_class_0": 16555, + "eval_pred_class_1": 3113, + "eval_predicted_binding_ratio": 0.15827740492170023, + "eval_recall": 0.7020316027088036, + "eval_recall_macro": 0.8227668727614157, + "eval_runtime": 0.2591, + "eval_samples_per_second": 629.188, + "eval_steps_per_second": 3.86, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2700 + }, + { + "epoch": 151.0, + "eval_accuracy": 0.9051250762660159, + "eval_auc": 0.934685296823797, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003853564547207, + "eval_f1_macro": 0.8220121780461352, + "eval_loss": 0.25664329528808594, + "eval_pr_auc": 0.7029644506395569, + "eval_precision": 0.6974736168851935, + "eval_precision_macro": 0.8209271234175075, + "eval_pred_class_0": 16541, + "eval_pred_class_1": 3127, + "eval_predicted_binding_ratio": 0.158989221069758, + "eval_recall": 0.7033215091905837, + "eval_recall_macro": 0.8231100212096456, + "eval_runtime": 0.2408, + "eval_samples_per_second": 676.856, + "eval_steps_per_second": 4.152, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2718 + }, + { + "epoch": 152.0, + "eval_accuracy": 0.9055826723611958, + "eval_auc": 0.9349076649599691, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7008216529724505, + "eval_f1_macro": 0.8223840221758023, + "eval_loss": 0.25626465678215027, + "eval_pr_auc": 0.7040942732775656, + "eval_precision": 0.7002575660012879, + "eval_precision_macro": 0.82217322207805, + "eval_pred_class_0": 16562, + "eval_pred_class_1": 3106, + "eval_predicted_binding_ratio": 0.15792149684767134, + "eval_recall": 0.7013866494679136, + "eval_recall_macro": 0.8225952985373008, + "eval_runtime": 0.2564, + "eval_samples_per_second": 635.699, + "eval_steps_per_second": 3.9, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2736 + }, + { + "epoch": 153.0, + "eval_accuracy": 0.905226764287167, + "eval_auc": 0.9350594919437002, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7014734144778988, + "eval_f1_macro": 0.8225728005545544, + "eval_loss": 0.2562600076198578, + "eval_pr_auc": 0.7046022469135804, + "eval_precision": 0.6967865097041044, + "eval_precision_macro": 0.8208289583316286, + "eval_pred_class_0": 16525, + "eval_pred_class_1": 3143, + "eval_predicted_binding_ratio": 0.15980272523896685, + "eval_recall": 0.7062237987745889, + "eval_recall_macro": 0.8243499026467862, + "eval_runtime": 0.2358, + "eval_samples_per_second": 691.279, + "eval_steps_per_second": 4.241, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2754 + }, + { + "epoch": 154.0, + "eval_accuracy": 0.9054809843400448, + "eval_auc": 0.9352728672508359, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7010773436243769, + "eval_f1_macro": 0.8224715159707777, + "eval_loss": 0.25581732392311096, + "eval_pr_auc": 0.7056629719926804, + "eval_precision": 0.6991661321359846, + "eval_precision_macro": 0.821758292654095, + "eval_pred_class_0": 16550, + "eval_pred_class_1": 3118, + "eval_predicted_binding_ratio": 0.15853162497457798, + "eval_recall": 0.7029990325701386, + "eval_recall_macro": 0.8231902267335512, + "eval_runtime": 0.2602, + "eval_samples_per_second": 626.5, + "eval_steps_per_second": 3.844, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2772 + }, + { + "epoch": 155.0, + "eval_accuracy": 0.9057860484034981, + "eval_auc": 0.9354356530283926, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7025204687750842, + "eval_f1_macro": 0.8232752161134611, + "eval_loss": 0.25560733675956726, + "eval_pr_auc": 0.7062858637533224, + "eval_precision": 0.6994884910485933, + "eval_precision_macro": 0.8221444873622652, + "eval_pred_class_0": 16540, + "eval_pred_class_1": 3128, + "eval_predicted_binding_ratio": 0.15904006508033353, + "eval_recall": 0.7055788455336988, + "eval_recall_macro": 0.8244197722567993, + "eval_runtime": 0.2671, + "eval_samples_per_second": 610.27, + "eval_steps_per_second": 3.744, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2790 + }, + { + "epoch": 156.0, + "eval_accuracy": 0.9058877364246491, + "eval_auc": 0.9356189159837551, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7028415475999358, + "eval_f1_macro": 0.8234659606184656, + "eval_loss": 0.2553412616252899, + "eval_pr_auc": 0.7070636650718337, + "eval_precision": 0.6998081841432225, + "eval_precision_macro": 0.8223345636556499, + "eval_pred_class_0": 16540, + "eval_pred_class_1": 3128, + "eval_predicted_binding_ratio": 0.15904006508033353, + "eval_recall": 0.7059013221541438, + "eval_recall_macro": 0.8246111910462879, + "eval_runtime": 0.2539, + "eval_samples_per_second": 642.017, + "eval_steps_per_second": 3.939, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2808 + }, + { + "epoch": 157.0, + "eval_accuracy": 0.9061419564775269, + "eval_auc": 0.9357918624902231, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7031199742682535, + "eval_f1_macro": 0.8236899466727462, + "eval_loss": 0.25509998202323914, + "eval_pr_auc": 0.7077594222055618, + "eval_precision": 0.7013153673403913, + "eval_precision_macro": 0.8230158493399438, + "eval_pred_class_0": 16551, + "eval_pred_class_1": 3117, + "eval_predicted_binding_ratio": 0.15848078096400245, + "eval_recall": 0.7049338922928088, + "eval_recall_macro": 0.8243689199497484, + "eval_runtime": 0.255, + "eval_samples_per_second": 639.309, + "eval_steps_per_second": 3.922, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2826 + }, + { + "epoch": 158.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9359727409833409, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7034593724859212, + "eval_f1_macro": 0.8239074586532139, + "eval_loss": 0.2548539340496063, + "eval_pr_auc": 0.7084539339673516, + "eval_precision": 0.7019910083493899, + "eval_precision_macro": 0.8233586792381238, + "eval_pred_class_0": 16554, + "eval_pred_class_1": 3114, + "eval_predicted_binding_ratio": 0.1583282489322758, + "eval_recall": 0.7049338922928088, + "eval_recall_macro": 0.8244594613875464, + "eval_runtime": 0.2566, + "eval_samples_per_second": 635.242, + "eval_steps_per_second": 3.897, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2844 + }, + { + "epoch": 159.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9360812778117108, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7043380822794941, + "eval_f1_macro": 0.8242594639388645, + "eval_loss": 0.2548294961452484, + "eval_pr_auc": 0.708809734771939, + "eval_precision": 0.6993006993006993, + "eval_precision_macro": 0.822383674913635, + "eval_pred_class_0": 16522, + "eval_pred_class_1": 3146, + "eval_predicted_binding_ratio": 0.1599552572706935, + "eval_recall": 0.709448564979039, + "eval_recall_macro": 0.8261735491038733, + "eval_runtime": 0.27, + "eval_samples_per_second": 603.758, + "eval_steps_per_second": 3.704, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2862 + }, + { + "epoch": 160.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9362697476540152, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7040308334671591, + "eval_f1_macro": 0.82418310527748, + "eval_loss": 0.25451889634132385, + "eval_pr_auc": 0.70979034812957, + "eval_precision": 0.7012156110044786, + "eval_precision_macro": 0.8231322886360805, + "eval_pred_class_0": 16542, + "eval_pred_class_1": 3126, + "eval_predicted_binding_ratio": 0.15893837705918243, + "eval_recall": 0.7068687520154788, + "eval_recall_macro": 0.8252458083732854, + "eval_runtime": 0.2554, + "eval_samples_per_second": 638.302, + "eval_steps_per_second": 3.916, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2880 + }, + { + "epoch": 161.0, + "eval_accuracy": 0.9065487085621314, + "eval_auc": 0.9364340127714132, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7047863796980405, + "eval_f1_macro": 0.824637224883753, + "eval_loss": 0.25425252318382263, + "eval_pr_auc": 0.7104452483887299, + "eval_precision": 0.70208, + "eval_precision_macro": 0.8236265925164723, + "eval_pred_class_0": 16543, + "eval_pred_class_1": 3125, + "eval_predicted_binding_ratio": 0.15888753304860687, + "eval_recall": 0.7075137052563689, + "eval_recall_macro": 0.8256588264315284, + "eval_runtime": 0.2609, + "eval_samples_per_second": 624.767, + "eval_steps_per_second": 3.833, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2898 + }, + { + "epoch": 162.0, + "eval_accuracy": 0.9065487085621314, + "eval_auc": 0.9366581522223957, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7045965927354548, + "eval_f1_macro": 0.8245456841795291, + "eval_loss": 0.25392982363700867, + "eval_pr_auc": 0.7114160919166963, + "eval_precision": 0.7023389939122077, + "eval_precision_macro": 0.8237022823552698, + "eval_pred_class_0": 16547, + "eval_pred_class_1": 3121, + "eval_predicted_binding_ratio": 0.15868415700630464, + "eval_recall": 0.7068687520154788, + "eval_recall_macro": 0.8253967107696154, + "eval_runtime": 0.2487, + "eval_samples_per_second": 655.473, + "eval_steps_per_second": 4.021, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2916 + }, + { + "epoch": 163.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.936698911928028, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7069544364508393, + "eval_f1_macro": 0.8257724934589374, + "eval_loss": 0.25399070978164673, + "eval_pr_auc": 0.7113652786898896, + "eval_precision": 0.7010145846544071, + "eval_precision_macro": 0.8235604593370134, + "eval_pred_class_0": 16514, + "eval_pred_class_1": 3154, + "eval_predicted_binding_ratio": 0.16036200935529796, + "eval_recall": 0.7129958078039342, + "eval_recall_macro": 0.8280377119541189, + "eval_runtime": 0.2519, + "eval_samples_per_second": 647.019, + "eval_steps_per_second": 3.969, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2934 + }, + { + "epoch": 164.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9369248810888143, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7062780269058296, + "eval_f1_macro": 0.8254283885284618, + "eval_loss": 0.25362086296081543, + "eval_pr_auc": 0.7123203296069245, + "eval_precision": 0.7015590200445434, + "eval_precision_macro": 0.8236690712930735, + "eval_pred_class_0": 16525, + "eval_pred_class_1": 3143, + "eval_predicted_binding_ratio": 0.15980272523896685, + "eval_recall": 0.7110609480812641, + "eval_recall_macro": 0.827221184489114, + "eval_runtime": 0.261, + "eval_samples_per_second": 624.444, + "eval_steps_per_second": 3.831, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2952 + }, + { + "epoch": 165.0, + "eval_accuracy": 0.9072096806996136, + "eval_auc": 0.9371562809840187, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7070155723230053, + "eval_f1_macro": 0.8259456391835222, + "eval_loss": 0.2532632350921631, + "eval_pr_auc": 0.7133697274093473, + "eval_precision": 0.7039641943734015, + "eval_precision_macro": 0.8248055554696512, + "eval_pred_class_0": 16540, + "eval_pred_class_1": 3128, + "eval_predicted_binding_ratio": 0.15904006508033353, + "eval_recall": 0.710093518219929, + "eval_recall_macro": 0.8270996353096386, + "eval_runtime": 0.2545, + "eval_samples_per_second": 640.417, + "eval_steps_per_second": 3.929, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2970 + }, + { + "epoch": 166.0, + "eval_accuracy": 0.9071588366890381, + "eval_auc": 0.9372476886142239, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7072779737095223, + "eval_f1_macro": 0.8260542385315996, + "eval_loss": 0.2531469762325287, + "eval_pr_auc": 0.7136781835058345, + "eval_precision": 0.7032196365954734, + "eval_precision_macro": 0.8245394656269969, + "eval_pred_class_0": 16531, + "eval_pred_class_1": 3137, + "eval_predicted_binding_ratio": 0.15949766117551353, + "eval_recall": 0.7113834247017091, + "eval_recall_macro": 0.8275936861541986, + "eval_runtime": 0.2653, + "eval_samples_per_second": 614.378, + "eval_steps_per_second": 3.769, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2988 + }, + { + "epoch": 166.66666666666666, + "grad_norm": 14056.4111328125, + "learning_rate": 8.432618494003656e-07, + "loss": 0.2279, + "step": 3000 + }, + { + "epoch": 167.0, + "eval_accuracy": 0.9072605247101891, + "eval_auc": 0.9373718655684177, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7077859660365268, + "eval_f1_macro": 0.8263351175441593, + "eval_loss": 0.25298377871513367, + "eval_pr_auc": 0.7140728736694715, + "eval_precision": 0.7032792104425343, + "eval_precision_macro": 0.8246534613355044, + "eval_pred_class_0": 16527, + "eval_pred_class_1": 3141, + "eval_predicted_binding_ratio": 0.15970103721781573, + "eval_recall": 0.7123508545630441, + "eval_recall_macro": 0.8280472206056, + "eval_runtime": 0.2651, + "eval_samples_per_second": 614.943, + "eval_steps_per_second": 3.773, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3006 + }, + { + "epoch": 168.0, + "eval_accuracy": 0.9075655887736425, + "eval_auc": 0.9374749229998747, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7094918504314478, + "eval_f1_macro": 0.8272654245808608, + "eval_loss": 0.25285741686820984, + "eval_pr_auc": 0.7143683695359583, + "eval_precision": 0.7031992397846056, + "eval_precision_macro": 0.8249204363177162, + "eval_pred_class_0": 16511, + "eval_pred_class_1": 3157, + "eval_predicted_binding_ratio": 0.16051454138702462, + "eval_recall": 0.7158980973879394, + "eval_recall_macro": 0.8296699396217176, + "eval_runtime": 0.2383, + "eval_samples_per_second": 683.908, + "eval_steps_per_second": 4.196, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3024 + }, + { + "epoch": 169.0, + "eval_accuracy": 0.9076672767947935, + "eval_auc": 0.9377269168628721, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7087876844130853, + "eval_f1_macro": 0.8269618180373584, + "eval_loss": 0.25237372517585754, + "eval_pr_auc": 0.7154735567799367, + "eval_precision": 0.7049441786283892, + "eval_precision_macro": 0.8255259815297635, + "eval_pred_class_0": 16533, + "eval_pred_class_1": 3135, + "eval_predicted_binding_ratio": 0.1593959731543624, + "eval_recall": 0.7126733311834892, + "eval_recall_macro": 0.8284197222706846, + "eval_runtime": 0.2602, + "eval_samples_per_second": 626.364, + "eval_steps_per_second": 3.843, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3042 + }, + { + "epoch": 170.0, + "eval_accuracy": 0.9079214968476713, + "eval_auc": 0.9378333125414714, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7099151049175076, + "eval_f1_macro": 0.8275952704051472, + "eval_loss": 0.2522483766078949, + "eval_pr_auc": 0.7157732693940176, + "eval_precision": 0.7052832590706556, + "eval_precision_macro": 0.8258656401852128, + "eval_pred_class_0": 16526, + "eval_pred_class_1": 3142, + "eval_predicted_binding_ratio": 0.1597518812283913, + "eval_recall": 0.7146081909061593, + "eval_recall_macro": 0.8293569716527538, + "eval_runtime": 0.2212, + "eval_samples_per_second": 736.855, + "eval_steps_per_second": 4.521, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3060 + }, + { + "epoch": 171.0, + "eval_accuracy": 0.9079214968476713, + "eval_auc": 0.9379969158489405, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7098221438871976, + "eval_f1_macro": 0.8275504434498686, + "eval_loss": 0.25200438499450684, + "eval_pr_auc": 0.7163721445757975, + "eval_precision": 0.7054140127388535, + "eval_precision_macro": 0.8259040054013725, + "eval_pred_class_0": 16528, + "eval_pred_class_1": 3140, + "eval_predicted_binding_ratio": 0.1596501932072402, + "eval_recall": 0.7142857142857143, + "eval_recall_macro": 0.8292259138217972, + "eval_runtime": 0.2637, + "eval_samples_per_second": 618.089, + "eval_steps_per_second": 3.792, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3078 + }, + { + "epoch": 172.0, + "eval_accuracy": 0.9083282489322758, + "eval_auc": 0.9382153715205318, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7102683593122289, + "eval_f1_macro": 0.8279092226905722, + "eval_loss": 0.25164341926574707, + "eval_pr_auc": 0.7173810220120935, + "eval_precision": 0.7078795643818065, + "eval_precision_macro": 0.8270148456503497, + "eval_pred_class_0": 16546, + "eval_pred_class_1": 3122, + "eval_predicted_binding_ratio": 0.1587350010168802, + "eval_recall": 0.7126733311834892, + "eval_recall_macro": 0.8288120685011429, + "eval_runtime": 0.2387, + "eval_samples_per_second": 682.942, + "eval_steps_per_second": 4.19, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3096 + }, + { + "epoch": 173.0, + "eval_accuracy": 0.9084807809640024, + "eval_auc": 0.9383315970230777, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.711168164313222, + "eval_f1_macro": 0.8283970352740591, + "eval_loss": 0.2515103816986084, + "eval_pr_auc": 0.7177908159595219, + "eval_precision": 0.7077610986905142, + "eval_precision_macro": 0.8271223707155178, + "eval_pred_class_0": 16537, + "eval_pred_class_1": 3131, + "eval_predicted_binding_ratio": 0.15919259711206019, + "eval_recall": 0.7146081909061593, + "eval_recall_macro": 0.8296889569246799, + "eval_runtime": 0.2615, + "eval_samples_per_second": 623.31, + "eval_steps_per_second": 3.824, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3114 + }, + { + "epoch": 174.0, + "eval_accuracy": 0.9088366890380313, + "eval_auc": 0.9384487568455233, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7133493205435651, + "eval_f1_macro": 0.8295745121505045, + "eval_loss": 0.25142860412597656, + "eval_pr_auc": 0.7181535155231535, + "eval_precision": 0.7073557387444515, + "eval_precision_macro": 0.8273365831908039, + "eval_pred_class_0": 16514, + "eval_pred_class_1": 3154, + "eval_predicted_binding_ratio": 0.16036200935529796, + "eval_recall": 0.7194453402128346, + "eval_recall_macro": 0.8318660877438894, + "eval_runtime": 0.27, + "eval_samples_per_second": 603.694, + "eval_steps_per_second": 3.704, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3132 + }, + { + "epoch": 175.0, + "eval_accuracy": 0.9090400650803335, + "eval_auc": 0.9386484385266265, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7135308246597278, + "eval_f1_macro": 0.8297338931856857, + "eval_loss": 0.2510823905467987, + "eval_pr_auc": 0.7190368898285651, + "eval_precision": 0.7086513994910941, + "eval_precision_macro": 0.8279095777411898, + "eval_pred_class_0": 16524, + "eval_pred_class_1": 3144, + "eval_predicted_binding_ratio": 0.15985356924954242, + "eval_recall": 0.7184779103514995, + "eval_recall_macro": 0.8315936361680839, + "eval_runtime": 0.2642, + "eval_samples_per_second": 616.851, + "eval_steps_per_second": 3.784, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3150 + }, + { + "epoch": 176.0, + "eval_accuracy": 0.9090909090909091, + "eval_auc": 0.9388042461024309, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7136450992953235, + "eval_f1_macro": 0.8298069567551197, + "eval_loss": 0.25084683299064636, + "eval_pr_auc": 0.7196986972872739, + "eval_precision": 0.7088768692332167, + "eval_precision_macro": 0.8280239111672891, + "eval_pred_class_0": 16525, + "eval_pred_class_1": 3143, + "eval_predicted_binding_ratio": 0.15980272523896685, + "eval_recall": 0.7184779103514995, + "eval_recall_macro": 0.83162381664735, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.711, + "eval_steps_per_second": 3.869, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3168 + }, + { + "epoch": 177.0, + "eval_accuracy": 0.9093451291437868, + "eval_auc": 0.9389100189010969, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7144001281435207, + "eval_f1_macro": 0.8302608322100373, + "eval_loss": 0.25058600306510925, + "eval_pr_auc": 0.7201232901620662, + "eval_precision": 0.7097390197326544, + "eval_precision_macro": 0.8285170954889824, + "eval_pred_class_0": 16526, + "eval_pred_class_1": 3142, + "eval_predicted_binding_ratio": 0.1597518812283913, + "eval_recall": 0.7191228635923895, + "eval_recall_macro": 0.832036834705593, + "eval_runtime": 0.2429, + "eval_samples_per_second": 671.088, + "eval_steps_per_second": 4.117, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3186 + }, + { + "epoch": 178.0, + "eval_accuracy": 0.9091417531014846, + "eval_auc": 0.9390935349014322, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.713392141138733, + "eval_f1_macro": 0.8297029283682245, + "eval_loss": 0.2502758800983429, + "eval_pr_auc": 0.7210120001490467, + "eval_precision": 0.7096362476068921, + "eval_precision_macro": 0.8282970157836081, + "eval_pred_class_0": 16534, + "eval_pred_class_1": 3134, + "eval_predicted_binding_ratio": 0.15934512914378687, + "eval_recall": 0.7171880038697195, + "eval_recall_macro": 0.83112976580279, + "eval_runtime": 0.27, + "eval_samples_per_second": 603.763, + "eval_steps_per_second": 3.704, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3204 + }, + { + "epoch": 179.0, + "eval_accuracy": 0.9097518812283913, + "eval_auc": 0.939160767004228, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7162270183852918, + "eval_f1_macro": 0.8312854205617097, + "eval_loss": 0.2502012550830841, + "eval_pr_auc": 0.7211443774602971, + "eval_precision": 0.7102092580849715, + "eval_precision_macro": 0.8290358389250096, + "eval_pred_class_0": 16514, + "eval_pred_class_1": 3154, + "eval_predicted_binding_ratio": 0.16036200935529796, + "eval_recall": 0.7223476297968398, + "eval_recall_macro": 0.8335888568492861, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.383, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3222 + }, + { + "epoch": 180.0, + "eval_accuracy": 0.9099552572706935, + "eval_auc": 0.9392979952395233, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7171378374061651, + "eval_f1_macro": 0.8317964319305957, + "eval_loss": 0.2500424385070801, + "eval_pr_auc": 0.7216701546304439, + "eval_precision": 0.7104430379746836, + "eval_precision_macro": 0.8292946956289701, + "eval_pred_class_0": 16508, + "eval_pred_class_1": 3160, + "eval_predicted_binding_ratio": 0.16066707341875128, + "eval_recall": 0.7239600128990649, + "eval_recall_macro": 0.8343648679211326, + "eval_runtime": 0.2706, + "eval_samples_per_second": 602.263, + "eval_steps_per_second": 3.695, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3240 + }, + { + "epoch": 181.0, + "eval_accuracy": 0.910006101281269, + "eval_auc": 0.9394494621207929, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7173427020121367, + "eval_f1_macro": 0.8319131723763289, + "eval_loss": 0.24980410933494568, + "eval_pr_auc": 0.722382187322872, + "eval_precision": 0.7105346409364125, + "eval_precision_macro": 0.8293692166334694, + "eval_pred_class_0": 16507, + "eval_pred_class_1": 3161, + "eval_predicted_binding_ratio": 0.1607179174293268, + "eval_recall": 0.7242824895195098, + "eval_recall_macro": 0.8345261062313551, + "eval_runtime": 0.2696, + "eval_samples_per_second": 604.516, + "eval_steps_per_second": 3.709, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3258 + }, + { + "epoch": 182.0, + "eval_accuracy": 0.9102094773235713, + "eval_auc": 0.9395796926893379, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7179814755669115, + "eval_f1_macro": 0.8322930296138966, + "eval_loss": 0.2495713084936142, + "eval_pr_auc": 0.7229232947500771, + "eval_precision": 0.7111673521037646, + "eval_precision_macro": 0.8297461525769929, + "eval_pred_class_0": 16507, + "eval_pred_class_1": 3161, + "eval_predicted_binding_ratio": 0.1607179174293268, + "eval_recall": 0.7249274427603999, + "eval_recall_macro": 0.8349089438103321, + "eval_runtime": 0.2615, + "eval_samples_per_second": 623.403, + "eval_steps_per_second": 3.825, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3276 + }, + { + "epoch": 183.0, + "eval_accuracy": 0.910362009355298, + "eval_auc": 0.9396445500623882, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7187749242303397, + "eval_f1_macro": 0.8327294647159501, + "eval_loss": 0.2494269460439682, + "eval_pr_auc": 0.7232071190390562, + "eval_precision": 0.7111742424242424, + "eval_precision_macro": 0.8298901515151516, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.726539825862625, + "eval_recall_macro": 0.8356547744029127, + "eval_runtime": 0.2535, + "eval_samples_per_second": 643.015, + "eval_steps_per_second": 3.945, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3294 + }, + { + "epoch": 184.0, + "eval_accuracy": 0.9106670734187513, + "eval_auc": 0.9397990826808293, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7191945021575835, + "eval_f1_macro": 0.8330396163256251, + "eval_loss": 0.24917152523994446, + "eval_pr_auc": 0.7237798760580164, + "eval_precision": 0.7129277566539924, + "eval_precision_macro": 0.830694740730097, + "eval_pred_class_0": 16512, + "eval_pred_class_1": 3156, + "eval_predicted_binding_ratio": 0.16046369737644905, + "eval_recall": 0.7255723960012899, + "eval_recall_macro": 0.8354426837856392, + "eval_runtime": 0.2605, + "eval_samples_per_second": 625.837, + "eval_steps_per_second": 3.839, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3312 + }, + { + "epoch": 185.0, + "eval_accuracy": 0.9110738255033557, + "eval_auc": 0.9399604475135382, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7199359487590072, + "eval_f1_macro": 0.8335408491792982, + "eval_loss": 0.24888525903224945, + "eval_pr_auc": 0.7244599264333298, + "eval_precision": 0.7150127226463104, + "eval_precision_macro": 0.8316954196625403, + "eval_pred_class_0": 16524, + "eval_pred_class_1": 3144, + "eval_predicted_binding_ratio": 0.15985356924954242, + "eval_recall": 0.7249274427603999, + "eval_recall_macro": 0.8354220119578544, + "eval_runtime": 0.2549, + "eval_samples_per_second": 639.342, + "eval_steps_per_second": 3.922, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3330 + }, + { + "epoch": 186.0, + "eval_accuracy": 0.9109721374822046, + "eval_auc": 0.9401063571379032, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7194359878224643, + "eval_f1_macro": 0.8332638467590944, + "eval_loss": 0.2486649453639984, + "eval_pr_auc": 0.7249907062273525, + "eval_precision": 0.714968152866242, + "eval_precision_macro": 0.8315886262879129, + "eval_pred_class_0": 16528, + "eval_pred_class_1": 3140, + "eval_predicted_binding_ratio": 0.1596501932072402, + "eval_recall": 0.7239600128990649, + "eval_recall_macro": 0.8349684775064528, + "eval_runtime": 0.2536, + "eval_samples_per_second": 642.8, + "eval_steps_per_second": 3.944, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3348 + }, + { + "epoch": 187.0, + "eval_accuracy": 0.9112263575350824, + "eval_auc": 0.9402670406956853, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7203715566944267, + "eval_f1_macro": 0.8338047799185901, + "eval_loss": 0.24845102429389954, + "eval_pr_auc": 0.7255585294747666, + "eval_precision": 0.7155583837098314, + "eval_precision_macro": 0.832000069313312, + "eval_pred_class_0": 16525, + "eval_pred_class_1": 3143, + "eval_predicted_binding_ratio": 0.15980272523896685, + "eval_recall": 0.7252499193808449, + "eval_recall_macro": 0.8356436112266088, + "eval_runtime": 0.2397, + "eval_samples_per_second": 680.081, + "eval_steps_per_second": 4.172, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3366 + }, + { + "epoch": 188.0, + "eval_accuracy": 0.9114805775879601, + "eval_auc": 0.9404174175370716, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7215736446505677, + "eval_f1_macro": 0.8344742146415792, + "eval_loss": 0.24825866520404816, + "eval_pr_auc": 0.7262464197023197, + "eval_precision": 0.7157360406091371, + "eval_precision_macro": 0.8322867657635175, + "eval_pred_class_0": 16516, + "eval_pred_class_1": 3152, + "eval_predicted_binding_ratio": 0.16026032133414683, + "eval_recall": 0.72750725572396, + "eval_recall_macro": 0.8367119184396343, + "eval_runtime": 0.2593, + "eval_samples_per_second": 628.496, + "eval_steps_per_second": 3.856, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3384 + }, + { + "epoch": 189.0, + "eval_accuracy": 0.9114297335773845, + "eval_auc": 0.9404972726910148, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7223461906279885, + "eval_f1_macro": 0.8348286515416876, + "eval_loss": 0.24819281697273254, + "eval_pr_auc": 0.7264794032375063, + "eval_precision": 0.7141506460762685, + "eval_precision_macro": 0.8317646228259488, + "eval_pred_class_0": 16495, + "eval_pred_class_1": 3173, + "eval_predicted_binding_ratio": 0.16132804555623348, + "eval_recall": 0.7307320219284101, + "eval_recall_macro": 0.8379923162699333, + "eval_runtime": 0.2478, + "eval_samples_per_second": 657.696, + "eval_steps_per_second": 4.035, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3402 + }, + { + "epoch": 190.0, + "eval_accuracy": 0.9116839536302623, + "eval_auc": 0.940656301723974, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.722922316158877, + "eval_f1_macro": 0.8351963018783921, + "eval_loss": 0.24791164696216583, + "eval_pr_auc": 0.7271914277283732, + "eval_precision": 0.7152777777777778, + "eval_precision_macro": 0.8323358585858586, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.7307320219284101, + "eval_recall_macro": 0.8381432186662634, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.211, + "eval_steps_per_second": 3.866, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3420 + }, + { + "epoch": 191.0, + "eval_accuracy": 0.9118873296725646, + "eval_auc": 0.9407912914845091, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7233838786911413, + "eval_f1_macro": 0.8354907358742514, + "eval_loss": 0.24768619239330292, + "eval_pr_auc": 0.727892471269696, + "eval_precision": 0.7161820480404552, + "eval_precision_macro": 0.8327941262984632, + "eval_pred_class_0": 16504, + "eval_pred_class_1": 3164, + "eval_predicted_binding_ratio": 0.1608704494610535, + "eval_recall": 0.7307320219284101, + "eval_recall_macro": 0.8382639405833274, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.424, + "eval_steps_per_second": 3.935, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3438 + }, + { + "epoch": 192.0, + "eval_accuracy": 0.9119381736831401, + "eval_auc": 0.9409245683252279, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7240280433397068, + "eval_f1_macro": 0.8358192243316804, + "eval_loss": 0.2475385069847107, + "eval_pr_auc": 0.7283869598485145, + "eval_precision": 0.7155905511811024, + "eval_precision_macro": 0.832663401462133, + "eval_pred_class_0": 16493, + "eval_pred_class_1": 3175, + "eval_predicted_binding_ratio": 0.16142973357738458, + "eval_recall": 0.7326668816510803, + "eval_recall_macro": 0.8390804680483324, + "eval_runtime": 0.2536, + "eval_samples_per_second": 642.682, + "eval_steps_per_second": 3.943, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3456 + }, + { + "epoch": 193.0, + "eval_accuracy": 0.9120398617042912, + "eval_auc": 0.9410766288889338, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7232885476647473, + "eval_f1_macro": 0.8354987049773379, + "eval_loss": 0.24721089005470276, + "eval_pr_auc": 0.7290279509427341, + "eval_precision": 0.7175499841320215, + "eval_precision_macro": 0.8333466455139735, + "eval_pred_class_0": 16517, + "eval_pred_class_1": 3151, + "eval_predicted_binding_ratio": 0.16020947732357127, + "eval_recall": 0.7291196388261851, + "eval_recall_macro": 0.837699192866343, + "eval_runtime": 0.2594, + "eval_samples_per_second": 628.255, + "eval_steps_per_second": 3.854, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3474 + }, + { + "epoch": 194.0, + "eval_accuracy": 0.9122432377465934, + "eval_auc": 0.9411616325348253, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7239283429302623, + "eval_f1_macro": 0.8358790547924193, + "eval_loss": 0.2470986247062683, + "eval_pr_auc": 0.7293209780794321, + "eval_precision": 0.7181847032688036, + "eval_precision_macro": 0.8337245487646312, + "eval_pred_class_0": 16517, + "eval_pred_class_1": 3151, + "eval_predicted_binding_ratio": 0.16020947732357127, + "eval_recall": 0.7297645920670751, + "eval_recall_macro": 0.83808203044532, + "eval_runtime": 0.26, + "eval_samples_per_second": 626.922, + "eval_steps_per_second": 3.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3492 + }, + { + "epoch": 194.44444444444446, + "grad_norm": 15854.8017578125, + "learning_rate": 7.667662546617938e-07, + "loss": 0.2185, + "step": 3500 + }, + { + "epoch": 195.0, + "eval_accuracy": 0.9121923937360179, + "eval_auc": 0.9412192294636534, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7248685677871595, + "eval_f1_macro": 0.8363143165624445, + "eval_loss": 0.2470363825559616, + "eval_pr_auc": 0.7294473908430833, + "eval_precision": 0.7163098236775819, + "eval_precision_macro": 0.8331124670170592, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7336343115124153, + "eval_recall_macro": 0.839624543937532, + "eval_runtime": 0.2532, + "eval_samples_per_second": 643.796, + "eval_steps_per_second": 3.95, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3510 + }, + { + "epoch": 196.0, + "eval_accuracy": 0.9123449257677445, + "eval_auc": 0.9413068608842632, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7252151737328658, + "eval_f1_macro": 0.8365353589310388, + "eval_loss": 0.2468923032283783, + "eval_pr_auc": 0.7297206006779651, + "eval_precision": 0.7169870784746297, + "eval_precision_macro": 0.8334556489675362, + "eval_pred_class_0": 16495, + "eval_pred_class_1": 3173, + "eval_predicted_binding_ratio": 0.16132804555623348, + "eval_recall": 0.7336343115124153, + "eval_recall_macro": 0.83971508537533, + "eval_runtime": 0.2673, + "eval_samples_per_second": 609.86, + "eval_steps_per_second": 3.741, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3528 + }, + { + "epoch": 197.0, + "eval_accuracy": 0.9124466137888957, + "eval_auc": 0.9415050729580239, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7246562200191877, + "eval_f1_macro": 0.8363018721763311, + "eval_loss": 0.2465437948703766, + "eval_pr_auc": 0.7306168212570879, + "eval_precision": 0.7186806216301934, + "eval_precision_macro": 0.8340602623742853, + "eval_pred_class_0": 16515, + "eval_pred_class_1": 3153, + "eval_predicted_binding_ratio": 0.1603111653447224, + "eval_recall": 0.7307320219284101, + "eval_recall_macro": 0.8385959258552536, + "eval_runtime": 0.2512, + "eval_samples_per_second": 648.769, + "eval_steps_per_second": 3.98, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3546 + }, + { + "epoch": 198.0, + "eval_accuracy": 0.9127008338417735, + "eval_auc": 0.9416286951597772, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7254996003197443, + "eval_f1_macro": 0.836798347664482, + "eval_loss": 0.24635502696037292, + "eval_pr_auc": 0.7311574695224861, + "eval_precision": 0.7194039315155358, + "eval_precision_macro": 0.8345112185130059, + "eval_pred_class_0": 16514, + "eval_pred_class_1": 3154, + "eval_predicted_binding_ratio": 0.16036200935529796, + "eval_recall": 0.7316994517897453, + "eval_recall_macro": 0.8391400017444531, + "eval_runtime": 0.2567, + "eval_samples_per_second": 635.086, + "eval_steps_per_second": 3.896, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3564 + }, + { + "epoch": 199.0, + "eval_accuracy": 0.9126499898311979, + "eval_auc": 0.941704083096699, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7255591054313099, + "eval_f1_macro": 0.8368090605158727, + "eval_loss": 0.24623039364814758, + "eval_pr_auc": 0.7315041353273113, + "eval_precision": 0.7188983855650523, + "eval_precision_macro": 0.8343113891602595, + "eval_pred_class_0": 16509, + "eval_pred_class_1": 3159, + "eval_predicted_binding_ratio": 0.16061622940817571, + "eval_recall": 0.7323444050306352, + "eval_recall_macro": 0.8393719369271001, + "eval_runtime": 0.2608, + "eval_samples_per_second": 625.096, + "eval_steps_per_second": 3.835, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3582 + }, + { + "epoch": 200.0, + "eval_accuracy": 0.912751677852349, + "eval_auc": 0.9417882886776758, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7266645428480408, + "eval_f1_macro": 0.8373778882187448, + "eval_loss": 0.24614199995994568, + "eval_pr_auc": 0.7317905290059212, + "eval_precision": 0.7179729304375196, + "eval_precision_macro": 0.8341244192542944, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7355691712350855, + "eval_recall_macro": 0.8407428761951972, + "eval_runtime": 0.2558, + "eval_samples_per_second": 637.19, + "eval_steps_per_second": 3.909, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3600 + }, + { + "epoch": 201.0, + "eval_accuracy": 0.9129042098840756, + "eval_auc": 0.9419125142943645, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7274463007159905, + "eval_f1_macro": 0.837808654578745, + "eval_loss": 0.2459731251001358, + "eval_pr_auc": 0.7322365639924645, + "eval_precision": 0.717964824120603, + "eval_precision_macro": 0.8342614705412528, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7371815543373106, + "eval_recall_macro": 0.8414887067877777, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.607, + "eval_steps_per_second": 3.924, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3618 + }, + { + "epoch": 202.0, + "eval_accuracy": 0.9131075859263779, + "eval_auc": 0.9421228725268236, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7264286857691692, + "eval_f1_macro": 0.8373900508237788, + "eval_loss": 0.2455640733242035, + "eval_pr_auc": 0.7332689412482398, + "eval_precision": 0.721233312142403, + "eval_precision_macro": 0.8354381062588301, + "eval_pred_class_0": 16522, + "eval_pred_class_1": 3146, + "eval_predicted_binding_ratio": 0.1599552572706935, + "eval_recall": 0.7316994517897453, + "eval_recall_macro": 0.8393814455785812, + "eval_runtime": 0.2535, + "eval_samples_per_second": 643.062, + "eval_steps_per_second": 3.945, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3636 + }, + { + "epoch": 203.0, + "eval_accuracy": 0.9132092739475289, + "eval_auc": 0.9422253168108461, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7266613290632506, + "eval_f1_macro": 0.8375381529725912, + "eval_loss": 0.24535632133483887, + "eval_pr_auc": 0.7337329229212076, + "eval_precision": 0.7216921119592875, + "eval_precision_macro": 0.8356705536799585, + "eval_pred_class_0": 16524, + "eval_pred_class_1": 3144, + "eval_predicted_binding_ratio": 0.15985356924954242, + "eval_recall": 0.7316994517897453, + "eval_recall_macro": 0.8394418065371132, + "eval_runtime": 0.2645, + "eval_samples_per_second": 616.363, + "eval_steps_per_second": 3.781, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3654 + }, + { + "epoch": 204.0, + "eval_accuracy": 0.9133109619686801, + "eval_auc": 0.9423872461284946, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7268066015061689, + "eval_f1_macro": 0.8376441226295008, + "eval_loss": 0.2451435625553131, + "eval_pr_auc": 0.7344416461934514, + "eval_precision": 0.7222929936305732, + "eval_precision_macro": 0.8359468356342605, + "eval_pred_class_0": 16528, + "eval_pred_class_1": 3140, + "eval_predicted_binding_ratio": 0.1596501932072402, + "eval_recall": 0.7313769751693002, + "eval_recall_macro": 0.8393711096646888, + "eval_runtime": 0.24, + "eval_samples_per_second": 679.026, + "eval_steps_per_second": 4.166, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3672 + }, + { + "epoch": 205.0, + "eval_accuracy": 0.9138194020744357, + "eval_auc": 0.9424586242758461, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7293629251157592, + "eval_f1_macro": 0.8390563302747484, + "eval_loss": 0.24504327774047852, + "eval_pr_auc": 0.7346240551024029, + "eval_precision": 0.7223276407337128, + "eval_precision_macro": 0.8364152440915626, + "eval_pred_class_0": 16506, + "eval_pred_class_1": 3162, + "eval_predicted_binding_ratio": 0.16076876143990237, + "eval_recall": 0.7365366010964205, + "eval_recall_macro": 0.8417698397526527, + "eval_runtime": 0.2606, + "eval_samples_per_second": 625.36, + "eval_steps_per_second": 3.837, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3690 + }, + { + "epoch": 206.0, + "eval_accuracy": 0.91376855806386, + "eval_auc": 0.9426215171108914, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.729073482428115, + "eval_f1_macro": 0.8388988164347613, + "eval_loss": 0.2448122650384903, + "eval_pr_auc": 0.7352376772544322, + "eval_precision": 0.7223805001582779, + "eval_precision_macro": 0.8363855980711433, + "eval_pred_class_0": 16509, + "eval_pred_class_1": 3159, + "eval_predicted_binding_ratio": 0.16061622940817571, + "eval_recall": 0.7358916478555305, + "eval_recall_macro": 0.8414775436114739, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.41, + "eval_steps_per_second": 3.88, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3708 + }, + { + "epoch": 207.0, + "eval_accuracy": 0.9139210900955868, + "eval_auc": 0.9426929439207377, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7299409794225554, + "eval_f1_macro": 0.8393709493840633, + "eval_loss": 0.24476298689842224, + "eval_pr_auc": 0.7354485111055644, + "eval_precision": 0.7222222222222222, + "eval_precision_macro": 0.8364747474747475, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.7378265075782006, + "eval_recall_macro": 0.8423544320350108, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.323, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3726 + }, + { + "epoch": 208.0, + "eval_accuracy": 0.9138702460850112, + "eval_auc": 0.9428956913390121, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.72896, + "eval_f1_macro": 0.8388800483588226, + "eval_loss": 0.24439764022827148, + "eval_pr_auc": 0.736435270049487, + "eval_precision": 0.723404255319149, + "eval_precision_macro": 0.8367914187788916, + "eval_pred_class_0": 16519, + "eval_pred_class_1": 3149, + "eval_predicted_binding_ratio": 0.16010778930242017, + "eval_recall": 0.7346017413737504, + "eval_recall_macro": 0.8410136732461799, + "eval_runtime": 0.2835, + "eval_samples_per_second": 574.921, + "eval_steps_per_second": 3.527, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3744 + }, + { + "epoch": 209.0, + "eval_accuracy": 0.9140736221273134, + "eval_auc": 0.9429205676063466, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.730805989168525, + "eval_f1_macro": 0.83984185960937, + "eval_loss": 0.24438706040382385, + "eval_pr_auc": 0.7364306271247294, + "eval_precision": 0.722064841045011, + "eval_precision_macro": 0.8365645289452815, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7397613673008707, + "eval_recall_macro": 0.8432313204585479, + "eval_runtime": 0.2624, + "eval_samples_per_second": 621.222, + "eval_steps_per_second": 3.811, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3762 + }, + { + "epoch": 210.0, + "eval_accuracy": 0.9142769981696156, + "eval_auc": 0.9430619808161933, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7309288222151292, + "eval_f1_macro": 0.8399730291904192, + "eval_loss": 0.2441486269235611, + "eval_pr_auc": 0.7370811402900591, + "eval_precision": 0.7235387045813586, + "eval_precision_macro": 0.8371980622222068, + "eval_pred_class_0": 16503, + "eval_pred_class_1": 3165, + "eval_predicted_binding_ratio": 0.16092129347162903, + "eval_recall": 0.7384714608190907, + "eval_recall_macro": 0.842827811051786, + "eval_runtime": 0.2675, + "eval_samples_per_second": 609.291, + "eval_steps_per_second": 3.738, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3780 + }, + { + "epoch": 211.0, + "eval_accuracy": 0.9146329062436445, + "eval_auc": 0.9431815542983806, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7325155328978812, + "eval_f1_macro": 0.8408637738901821, + "eval_loss": 0.24397221207618713, + "eval_pr_auc": 0.7374915342644908, + "eval_precision": 0.7238664987405542, + "eval_precision_macro": 0.8376184300639468, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7413737504030957, + "eval_recall_macro": 0.8442185948852564, + "eval_runtime": 0.264, + "eval_samples_per_second": 617.425, + "eval_steps_per_second": 3.788, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3798 + }, + { + "epoch": 212.0, + "eval_accuracy": 0.9144295302013423, + "eval_auc": 0.9432977798009264, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7319636884854276, + "eval_f1_macro": 0.8405258137499286, + "eval_loss": 0.24377743899822235, + "eval_pr_auc": 0.7380125440447487, + "eval_precision": 0.7230962869729389, + "eval_precision_macro": 0.837200053735105, + "eval_pred_class_0": 16490, + "eval_pred_class_1": 3178, + "eval_predicted_binding_ratio": 0.16158226560911124, + "eval_recall": 0.7410512737826508, + "eval_recall_macro": 0.8439668151372359, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.044, + "eval_steps_per_second": 3.828, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3816 + }, + { + "epoch": 213.0, + "eval_accuracy": 0.9144295302013423, + "eval_auc": 0.9434619086633391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7311072056239016, + "eval_f1_macro": 0.8401129643018078, + "eval_loss": 0.24348998069763184, + "eval_pr_auc": 0.7388402605739814, + "eval_precision": 0.7245091830272324, + "eval_precision_macro": 0.8376331499630408, + "eval_pred_class_0": 16510, + "eval_pred_class_1": 3158, + "eval_predicted_binding_ratio": 0.16056538539760015, + "eval_recall": 0.7378265075782006, + "eval_recall_macro": 0.8426562368276709, + "eval_runtime": 0.2496, + "eval_samples_per_second": 652.954, + "eval_steps_per_second": 4.006, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3834 + }, + { + "epoch": 214.0, + "eval_accuracy": 0.9147854382753712, + "eval_auc": 0.943601764673353, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7310654685494223, + "eval_f1_macro": 0.8402185728440683, + "eval_loss": 0.24328412115573883, + "eval_pr_auc": 0.7395557790782865, + "eval_precision": 0.727563078888534, + "eval_precision_macro": 0.838897945080114, + "eval_pred_class_0": 16537, + "eval_pred_class_1": 3131, + "eval_predicted_binding_ratio": 0.15919259711206019, + "eval_recall": 0.7346017413737504, + "eval_recall_macro": 0.841556921872968, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.38, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3852 + }, + { + "epoch": 215.0, + "eval_accuracy": 0.9146837502542201, + "eval_auc": 0.9436924715636332, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7310897435897435, + "eval_f1_macro": 0.8401943762667112, + "eval_loss": 0.2431441992521286, + "eval_pr_auc": 0.7398761361047077, + "eval_precision": 0.7266645428480408, + "eval_precision_macro": 0.838527383046018, + "eval_pred_class_0": 16529, + "eval_pred_class_1": 3139, + "eval_predicted_binding_ratio": 0.15959934919666463, + "eval_recall": 0.7355691712350855, + "eval_recall_macro": 0.8418897344073055, + "eval_runtime": 0.2583, + "eval_samples_per_second": 631.075, + "eval_steps_per_second": 3.872, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3870 + }, + { + "epoch": 216.0, + "eval_accuracy": 0.9147345942647956, + "eval_auc": 0.9437266715649686, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7332591060919358, + "eval_f1_macro": 0.8412581348487456, + "eval_loss": 0.24318096041679382, + "eval_pr_auc": 0.739819563727558, + "eval_precision": 0.7234777150031387, + "eval_precision_macro": 0.8375913025931845, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7433086101257659, + "eval_recall_macro": 0.8450653028295274, + "eval_runtime": 0.2695, + "eval_samples_per_second": 604.716, + "eval_steps_per_second": 3.71, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3888 + }, + { + "epoch": 217.0, + "eval_accuracy": 0.915039658328249, + "eval_auc": 0.9438657295100676, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7336202773792444, + "eval_f1_macro": 0.8415401994826537, + "eval_loss": 0.24295340478420258, + "eval_pr_auc": 0.740473021691125, + "eval_precision": 0.7254098360655737, + "eval_precision_macro": 0.8384566154139702, + "eval_pred_class_0": 16496, + "eval_pred_class_1": 3172, + "eval_predicted_binding_ratio": 0.16127720154565792, + "eval_recall": 0.7420187036439858, + "eval_recall_macro": 0.8447221543812975, + "eval_runtime": 0.2596, + "eval_samples_per_second": 627.926, + "eval_steps_per_second": 3.852, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3906 + }, + { + "epoch": 218.0, + "eval_accuracy": 0.9148362822859467, + "eval_auc": 0.9439684365715622, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7333227193122114, + "eval_f1_macro": 0.8413247993777817, + "eval_loss": 0.24282881617546082, + "eval_pr_auc": 0.7409189185674594, + "eval_precision": 0.7242138364779874, + "eval_precision_macro": 0.837907500480624, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.7426636568848759, + "eval_recall_macro": 0.8448635481261465, + "eval_runtime": 0.2549, + "eval_samples_per_second": 639.434, + "eval_steps_per_second": 3.923, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3924 + }, + { + "epoch": 219.0, + "eval_accuracy": 0.9149888143176734, + "eval_auc": 0.9440560582596731, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7337579617834394, + "eval_f1_macro": 0.8415885646284089, + "eval_loss": 0.24268390238285065, + "eval_pr_auc": 0.7412204047828347, + "eval_precision": 0.724756212645486, + "eval_precision_macro": 0.8382104794199593, + "eval_pred_class_0": 16489, + "eval_pred_class_1": 3179, + "eval_predicted_binding_ratio": 0.1616331096196868, + "eval_recall": 0.7429861335053208, + "eval_recall_macro": 0.845085147394901, + "eval_runtime": 0.2558, + "eval_samples_per_second": 637.151, + "eval_steps_per_second": 3.909, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3942 + }, + { + "epoch": 220.0, + "eval_accuracy": 0.9152430343705511, + "eval_auc": 0.9441587945186644, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7346808849275823, + "eval_f1_macro": 0.842123366857946, + "eval_loss": 0.24253520369529724, + "eval_pr_auc": 0.7415422610927452, + "eval_precision": 0.7253299811439347, + "eval_precision_macro": 0.8386142808788943, + "eval_pred_class_0": 16486, + "eval_pred_class_1": 3182, + "eval_predicted_binding_ratio": 0.16178564165141346, + "eval_recall": 0.7442760399871009, + "eval_recall_macro": 0.8457602811150571, + "eval_runtime": 0.2572, + "eval_samples_per_second": 633.818, + "eval_steps_per_second": 3.888, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3960 + }, + { + "epoch": 221.0, + "eval_accuracy": 0.9153955664022778, + "eval_auc": 0.9442562168332251, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7354531001589825, + "eval_f1_macro": 0.8425495241156832, + "eval_loss": 0.2423904687166214, + "eval_pr_auc": 0.741902799087436, + "eval_precision": 0.7253057384760113, + "eval_precision_macro": 0.8387436514456639, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.745888423089326, + "eval_recall_macro": 0.8465061117076376, + "eval_runtime": 0.2433, + "eval_samples_per_second": 669.926, + "eval_steps_per_second": 4.11, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3978 + }, + { + "epoch": 222.0, + "eval_accuracy": 0.9153955664022778, + "eval_auc": 0.944399226172901, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.734609250398724, + "eval_f1_macro": 0.8421428275824746, + "eval_loss": 0.24210123717784882, + "eval_pr_auc": 0.7426309034006747, + "eval_precision": 0.7267276743452193, + "eval_precision_macro": 0.8391805533372256, + "eval_pred_class_0": 16499, + "eval_pred_class_1": 3169, + "eval_predicted_binding_ratio": 0.16112466951393126, + "eval_recall": 0.7426636568848759, + "eval_recall_macro": 0.8451955333980726, + "eval_runtime": 0.2674, + "eval_samples_per_second": 609.607, + "eval_steps_per_second": 3.74, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3996 + }, + { + "epoch": 222.22222222222223, + "grad_norm": 16301.5107421875, + "learning_rate": 6.802697587657594e-07, + "loss": 0.211, + "step": 4000 + }, + { + "epoch": 223.0, + "eval_accuracy": 0.9156497864551556, + "eval_auc": 0.9445356174132858, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.735026353617633, + "eval_f1_macro": 0.8424338117294514, + "eval_loss": 0.2418563961982727, + "eval_pr_auc": 0.7432491183346004, + "eval_precision": 0.7281645569620253, + "eval_precision_macro": 0.8398516024451512, + "eval_pred_class_0": 16508, + "eval_pred_class_1": 3160, + "eval_predicted_binding_ratio": 0.16066707341875128, + "eval_recall": 0.7420187036439858, + "eval_recall_macro": 0.8450843201324897, + "eval_runtime": 0.2638, + "eval_samples_per_second": 617.886, + "eval_steps_per_second": 3.791, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4014 + }, + { + "epoch": 224.0, + "eval_accuracy": 0.9159040065080334, + "eval_auc": 0.9445885524751136, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.736624203821656, + "eval_f1_macro": 0.8432939508943711, + "eval_loss": 0.24180874228477478, + "eval_pr_auc": 0.7434863322076849, + "eval_precision": 0.7275872916011324, + "eval_precision_macro": 0.8398989281099847, + "eval_pred_class_0": 16489, + "eval_pred_class_1": 3179, + "eval_predicted_binding_ratio": 0.1616331096196868, + "eval_recall": 0.745888423089326, + "eval_recall_macro": 0.8468079165002977, + "eval_runtime": 0.2501, + "eval_samples_per_second": 651.671, + "eval_steps_per_second": 3.998, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4032 + }, + { + "epoch": 225.0, + "eval_accuracy": 0.9159548505186089, + "eval_auc": 0.9446536531606378, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7374106433677522, + "eval_f1_macro": 0.8436909456056703, + "eval_loss": 0.24177636206150055, + "eval_pr_auc": 0.7437995583771988, + "eval_precision": 0.7266750156543519, + "eval_precision_macro": 0.8396638402297497, + "eval_pred_class_0": 16474, + "eval_pred_class_1": 3194, + "eval_predicted_binding_ratio": 0.16239576977832013, + "eval_recall": 0.7484682360528861, + "eval_recall_macro": 0.8478865596272157, + "eval_runtime": 0.2167, + "eval_samples_per_second": 752.113, + "eval_steps_per_second": 4.614, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4050 + }, + { + "epoch": 226.0, + "eval_accuracy": 0.9161582265609112, + "eval_auc": 0.9447719419529625, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.737629276054097, + "eval_f1_macro": 0.843868342907385, + "eval_loss": 0.24154822528362274, + "eval_pr_auc": 0.7443388353128296, + "eval_precision": 0.7280150753768844, + "eval_precision_macro": 0.8402572343640063, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7475008061915511, + "eval_recall_macro": 0.8476141080514102, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.202, + "eval_steps_per_second": 3.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4068 + }, + { + "epoch": 227.0, + "eval_accuracy": 0.9162599145820622, + "eval_auc": 0.9448749507219245, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7381974248927039, + "eval_f1_macro": 0.8441781495775367, + "eval_loss": 0.2414349913597107, + "eval_pr_auc": 0.7447448668341484, + "eval_precision": 0.7278996865203762, + "eval_precision_macro": 0.840312265884293, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7487907126733312, + "eval_recall_macro": 0.8481987003337683, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.465, + "eval_steps_per_second": 3.88, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4086 + }, + { + "epoch": 228.0, + "eval_accuracy": 0.9163107585926378, + "eval_auc": 0.9449481196490842, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.738314785373609, + "eval_f1_macro": 0.8442527143596241, + "eval_loss": 0.2413274347782135, + "eval_pr_auc": 0.7451543649133733, + "eval_precision": 0.7281279397930386, + "eval_precision_macro": 0.840427826926679, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7487907126733312, + "eval_recall_macro": 0.8482288808130343, + "eval_runtime": 0.2185, + "eval_samples_per_second": 746.149, + "eval_steps_per_second": 4.578, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4104 + }, + { + "epoch": 229.0, + "eval_accuracy": 0.9162599145820622, + "eval_auc": 0.9450982726429946, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7376135096383623, + "eval_f1_macro": 0.843896745442007, + "eval_loss": 0.2410273402929306, + "eval_pr_auc": 0.7459954701052622, + "eval_precision": 0.728904282115869, + "eval_precision_macro": 0.8406224054285385, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7465333763302161, + "eval_recall_macro": 0.8472812955170728, + "eval_runtime": 0.2597, + "eval_samples_per_second": 627.64, + "eval_steps_per_second": 3.851, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4122 + }, + { + "epoch": 230.0, + "eval_accuracy": 0.9167683546878178, + "eval_auc": 0.9451833444163787, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7392066273697626, + "eval_f1_macro": 0.8448445490519523, + "eval_loss": 0.2408701479434967, + "eval_pr_auc": 0.74641099889946, + "eval_precision": 0.7304785894206549, + "eval_precision_macro": 0.8415611477299734, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7481457594324411, + "eval_recall_macro": 0.8482383894645154, + "eval_runtime": 0.2678, + "eval_samples_per_second": 608.615, + "eval_steps_per_second": 3.734, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4140 + }, + { + "epoch": 231.0, + "eval_accuracy": 0.9167175106772423, + "eval_auc": 0.9452793360535927, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7393380012730745, + "eval_f1_macro": 0.8448898647294817, + "eval_loss": 0.24077175557613373, + "eval_pr_auc": 0.7467905435777061, + "eval_precision": 0.7298146402764687, + "eval_precision_macro": 0.8413101105537636, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7491131892937762, + "eval_recall_macro": 0.8486013824781189, + "eval_runtime": 0.2588, + "eval_samples_per_second": 629.884, + "eval_steps_per_second": 3.864, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4158 + }, + { + "epoch": 232.0, + "eval_accuracy": 0.9163616026032133, + "eval_auc": 0.9453335850027798, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7391786903440621, + "eval_f1_macro": 0.8446869866386211, + "eval_loss": 0.24075280129909515, + "eval_pr_auc": 0.7469096614042753, + "eval_precision": 0.7270742358078602, + "eval_precision_macro": 0.840149923152381, + "eval_pred_class_0": 16462, + "eval_pred_class_1": 3206, + "eval_predicted_binding_ratio": 0.16300589790522677, + "eval_recall": 0.7516930022573364, + "eval_recall_macro": 0.8494385817709088, + "eval_runtime": 0.2648, + "eval_samples_per_second": 615.647, + "eval_steps_per_second": 3.777, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4176 + }, + { + "epoch": 233.0, + "eval_accuracy": 0.9168191986983933, + "eval_auc": 0.9454244865430391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7405645417063115, + "eval_f1_macro": 0.845516906033295, + "eval_loss": 0.24061860144138336, + "eval_pr_auc": 0.7472467341999135, + "eval_precision": 0.7285491419656787, + "eval_precision_macro": 0.8410102813636934, + "eval_pred_class_0": 16463, + "eval_pred_class_1": 3205, + "eval_predicted_binding_ratio": 0.1629550538946512, + "eval_recall": 0.7529829087391164, + "eval_recall_macro": 0.8502344374081289, + "eval_runtime": 0.2576, + "eval_samples_per_second": 632.77, + "eval_steps_per_second": 3.882, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4194 + }, + { + "epoch": 234.0, + "eval_accuracy": 0.9171242627618467, + "eval_auc": 0.9455514956544295, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7409408773045136, + "eval_f1_macro": 0.8458063544009555, + "eval_loss": 0.24034352600574493, + "eval_pr_auc": 0.7478642707879094, + "eval_precision": 0.7304920087746788, + "eval_precision_macro": 0.841880100399963, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7516930022573364, + "eval_recall_macro": 0.849891288959899, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.626, + "eval_steps_per_second": 3.924, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4212 + }, + { + "epoch": 235.0, + "eval_accuracy": 0.9170734187512711, + "eval_auc": 0.945608099868364, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7413984461709212, + "eval_f1_macro": 0.8460087995182923, + "eval_loss": 0.24029456079006195, + "eval_pr_auc": 0.7479864003180418, + "eval_precision": 0.7292576419213974, + "eval_precision_macro": 0.8414542370705274, + "eval_pred_class_0": 16462, + "eval_pred_class_1": 3206, + "eval_predicted_binding_ratio": 0.16300589790522677, + "eval_recall": 0.7539503386004515, + "eval_recall_macro": 0.8507785132973285, + "eval_runtime": 0.2166, + "eval_samples_per_second": 752.582, + "eval_steps_per_second": 4.617, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4230 + }, + { + "epoch": 236.0, + "eval_accuracy": 0.9172767947935733, + "eval_auc": 0.9457173958316524, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7416229950770208, + "eval_f1_macro": 0.8461890816058248, + "eval_loss": 0.2400863915681839, + "eval_pr_auc": 0.7485735786505677, + "eval_precision": 0.7306007509386734, + "eval_precision_macro": 0.8420487970332027, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7529829087391164, + "eval_recall_macro": 0.850506061721523, + "eval_runtime": 0.2654, + "eval_samples_per_second": 614.11, + "eval_steps_per_second": 3.768, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4248 + }, + { + "epoch": 237.0, + "eval_accuracy": 0.9173784828147244, + "eval_auc": 0.9457852508143816, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7421044278685923, + "eval_f1_macro": 0.8464570875531852, + "eval_loss": 0.24001120030879974, + "eval_pr_auc": 0.7487297504117033, + "eval_precision": 0.730625, + "eval_precision_macro": 0.8421463596065095, + "eval_pred_class_0": 16468, + "eval_pred_class_1": 3200, + "eval_predicted_binding_ratio": 0.16270083384177345, + "eval_recall": 0.7539503386004515, + "eval_recall_macro": 0.8509595961729245, + "eval_runtime": 0.2614, + "eval_samples_per_second": 623.497, + "eval_steps_per_second": 3.825, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4266 + }, + { + "epoch": 238.0, + "eval_accuracy": 0.9176835468781778, + "eval_auc": 0.9458918216779619, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7428934413212641, + "eval_f1_macro": 0.8469453737675663, + "eval_loss": 0.23980914056301117, + "eval_pr_auc": 0.7491659673680734, + "eval_precision": 0.7318523153942428, + "eval_precision_macro": 0.8427959974251447, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7542728152208965, + "eval_recall_macro": 0.8512717368794771, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.674, + "eval_steps_per_second": 3.869, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4284 + }, + { + "epoch": 239.0, + "eval_accuracy": 0.9175818588570266, + "eval_auc": 0.9459814190633611, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7427392477384542, + "eval_f1_macro": 0.8468350393376697, + "eval_loss": 0.2397017627954483, + "eval_pr_auc": 0.7495363660441035, + "eval_precision": 0.73125, + "eval_precision_macro": 0.8425195834345397, + "eval_pred_class_0": 16468, + "eval_pred_class_1": 3200, + "eval_predicted_binding_ratio": 0.16270083384177345, + "eval_recall": 0.7545952918413416, + "eval_recall_macro": 0.8513424337519016, + "eval_runtime": 0.2693, + "eval_samples_per_second": 605.187, + "eval_steps_per_second": 3.713, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4302 + }, + { + "epoch": 240.0, + "eval_accuracy": 0.9177852348993288, + "eval_auc": 0.9460787245879343, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7425569176882661, + "eval_f1_macro": 0.846819224235148, + "eval_loss": 0.2394852489233017, + "eval_pr_auc": 0.7500368484248636, + "eval_precision": 0.7333333333333333, + "eval_precision_macro": 0.8433466763706938, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.7520154788777813, + "eval_recall_macro": 0.8504146930213136, + "eval_runtime": 0.2663, + "eval_samples_per_second": 612.01, + "eval_steps_per_second": 3.755, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4320 + }, + { + "epoch": 241.0, + "eval_accuracy": 0.91788692292048, + "eval_auc": 0.9461055278900622, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7437728066000318, + "eval_f1_macro": 0.8474411515820368, + "eval_loss": 0.23946216702461243, + "eval_pr_auc": 0.7500126507936957, + "eval_precision": 0.7320424734540912, + "eval_precision_macro": 0.8430344761294506, + "eval_pred_class_0": 16466, + "eval_pred_class_1": 3202, + "eval_predicted_binding_ratio": 0.16280252186292454, + "eval_recall": 0.7558851983231216, + "eval_recall_macro": 0.8520477479513235, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.523, + "eval_steps_per_second": 3.881, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4338 + }, + { + "epoch": 242.0, + "eval_accuracy": 0.9178360789099044, + "eval_auc": 0.9462651603379567, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7429207763283487, + "eval_f1_macro": 0.8470125818101653, + "eval_loss": 0.23918889462947845, + "eval_pr_auc": 0.7507506323790389, + "eval_precision": 0.7331240188383046, + "eval_precision_macro": 0.8433259480225619, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7529829087391164, + "eval_recall_macro": 0.8508380469934491, + "eval_runtime": 0.2685, + "eval_samples_per_second": 607.073, + "eval_steps_per_second": 3.724, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4356 + }, + { + "epoch": 243.0, + "eval_accuracy": 0.9181411429733577, + "eval_auc": 0.9463650352422546, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7440381558028617, + "eval_f1_macro": 0.847659094847506, + "eval_loss": 0.2390899360179901, + "eval_pr_auc": 0.7511682669955798, + "eval_precision": 0.7337723424270931, + "eval_precision_macro": 0.8437961778887089, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7545952918413416, + "eval_recall_macro": 0.8516744190238277, + "eval_runtime": 0.2667, + "eval_samples_per_second": 611.141, + "eval_steps_per_second": 3.749, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4374 + }, + { + "epoch": 244.0, + "eval_accuracy": 0.918446207036811, + "eval_auc": 0.9465344196541042, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7436061381074168, + "eval_f1_macro": 0.8475588127054617, + "eval_loss": 0.2388136237859726, + "eval_pr_auc": 0.7520628478642977, + "eval_precision": 0.7372424722662441, + "eval_precision_macro": 0.8451548762954184, + "eval_pred_class_0": 16513, + "eval_pred_class_1": 3155, + "eval_predicted_binding_ratio": 0.1604128533658735, + "eval_recall": 0.7500806191551113, + "eval_recall_macro": 0.8500206922660327, + "eval_runtime": 0.2635, + "eval_samples_per_second": 618.538, + "eval_steps_per_second": 3.795, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4392 + }, + { + "epoch": 245.0, + "eval_accuracy": 0.9182936750050844, + "eval_auc": 0.946561008841255, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7443931923015746, + "eval_f1_macro": 0.8478842115097998, + "eval_loss": 0.2388090342283249, + "eval_pr_auc": 0.7520269916576209, + "eval_precision": 0.7344632768361582, + "eval_precision_macro": 0.844145847858681, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7545952918413416, + "eval_recall_macro": 0.8517649604616258, + "eval_runtime": 0.2356, + "eval_samples_per_second": 691.824, + "eval_steps_per_second": 4.244, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4410 + }, + { + "epoch": 246.0, + "eval_accuracy": 0.9185478950579622, + "eval_auc": 0.9466103526109676, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7456335344553826, + "eval_f1_macro": 0.8485719582198821, + "eval_loss": 0.23877908289432526, + "eval_pr_auc": 0.7522268586665612, + "eval_precision": 0.7344385361276197, + "eval_precision_macro": 0.844360910951309, + "eval_pred_class_0": 16471, + "eval_pred_class_1": 3197, + "eval_predicted_binding_ratio": 0.16254830181004679, + "eval_recall": 0.7571751048049017, + "eval_recall_macro": 0.8529643255056077, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.409, + "eval_steps_per_second": 3.868, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4428 + }, + { + "epoch": 247.0, + "eval_accuracy": 0.9181919869839333, + "eval_auc": 0.9466863828928207, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.74464370734804, + "eval_f1_macro": 0.847968894691123, + "eval_loss": 0.23864901065826416, + "eval_pr_auc": 0.7525479720158522, + "eval_precision": 0.733125, + "eval_precision_macro": 0.84363925491863, + "eval_pred_class_0": 16468, + "eval_pred_class_1": 3200, + "eval_predicted_binding_ratio": 0.16270083384177345, + "eval_recall": 0.7565301515640116, + "eval_recall_macro": 0.8524909464888326, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.776, + "eval_steps_per_second": 3.87, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4446 + }, + { + "epoch": 248.0, + "eval_accuracy": 0.9184970510473867, + "eval_auc": 0.9467725739035849, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7454343338097507, + "eval_f1_macro": 0.8484579580910492, + "eval_loss": 0.23847386240959167, + "eval_pr_auc": 0.752960490937812, + "eval_precision": 0.7343554443053817, + "eval_precision_macro": 0.8442903982090288, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7568526281844566, + "eval_recall_macro": 0.8528030871953852, + "eval_runtime": 0.2626, + "eval_samples_per_second": 620.673, + "eval_steps_per_second": 3.808, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4464 + }, + { + "epoch": 249.0, + "eval_accuracy": 0.9183953630262355, + "eval_auc": 0.9468334214870647, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7452785272179019, + "eval_f1_macro": 0.8483468464756075, + "eval_loss": 0.238382488489151, + "eval_pr_auc": 0.7531576423642267, + "eval_precision": 0.73375, + "eval_precision_macro": 0.8440124787466602, + "eval_pred_class_0": 16468, + "eval_pred_class_1": 3200, + "eval_predicted_binding_ratio": 0.16270083384177345, + "eval_recall": 0.7571751048049017, + "eval_recall_macro": 0.8528737840678097, + "eval_runtime": 0.2634, + "eval_samples_per_second": 618.912, + "eval_steps_per_second": 3.797, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4482 + }, + { + "epoch": 250.0, + "grad_norm": 32703.09375, + "learning_rate": 5.870150616070439e-07, + "loss": 0.2045, + "step": 4500 + }, + { + "epoch": 250.0, + "eval_accuracy": 0.9189546471425666, + "eval_auc": 0.9469757203543168, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7464206172446707, + "eval_f1_macro": 0.849095331315225, + "eval_loss": 0.23810486495494843, + "eval_pr_auc": 0.7538503066163141, + "eval_precision": 0.7365777080062794, + "eval_precision_macro": 0.8453864697284325, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7565301515640116, + "eval_recall_macro": 0.8529436536778228, + "eval_runtime": 0.2683, + "eval_samples_per_second": 607.492, + "eval_steps_per_second": 3.727, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4500 + }, + { + "epoch": 251.0, + "eval_accuracy": 0.9192088671954444, + "eval_auc": 0.9470911964544428, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7465305471367044, + "eval_f1_macro": 0.8492382980338313, + "eval_loss": 0.23794293403625488, + "eval_pr_auc": 0.7543678751573928, + "eval_precision": 0.7386363636363636, + "eval_precision_macro": 0.8462575757575758, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.7545952918413416, + "eval_recall_macro": 0.8523082090884139, + "eval_runtime": 0.2575, + "eval_samples_per_second": 632.943, + "eval_steps_per_second": 3.883, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4518 + }, + { + "epoch": 252.0, + "eval_accuracy": 0.9186495830791133, + "eval_auc": 0.9471146615094285, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7459510955859003, + "eval_f1_macro": 0.8487610069611806, + "eval_loss": 0.2380078136920929, + "eval_pr_auc": 0.7543445385135205, + "eval_precision": 0.7347513293712856, + "eval_precision_macro": 0.8445476639570896, + "eval_pred_class_0": 16471, + "eval_pred_class_1": 3197, + "eval_predicted_binding_ratio": 0.16254830181004679, + "eval_recall": 0.7574975814253466, + "eval_recall_macro": 0.8531557442950962, + "eval_runtime": 0.262, + "eval_samples_per_second": 622.051, + "eval_steps_per_second": 3.816, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4536 + }, + { + "epoch": 253.0, + "eval_accuracy": 0.9187004270896888, + "eval_auc": 0.9472153636761378, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7461501825686617, + "eval_f1_macro": 0.8488749520465066, + "eval_loss": 0.23785638809204102, + "eval_pr_auc": 0.7549126832149435, + "eval_precision": 0.7348342714196373, + "eval_precision_macro": 0.8446181071730852, + "eval_pred_class_0": 16470, + "eval_pred_class_1": 3198, + "eval_predicted_binding_ratio": 0.16259914582062232, + "eval_recall": 0.7578200580457917, + "eval_recall_macro": 0.8533169826053187, + "eval_runtime": 0.2614, + "eval_samples_per_second": 623.46, + "eval_steps_per_second": 3.825, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4554 + }, + { + "epoch": 254.0, + "eval_accuracy": 0.918903803131991, + "eval_auc": 0.9473291852514412, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7465437788018433, + "eval_f1_macro": 0.849136671654349, + "eval_loss": 0.23767386376857758, + "eval_pr_auc": 0.7554825230801616, + "eval_precision": 0.7359022556390977, + "eval_precision_macro": 0.8451300547435596, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7574975814253466, + "eval_recall_macro": 0.8533066466914263, + "eval_runtime": 0.2597, + "eval_samples_per_second": 627.53, + "eval_steps_per_second": 3.85, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4572 + }, + { + "epoch": 255.0, + "eval_accuracy": 0.9196156192800488, + "eval_auc": 0.9474168847995438, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7481280866656046, + "eval_f1_macro": 0.8501522492676461, + "eval_loss": 0.23749451339244843, + "eval_pr_auc": 0.7559063738482461, + "eval_precision": 0.739294710327456, + "eval_precision_macro": 0.8468181046180088, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7571751048049017, + "eval_recall_macro": 0.8535981155701939, + "eval_runtime": 0.267, + "eval_samples_per_second": 610.581, + "eval_steps_per_second": 3.746, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4590 + }, + { + "epoch": 256.0, + "eval_accuracy": 0.9195647752694732, + "eval_auc": 0.9474761751831905, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7480089200382287, + "eval_f1_macro": 0.8500768176935048, + "eval_loss": 0.23738548159599304, + "eval_pr_auc": 0.7561572732622138, + "eval_precision": 0.7390620081838212, + "eval_precision_macro": 0.8467003692001515, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7571751048049017, + "eval_recall_macro": 0.853567935090928, + "eval_runtime": 0.2595, + "eval_samples_per_second": 628.111, + "eval_steps_per_second": 3.853, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4608 + }, + { + "epoch": 257.0, + "eval_accuracy": 0.919818995322351, + "eval_auc": 0.9475989409250355, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7486051331101546, + "eval_f1_macro": 0.8504541559450298, + "eval_loss": 0.23718814551830292, + "eval_pr_auc": 0.7567216824275462, + "eval_precision": 0.7402269861286255, + "eval_precision_macro": 0.8472897782243516, + "eval_pred_class_0": 16496, + "eval_pred_class_1": 3172, + "eval_predicted_binding_ratio": 0.16127720154565792, + "eval_recall": 0.7571751048049017, + "eval_recall_macro": 0.853718837487258, + "eval_runtime": 0.2144, + "eval_samples_per_second": 760.368, + "eval_steps_per_second": 4.665, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4626 + }, + { + "epoch": 258.0, + "eval_accuracy": 0.9193613992271711, + "eval_auc": 0.9475927315907009, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.748013981569749, + "eval_f1_macro": 0.8500072329009691, + "eval_loss": 0.23723167181015015, + "eval_pr_auc": 0.7565508642499409, + "eval_precision": 0.7372377074851237, + "eval_precision_macro": 0.845948140540741, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7591099645275717, + "eval_recall_macro": 0.8542335601596028, + "eval_runtime": 0.2406, + "eval_samples_per_second": 677.35, + "eval_steps_per_second": 4.156, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4644 + }, + { + "epoch": 259.0, + "eval_accuracy": 0.9197681513117755, + "eval_auc": 0.9476849762158163, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7486460656259956, + "eval_f1_macro": 0.8504558902151395, + "eval_loss": 0.23700466752052307, + "eval_pr_auc": 0.7570817989267699, + "eval_precision": 0.7396915328926661, + "eval_precision_macro": 0.8470757706910725, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7578200580457917, + "eval_recall_macro": 0.8539507726699049, + "eval_runtime": 0.2559, + "eval_samples_per_second": 636.906, + "eval_steps_per_second": 3.907, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4662 + }, + { + "epoch": 260.0, + "eval_accuracy": 0.9199715273540777, + "eval_auc": 0.9477206555569931, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7495225970719287, + "eval_f1_macro": 0.8509503339952407, + "eval_loss": 0.23693729937076569, + "eval_pr_auc": 0.7572519889982183, + "eval_precision": 0.7398680490103676, + "eval_precision_macro": 0.8473073942352414, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7594324411480168, + "eval_recall_macro": 0.8547267837417515, + "eval_runtime": 0.2431, + "eval_samples_per_second": 670.398, + "eval_steps_per_second": 4.113, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4680 + }, + { + "epoch": 261.0, + "eval_accuracy": 0.919818995322351, + "eval_auc": 0.9477750407611654, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7492447129909365, + "eval_f1_macro": 0.8507623994645729, + "eval_loss": 0.23685960471630096, + "eval_pr_auc": 0.7574807332819814, + "eval_precision": 0.739021329987453, + "eval_precision_macro": 0.8469075096539207, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7597549177684618, + "eval_recall_macro": 0.85476730013491, + "eval_runtime": 0.2608, + "eval_samples_per_second": 625.06, + "eval_steps_per_second": 3.835, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4698 + }, + { + "epoch": 262.0, + "eval_accuracy": 0.9200223713646533, + "eval_auc": 0.947870010485989, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7495621716287215, + "eval_f1_macro": 0.8509874086097018, + "eval_loss": 0.23665639758110046, + "eval_pr_auc": 0.7579557575566394, + "eval_precision": 0.740251572327044, + "eval_precision_macro": 0.8474729477355745, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.7591099645275717, + "eval_recall_macro": 0.854625906390061, + "eval_runtime": 0.26, + "eval_samples_per_second": 626.828, + "eval_steps_per_second": 3.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4716 + }, + { + "epoch": 263.0, + "eval_accuracy": 0.9201749033963799, + "eval_auc": 0.9479619047411422, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7501591343093571, + "eval_f1_macro": 0.8513291133243506, + "eval_loss": 0.23653987050056458, + "eval_pr_auc": 0.7584013256784117, + "eval_precision": 0.7404963870562362, + "eval_precision_macro": 0.8476822244653337, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7600773943889068, + "eval_recall_macro": 0.8551096213207285, + "eval_runtime": 0.2539, + "eval_samples_per_second": 641.941, + "eval_steps_per_second": 3.938, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4734 + }, + { + "epoch": 264.0, + "eval_accuracy": 0.9203274354281066, + "eval_auc": 0.9480515799865329, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7501195981502152, + "eval_f1_macro": 0.8513640482812168, + "eval_loss": 0.2363332211971283, + "eval_pr_auc": 0.7588834286830018, + "eval_precision": 0.7419558359621451, + "eval_precision_macro": 0.848278196802748, + "eval_pred_class_0": 16498, + "eval_pred_class_1": 3170, + "eval_predicted_binding_ratio": 0.16117551352450682, + "eval_recall": 0.7584650112866818, + "eval_recall_macro": 0.8545448736037441, + "eval_runtime": 0.253, + "eval_samples_per_second": 644.351, + "eval_steps_per_second": 3.953, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4752 + }, + { + "epoch": 265.0, + "eval_accuracy": 0.9203782794386821, + "eval_auc": 0.9481172938194913, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.750557502389296, + "eval_f1_macro": 0.8515931077800434, + "eval_loss": 0.23625436425209045, + "eval_pr_auc": 0.7591087523185005, + "eval_precision": 0.7415801070192005, + "eval_precision_macro": 0.8482019751638359, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7597549177684618, + "eval_recall_macro": 0.8550992854068361, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.903, + "eval_steps_per_second": 3.895, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4770 + }, + { + "epoch": 266.0, + "eval_accuracy": 0.9200223713646533, + "eval_auc": 0.9482093632596256, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7495621716287215, + "eval_f1_macro": 0.8509874086097018, + "eval_loss": 0.23614051938056946, + "eval_pr_auc": 0.7594934170637511, + "eval_precision": 0.740251572327044, + "eval_precision_macro": 0.8474729477355745, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.7591099645275717, + "eval_recall_macro": 0.854625906390061, + "eval_runtime": 0.2555, + "eval_samples_per_second": 638.024, + "eval_steps_per_second": 3.914, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4788 + }, + { + "epoch": 267.0, + "eval_accuracy": 0.9204291234492576, + "eval_auc": 0.9483264257570818, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7504385265507894, + "eval_f1_macro": 0.8515537559413557, + "eval_loss": 0.23595084249973297, + "eval_pr_auc": 0.7600448623712702, + "eval_precision": 0.7422712933753943, + "eval_precision_macro": 0.8484662322132155, + "eval_pred_class_0": 16498, + "eval_pred_class_1": 3170, + "eval_predicted_binding_ratio": 0.16117551352450682, + "eval_recall": 0.7587874879071267, + "eval_recall_macro": 0.8547362923932326, + "eval_runtime": 0.2589, + "eval_samples_per_second": 629.531, + "eval_steps_per_second": 3.862, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4806 + }, + { + "epoch": 268.0, + "eval_accuracy": 0.9203782794386821, + "eval_auc": 0.9483670102777331, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7509541984732825, + "eval_f1_macro": 0.8517842887791249, + "eval_loss": 0.23594258725643158, + "eval_pr_auc": 0.7601171664174119, + "eval_precision": 0.740822089739567, + "eval_precision_macro": 0.8479609508220922, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7613673008706868, + "eval_recall_macro": 0.8557545745616185, + "eval_runtime": 0.2463, + "eval_samples_per_second": 661.883, + "eval_steps_per_second": 4.061, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4824 + }, + { + "epoch": 269.0, + "eval_accuracy": 0.920276591417531, + "eval_auc": 0.9484603741402287, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7505567928730512, + "eval_f1_macro": 0.8515567625484772, + "eval_loss": 0.23575998842716217, + "eval_pr_auc": 0.7605631058573062, + "eval_precision": 0.7406593406593407, + "eval_precision_macro": 0.8478216317444613, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7607223476297968, + "eval_recall_macro": 0.8554320979411736, + "eval_runtime": 0.26, + "eval_samples_per_second": 626.928, + "eval_steps_per_second": 3.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4842 + }, + { + "epoch": 270.0, + "eval_accuracy": 0.9206833435021354, + "eval_auc": 0.9485513243429828, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.751434034416826, + "eval_f1_macro": 0.8521235507837306, + "eval_loss": 0.235606849193573, + "eval_pr_auc": 0.7610077759721279, + "eval_precision": 0.7426771653543307, + "eval_precision_macro": 0.8488138752255192, + "eval_pred_class_0": 16493, + "eval_pred_class_1": 3175, + "eval_predicted_binding_ratio": 0.16142973357738458, + "eval_recall": 0.7603998710093518, + "eval_recall_macro": 0.8555424839443451, + "eval_runtime": 0.2554, + "eval_samples_per_second": 638.303, + "eval_steps_per_second": 3.916, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4860 + }, + { + "epoch": 271.0, + "eval_accuracy": 0.9209884075655888, + "eval_auc": 0.948647082400222, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7519948930737312, + "eval_f1_macro": 0.8525018311755109, + "eval_loss": 0.23542079329490662, + "eval_pr_auc": 0.7614859215167992, + "eval_precision": 0.744391785150079, + "eval_precision_macro": 0.8496242389363071, + "eval_pred_class_0": 16503, + "eval_pred_class_1": 3165, + "eval_predicted_binding_ratio": 0.16092129347162903, + "eval_recall": 0.7597549177684618, + "eval_recall_macro": 0.8554614511580283, + "eval_runtime": 0.2456, + "eval_samples_per_second": 663.61, + "eval_steps_per_second": 4.071, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4878 + }, + { + "epoch": 272.0, + "eval_accuracy": 0.920734187512711, + "eval_auc": 0.9487102755159504, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7517120560598821, + "eval_f1_macro": 0.8522755458325244, + "eval_loss": 0.2353215366601944, + "eval_pr_auc": 0.761752604262035, + "eval_precision": 0.7426054122089364, + "eval_precision_macro": 0.8488345435817272, + "eval_pred_class_0": 16490, + "eval_pred_class_1": 3178, + "eval_predicted_binding_ratio": 0.16158226560911124, + "eval_recall": 0.7610448242502419, + "eval_recall_macro": 0.8558347800855242, + "eval_runtime": 0.2522, + "eval_samples_per_second": 646.279, + "eval_steps_per_second": 3.965, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4896 + }, + { + "epoch": 273.0, + "eval_accuracy": 0.920734187512711, + "eval_auc": 0.9487825490532058, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7517911160643209, + "eval_f1_macro": 0.8523136490925144, + "eval_loss": 0.23522616922855377, + "eval_pr_auc": 0.7620046741511783, + "eval_precision": 0.7424528301886792, + "eval_precision_macro": 0.8487858522607636, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.7613673008706868, + "eval_recall_macro": 0.8559658379164806, + "eval_runtime": 0.2581, + "eval_samples_per_second": 631.657, + "eval_steps_per_second": 3.875, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4914 + }, + { + "epoch": 274.0, + "eval_accuracy": 0.9208358755338621, + "eval_auc": 0.9488776355680169, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7518725099601593, + "eval_f1_macro": 0.8523888728682258, + "eval_loss": 0.2350645512342453, + "eval_pr_auc": 0.7623918274747735, + "eval_precision": 0.7432262129804663, + "eval_precision_macro": 0.8491200787225601, + "eval_pred_class_0": 16494, + "eval_pred_class_1": 3174, + "eval_predicted_binding_ratio": 0.16137888956680904, + "eval_recall": 0.7607223476297968, + "eval_recall_macro": 0.8557640832130997, + "eval_runtime": 0.2427, + "eval_samples_per_second": 671.591, + "eval_steps_per_second": 4.12, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4932 + }, + { + "epoch": 275.0, + "eval_accuracy": 0.9210900955867399, + "eval_auc": 0.9489531597599242, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7525510204081632, + "eval_f1_macro": 0.8528058755561261, + "eval_loss": 0.2349635362625122, + "eval_pr_auc": 0.7627186958629152, + "eval_precision": 0.7442447177546515, + "eval_precision_macro": 0.8496637300357182, + "eval_pred_class_0": 16497, + "eval_pred_class_1": 3171, + "eval_predicted_binding_ratio": 0.16122635753508235, + "eval_recall": 0.7610448242502419, + "eval_recall_macro": 0.8560460434403863, + "eval_runtime": 0.2606, + "eval_samples_per_second": 625.549, + "eval_steps_per_second": 3.838, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4950 + }, + { + "epoch": 276.0, + "eval_accuracy": 0.9207850315232866, + "eval_auc": 0.9489723619803666, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7524626628535113, + "eval_f1_macro": 0.8526552767085183, + "eval_loss": 0.2350020557641983, + "eval_pr_auc": 0.7627735453579831, + "eval_precision": 0.7416222987785781, + "eval_precision_macro": 0.8485653223786669, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.763624637213802, + "eval_recall_macro": 0.8569134232124421, + "eval_runtime": 0.2544, + "eval_samples_per_second": 640.627, + "eval_steps_per_second": 3.93, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4968 + }, + { + "epoch": 277.0, + "eval_accuracy": 0.9209884075655888, + "eval_auc": 0.9490740568619694, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7524689391525964, + "eval_f1_macro": 0.8527303253449472, + "eval_loss": 0.2348015159368515, + "eval_pr_auc": 0.7632371897507115, + "eval_precision": 0.7434686811457349, + "eval_precision_macro": 0.8493281796365992, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7616897774911319, + "eval_recall_macro": 0.8562477981437672, + "eval_runtime": 0.2476, + "eval_samples_per_second": 658.287, + "eval_steps_per_second": 4.039, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4986 + }, + { + "epoch": 277.77777777777777, + "grad_norm": 14799.8212890625, + "learning_rate": 4.904982238472025e-07, + "loss": 0.199, + "step": 5000 + }, + { + "epoch": 278.0, + "eval_accuracy": 0.9207850315232866, + "eval_auc": 0.9491519363186243, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7514358647096363, + "eval_f1_macro": 0.8521604145127957, + "eval_loss": 0.23470228910446167, + "eval_pr_auc": 0.7636185597213633, + "eval_precision": 0.7436059362172402, + "eval_precision_macro": 0.8491982774838095, + "eval_pred_class_0": 16501, + "eval_pred_class_1": 3167, + "eval_predicted_binding_ratio": 0.16102298149278016, + "eval_recall": 0.7594324411480168, + "eval_recall_macro": 0.8552096714100077, + "eval_runtime": 0.2561, + "eval_samples_per_second": 636.482, + "eval_steps_per_second": 3.905, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5004 + }, + { + "epoch": 279.0, + "eval_accuracy": 0.9209375635550132, + "eval_auc": 0.9492209202712323, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7521912350597609, + "eval_f1_macro": 0.8525784825369885, + "eval_loss": 0.2345963418483734, + "eval_pr_auc": 0.7638862490185815, + "eval_precision": 0.7435412728418399, + "eval_precision_macro": 0.8493079227068421, + "eval_pred_class_0": 16494, + "eval_pred_class_1": 3174, + "eval_predicted_binding_ratio": 0.16137888956680904, + "eval_recall": 0.7610448242502419, + "eval_recall_macro": 0.8559555020025882, + "eval_runtime": 0.273, + "eval_samples_per_second": 597.168, + "eval_steps_per_second": 3.664, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5022 + }, + { + "epoch": 280.0, + "eval_accuracy": 0.9210900955867399, + "eval_auc": 0.9492453877736104, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7532591414944356, + "eval_f1_macro": 0.8531471523002045, + "eval_loss": 0.23453067243099213, + "eval_pr_auc": 0.7639137465976396, + "eval_precision": 0.7428661022264033, + "eval_precision_macro": 0.8492229655497572, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7639471138342471, + "eval_recall_macro": 0.8572255639189947, + "eval_runtime": 0.2247, + "eval_samples_per_second": 725.266, + "eval_steps_per_second": 4.449, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5040 + }, + { + "epoch": 281.0, + "eval_accuracy": 0.9210900955867399, + "eval_auc": 0.9493002596027307, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7531806615776081, + "eval_f1_macro": 0.8531093334515976, + "eval_loss": 0.2344331294298172, + "eval_pr_auc": 0.7641447446055818, + "eval_precision": 0.7430185127078758, + "eval_precision_macro": 0.8492715280607518, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.763624637213802, + "eval_recall_macro": 0.8570945060880382, + "eval_runtime": 0.263, + "eval_samples_per_second": 619.693, + "eval_steps_per_second": 3.802, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5058 + }, + { + "epoch": 282.0, + "eval_accuracy": 0.9212426276184665, + "eval_auc": 0.9493485327975579, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7538534880025425, + "eval_f1_macro": 0.8534875889608693, + "eval_loss": 0.2344052791595459, + "eval_pr_auc": 0.764296468123137, + "eval_precision": 0.7431077694235589, + "eval_precision_macro": 0.849430796583593, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8577092788496623, + "eval_runtime": 0.2484, + "eval_samples_per_second": 656.097, + "eval_steps_per_second": 4.025, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5076 + }, + { + "epoch": 283.0, + "eval_accuracy": 0.9212426276184665, + "eval_auc": 0.9494190544850012, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7536969311496263, + "eval_f1_macro": 0.8534121476034391, + "eval_loss": 0.23425185680389404, + "eval_pr_auc": 0.7646652388104445, + "eval_precision": 0.7434127979924717, + "eval_precision_macro": 0.8495280009379834, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.764269590454692, + "eval_recall_macro": 0.8574471631877492, + "eval_runtime": 0.2547, + "eval_samples_per_second": 639.881, + "eval_steps_per_second": 3.926, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5094 + }, + { + "epoch": 284.0, + "eval_accuracy": 0.9213951596501933, + "eval_auc": 0.9495173137944721, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7539000318369946, + "eval_f1_macro": 0.853564041452472, + "eval_loss": 0.2340681403875351, + "eval_pr_auc": 0.7651886756989377, + "eval_precision": 0.744419993712669, + "eval_precision_macro": 0.8499803613859639, + "eval_pred_class_0": 16487, + "eval_pred_class_1": 3181, + "eval_predicted_binding_ratio": 0.1617347976408379, + "eval_recall": 0.763624637213802, + "eval_recall_macro": 0.8572755889636343, + "eval_runtime": 0.2477, + "eval_samples_per_second": 658.168, + "eval_steps_per_second": 4.038, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5112 + }, + { + "epoch": 285.0, + "eval_accuracy": 0.9213951596501933, + "eval_auc": 0.9495497619459952, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7540566337893733, + "eval_f1_macro": 0.8536395120535369, + "eval_loss": 0.234034925699234, + "eval_pr_auc": 0.7653794962608654, + "eval_precision": 0.7441130298273155, + "eval_precision_macro": 0.8498821534503319, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.764269590454692, + "eval_recall_macro": 0.8575377046255472, + "eval_runtime": 0.2024, + "eval_samples_per_second": 805.487, + "eval_steps_per_second": 4.942, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5130 + }, + { + "epoch": 286.0, + "eval_accuracy": 0.9215476916819199, + "eval_auc": 0.9496228140831674, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.754181934044926, + "eval_f1_macro": 0.8537539029854382, + "eval_loss": 0.23388919234275818, + "eval_pr_auc": 0.7657504791267543, + "eval_precision": 0.7452770780856424, + "eval_precision_macro": 0.8503853253634615, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.763302160593357, + "eval_recall_macro": 0.8572350725704758, + "eval_runtime": 0.1796, + "eval_samples_per_second": 907.381, + "eval_steps_per_second": 5.567, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5148 + }, + { + "epoch": 287.0, + "eval_accuracy": 0.9219544437665244, + "eval_auc": 0.9497348546111616, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7550662198819211, + "eval_f1_macro": 0.8543240621923138, + "eval_loss": 0.2337103933095932, + "eval_pr_auc": 0.7663000808208629, + "eval_precision": 0.7473152242577384, + "eval_precision_macro": 0.8513875842534602, + "eval_pred_class_0": 16502, + "eval_pred_class_1": 3166, + "eval_predicted_binding_ratio": 0.1609721374822046, + "eval_recall": 0.7629796839729119, + "eval_recall_macro": 0.8573454585736473, + "eval_runtime": 0.2659, + "eval_samples_per_second": 613.107, + "eval_steps_per_second": 3.761, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5166 + }, + { + "epoch": 288.0, + "eval_accuracy": 0.9218527557453732, + "eval_auc": 0.9498016974139991, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7548253309937789, + "eval_f1_macro": 0.8541719723587154, + "eval_loss": 0.23359474539756775, + "eval_pr_auc": 0.7666273574525592, + "eval_precision": 0.7468434343434344, + "eval_precision_macro": 0.8511489898989899, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.7629796839729119, + "eval_recall_macro": 0.8572850976151154, + "eval_runtime": 0.2686, + "eval_samples_per_second": 606.819, + "eval_steps_per_second": 3.723, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5184 + }, + { + "epoch": 289.0, + "eval_accuracy": 0.9216493797030709, + "eval_auc": 0.9498063690134986, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7546569017672345, + "eval_f1_macro": 0.8540188154275592, + "eval_loss": 0.23359988629817963, + "eval_pr_auc": 0.7666725529217715, + "eval_precision": 0.7452830188679245, + "eval_precision_macro": 0.8504738723645784, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.764269590454692, + "eval_recall_macro": 0.8576886070218773, + "eval_runtime": 0.2712, + "eval_samples_per_second": 600.959, + "eval_steps_per_second": 3.687, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5202 + }, + { + "epoch": 290.0, + "eval_accuracy": 0.9213951596501933, + "eval_auc": 0.9498248412965191, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7543692405465523, + "eval_f1_macro": 0.8537901526260393, + "eval_loss": 0.23362942039966583, + "eval_pr_auc": 0.7666566917414745, + "eval_precision": 0.7435014093329158, + "eval_precision_macro": 0.8496869717377781, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7655594969364721, + "eval_recall_macro": 0.8580619359493733, + "eval_runtime": 0.2158, + "eval_samples_per_second": 755.492, + "eval_steps_per_second": 4.635, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5220 + }, + { + "epoch": 291.0, + "eval_accuracy": 0.9219544437665244, + "eval_auc": 0.9499087646350263, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7554564282300462, + "eval_f1_macro": 0.8545121458733945, + "eval_loss": 0.23345860838890076, + "eval_pr_auc": 0.767032300253484, + "eval_precision": 0.746536523929471, + "eval_precision_macro": 0.8511363192046093, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.764592067075137, + "eval_recall_macro": 0.8580007477284299, + "eval_runtime": 0.2582, + "eval_samples_per_second": 631.391, + "eval_steps_per_second": 3.874, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5238 + }, + { + "epoch": 292.0, + "eval_accuracy": 0.9226154159040065, + "eval_auc": 0.9500412726083274, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7567135549872123, + "eval_f1_macro": 0.8553519407342349, + "eval_loss": 0.23320625722408295, + "eval_pr_auc": 0.7678050059622479, + "eval_precision": 0.7502377179080824, + "eval_precision_macro": 0.8528939452496871, + "eval_pred_class_0": 16513, + "eval_pred_class_1": 3155, + "eval_predicted_binding_ratio": 0.1604128533658735, + "eval_recall": 0.763302160593357, + "eval_recall_macro": 0.857868862635062, + "eval_runtime": 0.2022, + "eval_samples_per_second": 806.124, + "eval_steps_per_second": 4.946, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5256 + }, + { + "epoch": 293.0, + "eval_accuracy": 0.9219035997559487, + "eval_auc": 0.9500337688516315, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7551801083838062, + "eval_f1_macro": 0.8543609694420392, + "eval_loss": 0.23323103785514832, + "eval_pr_auc": 0.7676369094541509, + "eval_precision": 0.7466120390797353, + "eval_precision_macro": 0.8511174775574487, + "eval_pred_class_0": 16495, + "eval_pred_class_1": 3173, + "eval_predicted_binding_ratio": 0.16132804555623348, + "eval_recall": 0.7639471138342471, + "eval_recall_macro": 0.8577084515872508, + "eval_runtime": 0.2457, + "eval_samples_per_second": 663.373, + "eval_steps_per_second": 4.07, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5274 + }, + { + "epoch": 294.0, + "eval_accuracy": 0.9219035997559487, + "eval_auc": 0.9500722803500048, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7554140127388536, + "eval_f1_macro": 0.8544737053045671, + "eval_loss": 0.23318050801753998, + "eval_pr_auc": 0.7677669872107012, + "eval_precision": 0.7461465869770368, + "eval_precision_macro": 0.8509676473001504, + "eval_pred_class_0": 16489, + "eval_pred_class_1": 3179, + "eval_predicted_binding_ratio": 0.1616331096196868, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8581016250801203, + "eval_runtime": 0.2622, + "eval_samples_per_second": 621.765, + "eval_steps_per_second": 3.815, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5292 + }, + { + "epoch": 295.0, + "eval_accuracy": 0.9223103518405532, + "eval_auc": 0.9501381693679445, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7562998405103668, + "eval_f1_macro": 0.8550446157127531, + "eval_loss": 0.23305083811283112, + "eval_pr_auc": 0.7681080957655504, + "eval_precision": 0.7481855474913222, + "eval_precision_macro": 0.8519702208636682, + "eval_pred_class_0": 16499, + "eval_pred_class_1": 3169, + "eval_predicted_binding_ratio": 0.16112466951393126, + "eval_recall": 0.764592067075137, + "eval_recall_macro": 0.8582120110832919, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.885, + "eval_steps_per_second": 3.87, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5310 + }, + { + "epoch": 296.0, + "eval_accuracy": 0.9220052877770999, + "eval_auc": 0.9501577511558462, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7556546670914304, + "eval_f1_macro": 0.8546256879531203, + "eval_loss": 0.23304298520088196, + "eval_pr_auc": 0.7681639014007426, + "eval_precision": 0.7466163046899591, + "eval_precision_macro": 0.8512051870912047, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8581619860386525, + "eval_runtime": 0.2233, + "eval_samples_per_second": 730.092, + "eval_steps_per_second": 4.479, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5328 + }, + { + "epoch": 297.0, + "eval_accuracy": 0.9223611958511287, + "eval_auc": 0.9502301512155882, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7564981661616967, + "eval_f1_macro": 0.8551582014839937, + "eval_loss": 0.23290005326271057, + "eval_pr_auc": 0.7685493782671796, + "eval_precision": 0.7482649842271294, + "eval_precision_macro": 0.8520389050120978, + "eval_pred_class_0": 16498, + "eval_pred_class_1": 3170, + "eval_predicted_binding_ratio": 0.16117551352450682, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8583732493935144, + "eval_runtime": 0.259, + "eval_samples_per_second": 629.312, + "eval_steps_per_second": 3.861, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5346 + }, + { + "epoch": 298.0, + "eval_accuracy": 0.9224628838722798, + "eval_auc": 0.9503260649928105, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.756739511883873, + "eval_f1_macro": 0.8553105125875349, + "eval_loss": 0.23275841772556305, + "eval_pr_auc": 0.768979970846171, + "eval_precision": 0.7487373737373737, + "eval_precision_macro": 0.8522777777777777, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8584336103520465, + "eval_runtime": 0.265, + "eval_samples_per_second": 615.056, + "eval_steps_per_second": 3.773, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5364 + }, + { + "epoch": 299.0, + "eval_accuracy": 0.9224120398617043, + "eval_auc": 0.9503799246420391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7565411614550096, + "eval_f1_macro": 0.8551969143430849, + "eval_loss": 0.23264609277248383, + "eval_pr_auc": 0.7692428666566218, + "eval_precision": 0.748658035996211, + "eval_precision_macro": 0.8522091464751675, + "eval_pred_class_0": 16501, + "eval_pred_class_1": 3167, + "eval_predicted_binding_ratio": 0.16102298149278016, + "eval_recall": 0.764592067075137, + "eval_recall_macro": 0.8582723720418239, + "eval_runtime": 0.2384, + "eval_samples_per_second": 683.765, + "eval_steps_per_second": 4.195, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5382 + }, + { + "epoch": 300.0, + "eval_accuracy": 0.9223611958511287, + "eval_auc": 0.9503983969250598, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7565758010521282, + "eval_f1_macro": 0.8551956221484214, + "eval_loss": 0.23263320326805115, + "eval_pr_auc": 0.7693119480202146, + "eval_precision": 0.748108448928121, + "eval_precision_macro": 0.8519882690809373, + "eval_pred_class_0": 16496, + "eval_pred_class_1": 3172, + "eval_predicted_binding_ratio": 0.16127720154565792, + "eval_recall": 0.7652370203160271, + "eval_recall_macro": 0.8585043072244709, + "eval_runtime": 0.264, + "eval_samples_per_second": 617.523, + "eval_steps_per_second": 3.788, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5400 + }, + { + "epoch": 301.0, + "eval_accuracy": 0.9225137278828553, + "eval_auc": 0.9504205091626904, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7572475310608474, + "eval_f1_macro": 0.855573369257207, + "eval_loss": 0.2325783669948578, + "eval_pr_auc": 0.7694562109808358, + "eval_precision": 0.7481901164620711, + "eval_precision_macro": 0.8521436908185075, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7665269267978072, + "eval_recall_macro": 0.859119079986095, + "eval_runtime": 0.2413, + "eval_samples_per_second": 675.443, + "eval_steps_per_second": 4.144, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5418 + }, + { + "epoch": 302.0, + "eval_accuracy": 0.9223611958511287, + "eval_auc": 0.9504706120673215, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.756808408982322, + "eval_f1_macro": 0.8553077347570655, + "eval_loss": 0.23251411318778992, + "eval_pr_auc": 0.7697272239104135, + "eval_precision": 0.7476400251730648, + "eval_precision_macro": 0.8518369925744038, + "eval_pred_class_0": 16490, + "eval_pred_class_1": 3178, + "eval_predicted_binding_ratio": 0.16158226560911124, + "eval_recall": 0.7662044501773622, + "eval_recall_macro": 0.8588974807173404, + "eval_runtime": 0.2708, + "eval_samples_per_second": 602.022, + "eval_steps_per_second": 3.693, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5436 + }, + { + "epoch": 303.0, + "eval_accuracy": 0.9223611958511287, + "eval_auc": 0.9504959749596038, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7571178622554477, + "eval_f1_macro": 0.8554568705510044, + "eval_loss": 0.23248492181301117, + "eval_pr_auc": 0.7698215821647963, + "eval_precision": 0.7470182046453233, + "eval_precision_macro": 0.8516367567335341, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8594217120411665, + "eval_runtime": 0.2271, + "eval_samples_per_second": 717.802, + "eval_steps_per_second": 4.404, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5454 + }, + { + "epoch": 304.0, + "eval_accuracy": 0.9228187919463087, + "eval_auc": 0.9505587106478812, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7579719387755102, + "eval_f1_macro": 0.8560304891070873, + "eval_loss": 0.23235370218753815, + "eval_pr_auc": 0.7701546218803492, + "eval_precision": 0.749605802585935, + "eval_precision_macro": 0.8528595176474563, + "eval_pred_class_0": 16497, + "eval_pred_class_1": 3171, + "eval_predicted_binding_ratio": 0.16122635753508235, + "eval_recall": 0.7665269267978072, + "eval_recall_macro": 0.8593001628616911, + "eval_runtime": 0.1849, + "eval_samples_per_second": 881.772, + "eval_steps_per_second": 5.41, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5472 + }, + { + "epoch": 305.0, + "eval_accuracy": 0.9231238560097621, + "eval_auc": 0.9506673837312365, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7583120204603581, + "eval_f1_macro": 0.8563023222011585, + "eval_loss": 0.23212042450904846, + "eval_pr_auc": 0.7706813722507476, + "eval_precision": 0.7518225039619651, + "eval_precision_macro": 0.8538377341465491, + "eval_pred_class_0": 16513, + "eval_pred_class_1": 3155, + "eval_predicted_binding_ratio": 0.1604128533658735, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8588259565825047, + "eval_runtime": 0.1758, + "eval_samples_per_second": 927.397, + "eval_steps_per_second": 5.69, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5490 + }, + { + "epoch": 305.55555555555554, + "grad_norm": 15827.6396484375, + "learning_rate": 3.943376017723057e-07, + "loss": 0.1954, + "step": 5500 + }, + { + "epoch": 306.0, + "eval_accuracy": 0.9233272320520642, + "eval_auc": 0.9506862453142154, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7591823698498882, + "eval_f1_macro": 0.8567938214370079, + "eval_loss": 0.23211389780044556, + "eval_pr_auc": 0.7707434518060764, + "eval_precision": 0.7519772223979754, + "eval_precision_macro": 0.8540585209342515, + "eval_pred_class_0": 16507, + "eval_pred_class_1": 3161, + "eval_predicted_binding_ratio": 0.1607179174293268, + "eval_recall": 0.7665269267978072, + "eval_recall_macro": 0.8596019676543512, + "eval_runtime": 0.2349, + "eval_samples_per_second": 693.884, + "eval_steps_per_second": 4.257, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5508 + }, + { + "epoch": 307.0, + "eval_accuracy": 0.9230730119991865, + "eval_auc": 0.9506972041080411, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7588075880758808, + "eval_f1_macro": 0.8565232326853711, + "eval_loss": 0.23213696479797363, + "eval_pr_auc": 0.7706724583082463, + "eval_precision": 0.7503152585119798, + "eval_precision_macro": 0.8533038465207814, + "eval_pred_class_0": 16496, + "eval_pred_class_1": 3172, + "eval_predicted_binding_ratio": 0.16127720154565792, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8598442387508907, + "eval_runtime": 0.2681, + "eval_samples_per_second": 608.072, + "eval_steps_per_second": 3.731, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5526 + }, + { + "epoch": 308.0, + "eval_accuracy": 0.9229204799674599, + "eval_auc": 0.9507381389986547, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.758521822236381, + "eval_f1_macro": 0.8563315143004762, + "eval_loss": 0.23207640647888184, + "eval_pr_auc": 0.7708824410889222, + "eval_precision": 0.7494491658797607, + "eval_precision_macro": 0.8528944938003498, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7678168332795873, + "eval_recall_macro": 0.8598847551440492, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.193, + "eval_steps_per_second": 3.946, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5544 + }, + { + "epoch": 309.0, + "eval_accuracy": 0.9233272320520642, + "eval_auc": 0.9508327778185136, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7593360995850622, + "eval_f1_macro": 0.8568679288369823, + "eval_loss": 0.2318853884935379, + "eval_pr_auc": 0.7713979747932131, + "eval_precision": 0.7516587677725118, + "eval_precision_macro": 0.8539545732457663, + "eval_pred_class_0": 16503, + "eval_pred_class_1": 3165, + "eval_predicted_binding_ratio": 0.16092129347162903, + "eval_recall": 0.7671718800386972, + "eval_recall_macro": 0.8598640833162641, + "eval_runtime": 0.3164, + "eval_samples_per_second": 515.146, + "eval_steps_per_second": 3.16, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5562 + }, + { + "epoch": 310.0, + "eval_accuracy": 0.9231238560097621, + "eval_auc": 0.9508632794702453, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7587747287811104, + "eval_f1_macro": 0.8565253830188363, + "eval_loss": 0.23183651268482208, + "eval_pr_auc": 0.7715329415149417, + "eval_precision": 0.7508683296495106, + "eval_precision_macro": 0.8535264016588866, + "eval_pred_class_0": 16501, + "eval_pred_class_1": 3167, + "eval_predicted_binding_ratio": 0.16102298149278016, + "eval_recall": 0.7668494034182521, + "eval_recall_macro": 0.8596123035682436, + "eval_runtime": 0.2643, + "eval_samples_per_second": 616.635, + "eval_steps_per_second": 3.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5580 + }, + { + "epoch": 311.0, + "eval_accuracy": 0.9230730119991865, + "eval_auc": 0.9509030269959862, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7588844621513944, + "eval_f1_macro": 0.8565602855810055, + "eval_loss": 0.23182560503482819, + "eval_pr_auc": 0.7716106695707178, + "eval_precision": 0.7501575299306869, + "eval_precision_macro": 0.8532526463767658, + "eval_pred_class_0": 16494, + "eval_pred_class_1": 3174, + "eval_predicted_binding_ratio": 0.16137888956680904, + "eval_recall": 0.7678168332795873, + "eval_recall_macro": 0.8599752965818471, + "eval_runtime": 0.2638, + "eval_samples_per_second": 617.932, + "eval_steps_per_second": 3.791, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5598 + }, + { + "epoch": 312.0, + "eval_accuracy": 0.9231238560097621, + "eval_auc": 0.9509431638216853, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7591589678241478, + "eval_f1_macro": 0.8567105868221108, + "eval_loss": 0.23177149891853333, + "eval_pr_auc": 0.7718084005522707, + "eval_precision": 0.7500786905886057, + "eval_precision_macro": 0.853269895291271, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7684617865204773, + "eval_recall_macro": 0.8602675927230261, + "eval_runtime": 0.2364, + "eval_samples_per_second": 689.439, + "eval_steps_per_second": 4.23, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5616 + }, + { + "epoch": 313.0, + "eval_accuracy": 0.9230730119991865, + "eval_auc": 0.9509867264870173, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7590380633858895, + "eval_f1_macro": 0.856634317411552, + "eval_loss": 0.23171813786029816, + "eval_pr_auc": 0.771954368377823, + "eval_precision": 0.749842668344871, + "eval_precision_macro": 0.8531505640086999, + "eval_pred_class_0": 16490, + "eval_pred_class_1": 3178, + "eval_predicted_binding_ratio": 0.16158226560911124, + "eval_recall": 0.7684617865204773, + "eval_recall_macro": 0.8602374122437602, + "eval_runtime": 0.2592, + "eval_samples_per_second": 628.85, + "eval_steps_per_second": 3.858, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5634 + }, + { + "epoch": 314.0, + "eval_accuracy": 0.9228696359568843, + "eval_auc": 0.9509922740114228, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7584010192705845, + "eval_f1_macro": 0.8562552937959844, + "eval_loss": 0.23173367977142334, + "eval_pr_auc": 0.7720376545527768, + "eval_precision": 0.7492133417243549, + "eval_precision_macro": 0.8527752578846153, + "eval_pred_class_0": 16490, + "eval_pred_class_1": 3178, + "eval_predicted_binding_ratio": 0.16158226560911124, + "eval_recall": 0.7678168332795873, + "eval_recall_macro": 0.8598545746647831, + "eval_runtime": 0.2664, + "eval_samples_per_second": 611.795, + "eval_steps_per_second": 3.753, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5652 + }, + { + "epoch": 315.0, + "eval_accuracy": 0.9233780760626398, + "eval_auc": 0.9510762654774227, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7595340673368438, + "eval_f1_macro": 0.8569813431425517, + "eval_loss": 0.23158977925777435, + "eval_pr_auc": 0.7725350282702966, + "eval_precision": 0.7517372078332281, + "eval_precision_macro": 0.8540227670483556, + "eval_pred_class_0": 16502, + "eval_pred_class_1": 3166, + "eval_predicted_binding_ratio": 0.1609721374822046, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8600253216264866, + "eval_runtime": 0.2525, + "eval_samples_per_second": 645.435, + "eval_steps_per_second": 3.96, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5670 + }, + { + "epoch": 316.0, + "eval_accuracy": 0.9236322961155176, + "eval_auc": 0.9511527434542277, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7601405301820504, + "eval_f1_macro": 0.8573636072933594, + "eval_loss": 0.23149563372135162, + "eval_pr_auc": 0.7728791764414321, + "eval_precision": 0.7529262891490035, + "eval_precision_macro": 0.8546239248495366, + "eval_pred_class_0": 16507, + "eval_pred_class_1": 3161, + "eval_predicted_binding_ratio": 0.1607179174293268, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8601762240228168, + "eval_runtime": 0.2078, + "eval_samples_per_second": 784.391, + "eval_steps_per_second": 4.812, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5688 + }, + { + "epoch": 317.0, + "eval_accuracy": 0.923581452104942, + "eval_auc": 0.9511918583675363, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7598657932577089, + "eval_f1_macro": 0.8572131820235396, + "eval_loss": 0.23143813014030457, + "eval_pr_auc": 0.7730966214358813, + "eval_precision": 0.7530082330588981, + "eval_precision_macro": 0.8546082958147307, + "eval_pred_class_0": 16510, + "eval_pred_class_1": 3158, + "eval_predicted_binding_ratio": 0.16056538539760015, + "eval_recall": 0.7668494034182521, + "eval_recall_macro": 0.8598839278816377, + "eval_runtime": 0.2098, + "eval_samples_per_second": 777.022, + "eval_steps_per_second": 4.767, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5706 + }, + { + "epoch": 318.0, + "eval_accuracy": 0.923479764083791, + "eval_auc": 0.9512240340090886, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7597765363128491, + "eval_f1_macro": 0.8571341935895835, + "eval_loss": 0.23142649233341217, + "eval_pr_auc": 0.7731516186784236, + "eval_precision": 0.7522123893805309, + "eval_precision_macro": 0.8542630051604545, + "eval_pred_class_0": 16504, + "eval_pred_class_1": 3164, + "eval_predicted_binding_ratio": 0.1608704494610535, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8600856825850187, + "eval_runtime": 0.2091, + "eval_samples_per_second": 779.633, + "eval_steps_per_second": 4.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5724 + }, + { + "epoch": 319.0, + "eval_accuracy": 0.9236322961155176, + "eval_auc": 0.9512825457928188, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7600638977635783, + "eval_f1_macro": 0.8573266640831436, + "eval_loss": 0.2313271462917328, + "eval_pr_auc": 0.7734563478045352, + "eval_precision": 0.7530864197530864, + "eval_precision_macro": 0.8546763493762101, + "eval_pred_class_0": 16509, + "eval_pred_class_1": 3159, + "eval_predicted_binding_ratio": 0.16061622940817571, + "eval_recall": 0.7671718800386972, + "eval_recall_macro": 0.8600451661918602, + "eval_runtime": 0.2154, + "eval_samples_per_second": 756.779, + "eval_steps_per_second": 4.643, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5742 + }, + { + "epoch": 320.0, + "eval_accuracy": 0.9237848281472443, + "eval_auc": 0.9513326194999532, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7605048729829046, + "eval_f1_macro": 0.8575931868618003, + "eval_loss": 0.23122872412204742, + "eval_pr_auc": 0.7736938128704516, + "eval_precision": 0.7536415452818239, + "eval_precision_macro": 0.8549855212781016, + "eval_pred_class_0": 16510, + "eval_pred_class_1": 3158, + "eval_predicted_binding_ratio": 0.16056538539760015, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8602667654606148, + "eval_runtime": 0.235, + "eval_samples_per_second": 693.713, + "eval_steps_per_second": 4.256, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5760 + }, + { + "epoch": 321.0, + "eval_accuracy": 0.9236322961155176, + "eval_auc": 0.9513477438033324, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7602171136653896, + "eval_f1_macro": 0.8574005258699469, + "eval_loss": 0.2312333732843399, + "eval_pr_auc": 0.773709338696213, + "eval_precision": 0.7527663610496365, + "eval_precision_macro": 0.8545716082739852, + "eval_pred_class_0": 16505, + "eval_pred_class_1": 3163, + "eval_predicted_binding_ratio": 0.16081960545047794, + "eval_recall": 0.7678168332795873, + "eval_recall_macro": 0.8603072818537733, + "eval_runtime": 0.2276, + "eval_samples_per_second": 716.105, + "eval_steps_per_second": 4.393, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5778 + }, + { + "epoch": 322.0, + "eval_accuracy": 0.9235306080943665, + "eval_auc": 0.9513562889374167, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7602040816326531, + "eval_f1_macro": 0.8573582711574831, + "eval_loss": 0.23121465742588043, + "eval_pr_auc": 0.7737540605936648, + "eval_precision": 0.7518133081046988, + "eval_precision_macro": 0.854175430193466, + "eval_pred_class_0": 16497, + "eval_pred_class_1": 3171, + "eval_predicted_binding_ratio": 0.16122635753508235, + "eval_recall": 0.7687842631409223, + "eval_recall_macro": 0.8606400943881107, + "eval_runtime": 0.2488, + "eval_samples_per_second": 655.047, + "eval_steps_per_second": 4.019, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5796 + }, + { + "epoch": 323.0, + "eval_accuracy": 0.9236831401260931, + "eval_auc": 0.9514191998106756, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7604914632200415, + "eval_f1_macro": 0.8575507604890313, + "eval_loss": 0.23111233115196228, + "eval_pr_auc": 0.7739781495758574, + "eval_precision": 0.7526847757422616, + "eval_precision_macro": 0.8545874490758332, + "eval_pred_class_0": 16502, + "eval_pred_class_1": 3166, + "eval_predicted_binding_ratio": 0.1609721374822046, + "eval_recall": 0.7684617865204773, + "eval_recall_macro": 0.8605995779949522, + "eval_runtime": 0.2355, + "eval_samples_per_second": 692.067, + "eval_steps_per_second": 4.246, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5814 + }, + { + "epoch": 324.0, + "eval_accuracy": 0.9236831401260931, + "eval_auc": 0.9514517842171841, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7604914632200415, + "eval_f1_macro": 0.8575507604890313, + "eval_loss": 0.23104801774024963, + "eval_pr_auc": 0.7741130008089699, + "eval_precision": 0.7526847757422616, + "eval_precision_macro": 0.8545874490758332, + "eval_pred_class_0": 16502, + "eval_pred_class_1": 3166, + "eval_predicted_binding_ratio": 0.1609721374822046, + "eval_recall": 0.7684617865204773, + "eval_recall_macro": 0.8605995779949522, + "eval_runtime": 0.2425, + "eval_samples_per_second": 672.295, + "eval_steps_per_second": 4.125, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5832 + }, + { + "epoch": 325.0, + "eval_accuracy": 0.9237339841366687, + "eval_auc": 0.951504651151519, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7606128311522502, + "eval_f1_macro": 0.857627250169412, + "eval_loss": 0.23095941543579102, + "eval_pr_auc": 0.7744096919390852, + "eval_precision": 0.7529225908372827, + "eval_precision_macro": 0.8547076748648027, + "eval_pred_class_0": 16503, + "eval_pred_class_1": 3165, + "eval_predicted_binding_ratio": 0.16092129347162903, + "eval_recall": 0.7684617865204773, + "eval_recall_macro": 0.8606297584742182, + "eval_runtime": 0.2175, + "eval_samples_per_second": 749.374, + "eval_steps_per_second": 4.597, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5850 + }, + { + "epoch": 326.0, + "eval_accuracy": 0.9237848281472443, + "eval_auc": 0.951546208922066, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7606578317100431, + "eval_f1_macro": 0.8576669257120046, + "eval_loss": 0.23088191449642181, + "eval_pr_auc": 0.774641356281408, + "eval_precision": 0.7533206831119544, + "eval_precision_macro": 0.8548803827531177, + "eval_pred_class_0": 16506, + "eval_pred_class_1": 3162, + "eval_predicted_binding_ratio": 0.16076876143990237, + "eval_recall": 0.7681393099000322, + "eval_recall_macro": 0.8605288811225278, + "eval_runtime": 0.2627, + "eval_samples_per_second": 620.56, + "eval_steps_per_second": 3.807, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5868 + }, + { + "epoch": 327.0, + "eval_accuracy": 0.9237848281472443, + "eval_auc": 0.9515728175742149, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7608105951811074, + "eval_f1_macro": 0.8577405662711912, + "eval_loss": 0.23083868622779846, + "eval_pr_auc": 0.7747608129205691, + "eval_precision": 0.7530006317119393, + "eval_precision_macro": 0.8547756764183257, + "eval_pred_class_0": 16502, + "eval_pred_class_1": 3166, + "eval_predicted_binding_ratio": 0.1609721374822046, + "eval_recall": 0.7687842631409223, + "eval_recall_macro": 0.8607909967844407, + "eval_runtime": 0.2492, + "eval_samples_per_second": 654.065, + "eval_steps_per_second": 4.013, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5886 + }, + { + "epoch": 328.0, + "eval_accuracy": 0.9238356721578198, + "eval_auc": 0.9516116113150578, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7611607142857143, + "eval_f1_macro": 0.8579273206076528, + "eval_loss": 0.23078228533267975, + "eval_pr_auc": 0.7749744562806883, + "eval_precision": 0.7527593818984547, + "eval_precision_macro": 0.8547393927131844, + "eval_pred_class_0": 16497, + "eval_pred_class_1": 3171, + "eval_predicted_binding_ratio": 0.16122635753508235, + "eval_recall": 0.7697516930022573, + "eval_recall_macro": 0.8612143507565763, + "eval_runtime": 0.2121, + "eval_samples_per_second": 768.541, + "eval_steps_per_second": 4.715, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5904 + }, + { + "epoch": 329.0, + "eval_accuracy": 0.9238865161683953, + "eval_auc": 0.9516614027797223, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7614342629482072, + "eval_f1_macro": 0.8580771629311073, + "eval_loss": 0.2307167798280716, + "eval_pr_auc": 0.7752067172424865, + "eval_precision": 0.7526780088216761, + "eval_precision_macro": 0.8547553982510223, + "eval_pred_class_0": 16494, + "eval_pred_class_1": 3174, + "eval_predicted_binding_ratio": 0.16137888956680904, + "eval_recall": 0.7703966462431474, + "eval_recall_macro": 0.8615066468977552, + "eval_runtime": 0.2538, + "eval_samples_per_second": 642.134, + "eval_steps_per_second": 3.939, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5922 + }, + { + "epoch": 330.0, + "eval_accuracy": 0.9236322961155176, + "eval_auc": 0.9516455582714203, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7612841703750794, + "eval_f1_macro": 0.857914812460267, + "eval_loss": 0.23076769709587097, + "eval_pr_auc": 0.775112377066796, + "eval_precision": 0.750548417424005, + "eval_precision_macro": 0.8538504058352652, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7723315059658175, + "eval_recall_macro": 0.8621420914871643, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.508, + "eval_steps_per_second": 3.868, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5940 + }, + { + "epoch": 331.0, + "eval_accuracy": 0.9240390482001221, + "eval_auc": 0.9516963035209824, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7621776504297995, + "eval_f1_macro": 0.8584894423868002, + "eval_loss": 0.23067235946655273, + "eval_pr_auc": 0.7753820734835624, + "eval_precision": 0.7525935240490412, + "eval_precision_macro": 0.8548556265844769, + "eval_pred_class_0": 16487, + "eval_pred_class_1": 3181, + "eval_predicted_binding_ratio": 0.1617347976408379, + "eval_recall": 0.7720090293453724, + "eval_recall_macro": 0.8622524774903357, + "eval_runtime": 0.1824, + "eval_samples_per_second": 893.499, + "eval_steps_per_second": 5.482, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5958 + }, + { + "epoch": 332.0, + "eval_accuracy": 0.9240390482001221, + "eval_auc": 0.9517492385828104, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7619502868068834, + "eval_f1_macro": 0.8583798620967267, + "eval_loss": 0.23056790232658386, + "eval_pr_auc": 0.775607049366595, + "eval_precision": 0.7530708661417322, + "eval_precision_macro": 0.8550111500417023, + "eval_pred_class_0": 16493, + "eval_pred_class_1": 3175, + "eval_predicted_binding_ratio": 0.16142973357738458, + "eval_recall": 0.7710415994840374, + "eval_recall_macro": 0.8618593039974662, + "eval_runtime": 0.2665, + "eval_samples_per_second": 611.679, + "eval_steps_per_second": 3.753, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5976 + }, + { + "epoch": 333.0, + "eval_accuracy": 0.9240898922106976, + "eval_auc": 0.9517777256072577, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7621475227019276, + "eval_f1_macro": 0.8584929210351648, + "eval_loss": 0.23053352534770966, + "eval_pr_auc": 0.7757600766000483, + "eval_precision": 0.7531486146095718, + "eval_precision_macro": 0.855079036870636, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7713640761044824, + "eval_recall_macro": 0.8620205423076888, + "eval_runtime": 0.2613, + "eval_samples_per_second": 623.883, + "eval_steps_per_second": 3.828, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5994 + }, + { + "epoch": 333.3333333333333, + "grad_norm": 16736.6328125, + "learning_rate": 3.021381973636964e-07, + "loss": 0.1913, + "step": 6000 + }, + { + "epoch": 334.0, + "eval_accuracy": 0.9237848281472443, + "eval_auc": 0.9517933365355851, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7616473207187152, + "eval_f1_macro": 0.8581438407085573, + "eval_loss": 0.23052088916301727, + "eval_pr_auc": 0.7758187649274527, + "eval_precision": 0.751254705144291, + "eval_precision_macro": 0.8542074496595242, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7723315059658175, + "eval_recall_macro": 0.8622326329249622, + "eval_runtime": 0.2573, + "eval_samples_per_second": 633.473, + "eval_steps_per_second": 3.886, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6012 + }, + { + "epoch": 335.0, + "eval_accuracy": 0.9238356721578198, + "eval_auc": 0.9518365877609504, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7619198982835347, + "eval_f1_macro": 0.8582932017746205, + "eval_loss": 0.23045583069324493, + "eval_pr_auc": 0.7760158372270667, + "eval_precision": 0.7511751801942964, + "eval_precision_macro": 0.8542244778801185, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8625249290661412, + "eval_runtime": 0.219, + "eval_samples_per_second": 744.186, + "eval_steps_per_second": 4.566, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6030 + }, + { + "epoch": 336.0, + "eval_accuracy": 0.9241915802318487, + "eval_auc": 0.9518931822423861, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7624661462482077, + "eval_f1_macro": 0.8586824817571539, + "eval_loss": 0.23035065829753876, + "eval_pr_auc": 0.7762652864261658, + "eval_precision": 0.753463476070529, + "eval_precision_macro": 0.855266785330923, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7716865527249275, + "eval_recall_macro": 0.8622119610971773, + "eval_runtime": 0.2363, + "eval_samples_per_second": 689.881, + "eval_steps_per_second": 4.232, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6048 + }, + { + "epoch": 337.0, + "eval_accuracy": 0.9240390482001221, + "eval_auc": 0.9519021264089276, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7623289850461342, + "eval_f1_macro": 0.8585623745200415, + "eval_loss": 0.2303379327058792, + "eval_pr_auc": 0.7763353590359, + "eval_precision": 0.752276295133438, + "eval_precision_macro": 0.8547524774823897, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7726539825862625, + "eval_recall_macro": 0.8625145931522488, + "eval_runtime": 0.2467, + "eval_samples_per_second": 660.828, + "eval_steps_per_second": 4.054, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6066 + }, + { + "epoch": 338.0, + "eval_accuracy": 0.9236831401260931, + "eval_auc": 0.9519062432559864, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7616325234238527, + "eval_f1_macro": 0.8581006831532533, + "eval_loss": 0.23036180436611176, + "eval_pr_auc": 0.7763420780879606, + "eval_precision": 0.7503128911138923, + "eval_precision_macro": 0.8538172032062905, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7732989358271525, + "eval_recall_macro": 0.8625654454592997, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.319, + "eval_steps_per_second": 3.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6084 + }, + { + "epoch": 339.0, + "eval_accuracy": 0.9237339841366687, + "eval_auc": 0.9519363653402588, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7618291521117815, + "eval_f1_macro": 0.8582134440261069, + "eval_loss": 0.23031854629516602, + "eval_pr_auc": 0.776475073481046, + "eval_precision": 0.7503909915545824, + "eval_precision_macro": 0.8538853142461151, + "eval_pred_class_0": 16471, + "eval_pred_class_1": 3197, + "eval_predicted_binding_ratio": 0.16254830181004679, + "eval_recall": 0.7736214124475975, + "eval_recall_macro": 0.8627266837695222, + "eval_runtime": 0.2533, + "eval_samples_per_second": 643.603, + "eval_steps_per_second": 3.948, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6102 + }, + { + "epoch": 340.0, + "eval_accuracy": 0.9236831401260931, + "eval_auc": 0.9519665750170216, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7616325234238527, + "eval_f1_macro": 0.8581006831532533, + "eval_loss": 0.23026354610919952, + "eval_pr_auc": 0.7766276763039114, + "eval_precision": 0.7503128911138923, + "eval_precision_macro": 0.8538172032062905, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7732989358271525, + "eval_recall_macro": 0.8625654454592997, + "eval_runtime": 0.2621, + "eval_samples_per_second": 621.893, + "eval_steps_per_second": 3.815, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6120 + }, + { + "epoch": 341.0, + "eval_accuracy": 0.9240898922106976, + "eval_auc": 0.9520129601070512, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7625258469858438, + "eval_f1_macro": 0.8586752506435165, + "eval_loss": 0.230192169547081, + "eval_pr_auc": 0.7768138361852162, + "eval_precision": 0.7523540489642184, + "eval_precision_macro": 0.8548203930053466, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8626758314624713, + "eval_runtime": 0.2426, + "eval_samples_per_second": 671.971, + "eval_steps_per_second": 4.123, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6138 + }, + { + "epoch": 342.0, + "eval_accuracy": 0.9239882041895465, + "eval_auc": 0.9520488340982072, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7622833518842423, + "eval_f1_macro": 0.8585223761569667, + "eval_loss": 0.2301386296749115, + "eval_pr_auc": 0.7769598633905366, + "eval_precision": 0.7518820577164367, + "eval_precision_macro": 0.8545818055572474, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8626154705039393, + "eval_runtime": 0.2473, + "eval_samples_per_second": 659.185, + "eval_steps_per_second": 4.044, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6156 + }, + { + "epoch": 343.0, + "eval_accuracy": 0.9241407362212731, + "eval_auc": 0.952100825107636, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7626471524021635, + "eval_f1_macro": 0.8587517153841377, + "eval_loss": 0.23005619645118713, + "eval_pr_auc": 0.7772456933395511, + "eval_precision": 0.7525902668759812, + "eval_precision_macro": 0.8549397976374689, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8627060119417373, + "eval_runtime": 0.2129, + "eval_samples_per_second": 765.55, + "eval_steps_per_second": 4.697, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6174 + }, + { + "epoch": 344.0, + "eval_accuracy": 0.9240898922106976, + "eval_auc": 0.9521250006350455, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7625258469858438, + "eval_f1_macro": 0.8586752506435165, + "eval_loss": 0.23000310361385345, + "eval_pr_auc": 0.7773917675410515, + "eval_precision": 0.7523540489642184, + "eval_precision_macro": 0.8548203930053466, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8626758314624713, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.276, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6192 + }, + { + "epoch": 345.0, + "eval_accuracy": 0.9241915802318487, + "eval_auc": 0.9521700231752211, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7627684964200477, + "eval_f1_macro": 0.8588281984687149, + "eval_loss": 0.22992061078548431, + "eval_pr_auc": 0.777567865132197, + "eval_precision": 0.7528266331658291, + "eval_precision_macro": 0.8550592763014295, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8627361924210033, + "eval_runtime": 0.2684, + "eval_samples_per_second": 607.371, + "eval_steps_per_second": 3.726, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6210 + }, + { + "epoch": 346.0, + "eval_accuracy": 0.9240898922106976, + "eval_auc": 0.9521951914175242, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7624502784407319, + "eval_f1_macro": 0.8586388332084449, + "eval_loss": 0.22986458241939545, + "eval_pr_auc": 0.7777181268262345, + "eval_precision": 0.7525125628140703, + "eval_precision_macro": 0.8548719086819685, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7726539825862625, + "eval_recall_macro": 0.8625447736315148, + "eval_runtime": 0.2315, + "eval_samples_per_second": 704.221, + "eval_steps_per_second": 4.32, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6228 + }, + { + "epoch": 347.0, + "eval_accuracy": 0.9240898922106976, + "eval_auc": 0.9522022182817713, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7625258469858438, + "eval_f1_macro": 0.8586752506435165, + "eval_loss": 0.22987791895866394, + "eval_pr_auc": 0.7777283636078421, + "eval_precision": 0.7523540489642184, + "eval_precision_macro": 0.8548203930053466, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8626758314624713, + "eval_runtime": 0.2021, + "eval_samples_per_second": 806.414, + "eval_steps_per_second": 4.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6246 + }, + { + "epoch": 348.0, + "eval_accuracy": 0.9243441122635754, + "eval_auc": 0.9522300142987927, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7634340222575516, + "eval_f1_macro": 0.8592029398342167, + "eval_loss": 0.229818195104599, + "eval_pr_auc": 0.7778254023800715, + "eval_precision": 0.7529005957980558, + "eval_precision_macro": 0.8552111450378106, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7742663656884876, + "eval_recall_macro": 0.8633509651826273, + "eval_runtime": 0.2422, + "eval_samples_per_second": 672.887, + "eval_steps_per_second": 4.128, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6264 + }, + { + "epoch": 349.0, + "eval_accuracy": 0.9243949562741509, + "eval_auc": 0.9522554939810626, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763780778395552, + "eval_f1_macro": 0.8593880436271214, + "eval_loss": 0.22978660464286804, + "eval_pr_auc": 0.7779049389173774, + "eval_precision": 0.7526612398246713, + "eval_precision_macro": 0.8551760733541227, + "eval_pred_class_0": 16474, + "eval_pred_class_1": 3194, + "eval_predicted_binding_ratio": 0.16239576977832013, + "eval_recall": 0.7752337955498226, + "eval_recall_macro": 0.8637743191547629, + "eval_runtime": 0.237, + "eval_samples_per_second": 687.696, + "eval_steps_per_second": 4.219, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6282 + }, + { + "epoch": 350.0, + "eval_accuracy": 0.9244458002847264, + "eval_auc": 0.9522950273918264, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7638270820089001, + "eval_f1_macro": 0.859428369717681, + "eval_loss": 0.2297380119562149, + "eval_pr_auc": 0.7780796039517833, + "eval_precision": 0.7530554685051708, + "eval_precision_macro": 0.855346694014678, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7749113189293776, + "eval_recall_macro": 0.8636734418030723, + "eval_runtime": 0.251, + "eval_samples_per_second": 649.348, + "eval_steps_per_second": 3.984, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6300 + }, + { + "epoch": 351.0, + "eval_accuracy": 0.9242932682529998, + "eval_auc": 0.9523093341652933, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7636883034438978, + "eval_f1_macro": 0.8593074482256571, + "eval_loss": 0.22972844541072845, + "eval_pr_auc": 0.7781517759374512, + "eval_precision": 0.751875, + "eval_precision_macro": 0.8548359697595336, + "eval_pred_class_0": 16468, + "eval_pred_class_1": 3200, + "eval_predicted_binding_ratio": 0.16270083384177345, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8639760738581439, + "eval_runtime": 0.2607, + "eval_samples_per_second": 625.342, + "eval_steps_per_second": 3.836, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6318 + }, + { + "epoch": 352.0, + "eval_accuracy": 0.9242424242424242, + "eval_auc": 0.952317528929415, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7635671215487146, + "eval_f1_macro": 0.8592310391299909, + "eval_loss": 0.22972537577152252, + "eval_pr_auc": 0.7781868067856106, + "eval_precision": 0.7516401124648547, + "eval_precision_macro": 0.8547172445484534, + "eval_pred_class_0": 16467, + "eval_pred_class_1": 3201, + "eval_predicted_binding_ratio": 0.16275167785234898, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8639458933788778, + "eval_runtime": 0.234, + "eval_samples_per_second": 696.7, + "eval_steps_per_second": 4.274, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6336 + }, + { + "epoch": 353.0, + "eval_accuracy": 0.9243441122635754, + "eval_auc": 0.9523571596651685, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7638095238095238, + "eval_f1_macro": 0.8593838755989138, + "eval_loss": 0.2296588122844696, + "eval_pr_auc": 0.7784014772150735, + "eval_precision": 0.7521100343857455, + "eval_precision_macro": 0.8549547682402951, + "eval_pred_class_0": 16469, + "eval_pred_class_1": 3199, + "eval_predicted_binding_ratio": 0.16264998983119788, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8640062543374099, + "eval_runtime": 0.2433, + "eval_samples_per_second": 669.953, + "eval_steps_per_second": 4.11, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6354 + }, + { + "epoch": 354.0, + "eval_accuracy": 0.9243441122635754, + "eval_auc": 0.952381568772553, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7638095238095238, + "eval_f1_macro": 0.8593838755989138, + "eval_loss": 0.22961482405662537, + "eval_pr_auc": 0.7785102930543456, + "eval_precision": 0.7521100343857455, + "eval_precision_macro": 0.8549547682402951, + "eval_pred_class_0": 16469, + "eval_pred_class_1": 3199, + "eval_predicted_binding_ratio": 0.16264998983119788, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8640062543374099, + "eval_runtime": 0.2671, + "eval_samples_per_second": 610.236, + "eval_steps_per_second": 3.744, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6372 + }, + { + "epoch": 355.0, + "eval_accuracy": 0.9242932682529998, + "eval_auc": 0.9523906199965831, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763838223632038, + "eval_f1_macro": 0.8593796791618457, + "eval_loss": 0.2295987904071808, + "eval_pr_auc": 0.7785825605072106, + "eval_precision": 0.7515605493133583, + "eval_precision_macro": 0.8547343562893321, + "eval_pred_class_0": 16464, + "eval_pred_class_1": 3204, + "eval_predicted_binding_ratio": 0.16290420988407567, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.8642381895200568, + "eval_runtime": 0.2598, + "eval_samples_per_second": 627.418, + "eval_steps_per_second": 3.849, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6390 + }, + { + "epoch": 356.0, + "eval_accuracy": 0.9243949562741509, + "eval_auc": 0.9524596428791869, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7636305833730727, + "eval_f1_macro": 0.8593156699585895, + "eval_loss": 0.229468435049057, + "eval_pr_auc": 0.7789122838326235, + "eval_precision": 0.7529780564263323, + "eval_precision_macro": 0.8552789299002641, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7745888423089327, + "eval_recall_macro": 0.8635122034928499, + "eval_runtime": 0.2641, + "eval_samples_per_second": 617.191, + "eval_steps_per_second": 3.786, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6408 + }, + { + "epoch": 357.0, + "eval_accuracy": 0.9244458002847264, + "eval_auc": 0.9524861542063461, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763751987281399, + "eval_f1_macro": 0.8593921831946546, + "eval_loss": 0.2294115126132965, + "eval_pr_auc": 0.7790142584142796, + "eval_precision": 0.7532141737221699, + "eval_precision_macro": 0.8553982756468123, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7745888423089327, + "eval_recall_macro": 0.8635423839721159, + "eval_runtime": 0.2557, + "eval_samples_per_second": 637.426, + "eval_steps_per_second": 3.911, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6426 + }, + { + "epoch": 358.0, + "eval_accuracy": 0.9244458002847264, + "eval_auc": 0.9524829619466881, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7639771283354511, + "eval_f1_macro": 0.8595006707052558, + "eval_loss": 0.2294154018163681, + "eval_pr_auc": 0.7789936192429844, + "eval_precision": 0.7527386541471048, + "eval_precision_macro": 0.8552438490185533, + "eval_pred_class_0": 16473, + "eval_pred_class_1": 3195, + "eval_predicted_binding_ratio": 0.16244661378889566, + "eval_recall": 0.7755562721702677, + "eval_recall_macro": 0.8639355574649854, + "eval_runtime": 0.2445, + "eval_samples_per_second": 666.738, + "eval_steps_per_second": 4.09, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6444 + }, + { + "epoch": 359.0, + "eval_accuracy": 0.9243949562741509, + "eval_auc": 0.9525149624032593, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763780778395552, + "eval_f1_macro": 0.8593880436271214, + "eval_loss": 0.2293538749217987, + "eval_pr_auc": 0.7791661873069065, + "eval_precision": 0.7526612398246713, + "eval_precision_macro": 0.8551760733541227, + "eval_pred_class_0": 16474, + "eval_pred_class_1": 3194, + "eval_predicted_binding_ratio": 0.16239576977832013, + "eval_recall": 0.7752337955498226, + "eval_recall_macro": 0.8637743191547629, + "eval_runtime": 0.2637, + "eval_samples_per_second": 618.132, + "eval_steps_per_second": 3.792, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6462 + }, + { + "epoch": 360.0, + "eval_accuracy": 0.9243949562741509, + "eval_auc": 0.9525288798767679, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763780778395552, + "eval_f1_macro": 0.8593880436271214, + "eval_loss": 0.22932648658752441, + "eval_pr_auc": 0.7792072068588576, + "eval_precision": 0.7526612398246713, + "eval_precision_macro": 0.8551760733541227, + "eval_pred_class_0": 16474, + "eval_pred_class_1": 3194, + "eval_predicted_binding_ratio": 0.16239576977832013, + "eval_recall": 0.7752337955498226, + "eval_recall_macro": 0.8637743191547629, + "eval_runtime": 0.2714, + "eval_samples_per_second": 600.631, + "eval_steps_per_second": 3.685, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6480 + }, + { + "epoch": 361.0, + "eval_accuracy": 0.9244458002847264, + "eval_auc": 0.9525697758373857, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763751987281399, + "eval_f1_macro": 0.8593921831946546, + "eval_loss": 0.22925521433353424, + "eval_pr_auc": 0.77936321808712, + "eval_precision": 0.7532141737221699, + "eval_precision_macro": 0.8553982756468123, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7745888423089327, + "eval_recall_macro": 0.8635423839721159, + "eval_runtime": 0.249, + "eval_samples_per_second": 654.66, + "eval_steps_per_second": 4.016, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6498 + }, + { + "epoch": 361.1111111111111, + "grad_norm": 21180.3203125, + "learning_rate": 2.1735650901333336e-07, + "loss": 0.1893, + "step": 6500 + }, + { + "epoch": 362.0, + "eval_accuracy": 0.9244458002847264, + "eval_auc": 0.9526172898972942, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7636016544702513, + "eval_f1_macro": 0.8593197379764267, + "eval_loss": 0.22917793691158295, + "eval_pr_auc": 0.7795955328857882, + "eval_precision": 0.7535321821036107, + "eval_precision_macro": 0.8555017581027062, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7739438890680426, + "eval_recall_macro": 0.8632802683102028, + "eval_runtime": 0.208, + "eval_samples_per_second": 783.548, + "eval_steps_per_second": 4.807, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6516 + }, + { + "epoch": 363.0, + "eval_accuracy": 0.924496644295302, + "eval_auc": 0.9526382926300437, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7636479388826993, + "eval_f1_macro": 0.8593600478608577, + "eval_loss": 0.22912409901618958, + "eval_pr_auc": 0.7796869947527124, + "eval_precision": 0.7539283469516027, + "eval_precision_macro": 0.8556733812884909, + "eval_pred_class_0": 16486, + "eval_pred_class_1": 3182, + "eval_predicted_binding_ratio": 0.16178564165141346, + "eval_recall": 0.7736214124475975, + "eval_recall_macro": 0.8631793909585124, + "eval_runtime": 0.2657, + "eval_samples_per_second": 613.513, + "eval_steps_per_second": 3.764, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6534 + }, + { + "epoch": 364.0, + "eval_accuracy": 0.924496644295302, + "eval_auc": 0.952662049659998, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7637231503579952, + "eval_f1_macro": 0.859396294249525, + "eval_loss": 0.22910362482070923, + "eval_pr_auc": 0.779788701256993, + "eval_precision": 0.7537688442211056, + "eval_precision_macro": 0.8556213791598126, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7739438890680426, + "eval_recall_macro": 0.8633104487894689, + "eval_runtime": 0.2607, + "eval_samples_per_second": 625.175, + "eval_steps_per_second": 3.835, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6552 + }, + { + "epoch": 365.0, + "eval_accuracy": 0.9245474883058775, + "eval_auc": 0.9526903420344663, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7638446849140674, + "eval_f1_macro": 0.8594728689002142, + "eval_loss": 0.2290574461221695, + "eval_pr_auc": 0.7799354479263911, + "eval_precision": 0.7540056550424128, + "eval_precision_macro": 0.8557410744123195, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7739438890680426, + "eval_recall_macro": 0.8633406292687349, + "eval_runtime": 0.2049, + "eval_samples_per_second": 795.329, + "eval_steps_per_second": 4.879, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6570 + }, + { + "epoch": 366.0, + "eval_accuracy": 0.9245474883058775, + "eval_auc": 0.9527021767531981, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7640699523052464, + "eval_f1_macro": 0.8595814265550925, + "eval_loss": 0.22903695702552795, + "eval_pr_auc": 0.7799769457420497, + "eval_precision": 0.753527751646284, + "eval_precision_macro": 0.8555854062558139, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7749113189293776, + "eval_recall_macro": 0.8637338027616044, + "eval_runtime": 0.2118, + "eval_samples_per_second": 769.509, + "eval_steps_per_second": 4.721, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6588 + }, + { + "epoch": 367.0, + "eval_accuracy": 0.9247000203376042, + "eval_auc": 0.9527121622971282, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.764659145081837, + "eval_f1_macro": 0.8599193797618125, + "eval_loss": 0.22902432084083557, + "eval_pr_auc": 0.7800307850119022, + "eval_precision": 0.7537593984962406, + "eval_precision_macro": 0.8557884149558164, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.864217517692272, + "eval_runtime": 0.2519, + "eval_samples_per_second": 647.072, + "eval_steps_per_second": 3.97, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6606 + }, + { + "epoch": 368.0, + "eval_accuracy": 0.9248017083587553, + "eval_auc": 0.9527291941703031, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7649769585253456, + "eval_f1_macro": 0.8601085500794873, + "eval_loss": 0.22899393737316132, + "eval_pr_auc": 0.7800749822284204, + "eval_precision": 0.7540726817042607, + "eval_precision_macro": 0.85597540373147, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8644089364817604, + "eval_runtime": 0.2331, + "eval_samples_per_second": 699.217, + "eval_steps_per_second": 4.29, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6624 + }, + { + "epoch": 369.0, + "eval_accuracy": 0.9248525523693308, + "eval_auc": 0.9527588782921224, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7650238473767885, + "eval_f1_macro": 0.8601491566364061, + "eval_loss": 0.22894835472106934, + "eval_pr_auc": 0.780237083741907, + "eval_precision": 0.7544684854186265, + "eval_precision_macro": 0.856146798082819, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.86430805913007, + "eval_runtime": 0.2655, + "eval_samples_per_second": 613.902, + "eval_steps_per_second": 3.766, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6642 + }, + { + "epoch": 370.0, + "eval_accuracy": 0.9248525523693308, + "eval_auc": 0.9527729125556185, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7650238473767885, + "eval_f1_macro": 0.8601491566364061, + "eval_loss": 0.22892294824123383, + "eval_pr_auc": 0.7803195480022762, + "eval_precision": 0.7544684854186265, + "eval_precision_macro": 0.856146798082819, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.86430805913007, + "eval_runtime": 0.2596, + "eval_samples_per_second": 627.873, + "eval_steps_per_second": 3.852, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6660 + }, + { + "epoch": 371.0, + "eval_accuracy": 0.9248525523693308, + "eval_auc": 0.9527883872289603, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7650985378258105, + "eval_f1_macro": 0.8601851483463878, + "eval_loss": 0.22889479994773865, + "eval_pr_auc": 0.7804143746656889, + "eval_precision": 0.7543089940457537, + "eval_precision_macro": 0.8560948381043845, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8644391169610264, + "eval_runtime": 0.2368, + "eval_samples_per_second": 688.489, + "eval_steps_per_second": 4.224, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6678 + }, + { + "epoch": 372.0, + "eval_accuracy": 0.924954240390482, + "eval_auc": 0.9528159885960027, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7653418124006359, + "eval_f1_macro": 0.860338399996844, + "eval_loss": 0.22885586321353912, + "eval_pr_auc": 0.7805625189044384, + "eval_precision": 0.7547820633427407, + "eval_precision_macro": 0.8563339286918206, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8644994779195585, + "eval_runtime": 0.1849, + "eval_samples_per_second": 881.352, + "eval_steps_per_second": 5.407, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6696 + }, + { + "epoch": 373.0, + "eval_accuracy": 0.9250559284116331, + "eval_auc": 0.9528519015171544, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7655852417302799, + "eval_f1_macro": 0.8604917251982311, + "eval_loss": 0.22878196835517883, + "eval_pr_auc": 0.7807742707975688, + "eval_precision": 0.7552557263884531, + "eval_precision_macro": 0.8565733155332836, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8645598388780904, + "eval_runtime": 0.2727, + "eval_samples_per_second": 597.724, + "eval_steps_per_second": 3.667, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6714 + }, + { + "epoch": 374.0, + "eval_accuracy": 0.9251067724222086, + "eval_auc": 0.9528673372605004, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7657070144743121, + "eval_f1_macro": 0.8605684154038177, + "eval_loss": 0.22877708077430725, + "eval_pr_auc": 0.7808321396342274, + "eval_precision": 0.7554927809165097, + "eval_precision_macro": 0.85669312022406, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8645900193573565, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.842, + "eval_steps_per_second": 3.87, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6732 + }, + { + "epoch": 375.0, + "eval_accuracy": 0.9251067724222086, + "eval_auc": 0.9528682131854067, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7659303988558716, + "eval_f1_macro": 0.8606760610325117, + "eval_loss": 0.2287902534008026, + "eval_pr_auc": 0.7808487230478494, + "eval_precision": 0.7550125313283208, + "eval_precision_macro": 0.8565363700584309, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8649831928502261, + "eval_runtime": 0.1856, + "eval_samples_per_second": 878.251, + "eval_steps_per_second": 5.388, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6750 + }, + { + "epoch": 376.0, + "eval_accuracy": 0.9250050844010576, + "eval_auc": 0.9528953279275011, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7655380702591003, + "eval_f1_macro": 0.8604509839871686, + "eval_loss": 0.228745236992836, + "eval_pr_auc": 0.7809833435589818, + "eval_precision": 0.754858934169279, + "eval_precision_macro": 0.8564014297014619, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.864660716229781, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.614, + "eval_steps_per_second": 3.881, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6768 + }, + { + "epoch": 377.0, + "eval_accuracy": 0.9248017083587553, + "eval_auc": 0.9529040482465667, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7651262505955216, + "eval_f1_macro": 0.8601804865980422, + "eval_loss": 0.2287396788597107, + "eval_pr_auc": 0.781048839864553, + "eval_precision": 0.7537546933667084, + "eval_precision_macro": 0.8558720042841312, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7768461786520477, + "eval_recall_macro": 0.8646710521436733, + "eval_runtime": 0.2563, + "eval_samples_per_second": 635.891, + "eval_steps_per_second": 3.901, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6786 + }, + { + "epoch": 378.0, + "eval_accuracy": 0.9248525523693308, + "eval_auc": 0.9529144620204507, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7653968253968254, + "eval_f1_macro": 0.8603288764349426, + "eval_loss": 0.22873102128505707, + "eval_pr_auc": 0.7810643100928254, + "eval_precision": 0.7536730228196311, + "eval_precision_macro": 0.8558880628094148, + "eval_pred_class_0": 16469, + "eval_pred_class_1": 3199, + "eval_predicted_binding_ratio": 0.16264998983119788, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8649633482848524, + "eval_runtime": 0.1843, + "eval_samples_per_second": 884.276, + "eval_steps_per_second": 5.425, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6804 + }, + { + "epoch": 379.0, + "eval_accuracy": 0.924954240390482, + "eval_auc": 0.9529275424990492, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7657142857142857, + "eval_f1_macro": 0.8605178766021483, + "eval_loss": 0.22870197892189026, + "eval_pr_auc": 0.7811376412515475, + "eval_precision": 0.7539856205064083, + "eval_precision_macro": 0.8560747217232387, + "eval_pred_class_0": 16469, + "eval_pred_class_1": 3199, + "eval_predicted_binding_ratio": 0.16264998983119788, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8651547670743409, + "eval_runtime": 0.2724, + "eval_samples_per_second": 598.424, + "eval_steps_per_second": 3.671, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6822 + }, + { + "epoch": 380.0, + "eval_accuracy": 0.9251067724222086, + "eval_auc": 0.9529545793811519, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7660047656870532, + "eval_f1_macro": 0.8607118952674847, + "eval_loss": 0.22864677011966705, + "eval_pr_auc": 0.7812048860219004, + "eval_precision": 0.7548528490920476, + "eval_precision_macro": 0.8564843339790698, + "eval_pred_class_0": 16474, + "eval_pred_class_1": 3194, + "eval_predicted_binding_ratio": 0.16239576977832013, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8651142506811824, + "eval_runtime": 0.1856, + "eval_samples_per_second": 878.062, + "eval_steps_per_second": 5.387, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6840 + }, + { + "epoch": 381.0, + "eval_accuracy": 0.9250559284116331, + "eval_auc": 0.9529760687388493, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7658087067047982, + "eval_f1_macro": 0.8605994081311654, + "eval_loss": 0.2286224663257599, + "eval_pr_auc": 0.7813066136655398, + "eval_precision": 0.7547760726589414, + "eval_precision_macro": 0.8564168678924449, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.86495301237096, + "eval_runtime": 0.2522, + "eval_samples_per_second": 646.403, + "eval_steps_per_second": 3.966, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6858 + }, + { + "epoch": 382.0, + "eval_accuracy": 0.9251067724222086, + "eval_auc": 0.9530162639595422, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7656324582338903, + "eval_f1_macro": 0.8605324858111449, + "eval_loss": 0.22855480015277863, + "eval_pr_auc": 0.7815011919374028, + "eval_precision": 0.7556532663316583, + "eval_precision_macro": 0.856745584876579, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8644589615264, + "eval_runtime": 0.267, + "eval_samples_per_second": 610.397, + "eval_steps_per_second": 3.745, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6876 + }, + { + "epoch": 383.0, + "eval_accuracy": 0.9251576164327843, + "eval_auc": 0.9530368384623376, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7658288259624563, + "eval_f1_macro": 0.8606451240251011, + "eval_loss": 0.22851014137268066, + "eval_pr_auc": 0.7815937686262128, + "eval_precision": 0.7557299843014129, + "eval_precision_macro": 0.8568129991882603, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8646201998366225, + "eval_runtime": 0.2682, + "eval_samples_per_second": 607.766, + "eval_steps_per_second": 3.729, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6894 + }, + { + "epoch": 384.0, + "eval_accuracy": 0.9251576164327843, + "eval_auc": 0.9530639240069352, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7658288259624563, + "eval_f1_macro": 0.8606451240251011, + "eval_loss": 0.22846660017967224, + "eval_pr_auc": 0.7817366980630457, + "eval_precision": 0.7557299843014129, + "eval_precision_macro": 0.8568129991882603, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8646201998366225, + "eval_runtime": 0.2509, + "eval_samples_per_second": 649.734, + "eval_steps_per_second": 3.986, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6912 + }, + { + "epoch": 385.0, + "eval_accuracy": 0.9251067724222086, + "eval_auc": 0.9530865131370146, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7656324582338903, + "eval_f1_macro": 0.8605324858111449, + "eval_loss": 0.22843268513679504, + "eval_pr_auc": 0.7818405401720232, + "eval_precision": 0.7556532663316583, + "eval_precision_macro": 0.856745584876579, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8644589615264, + "eval_runtime": 0.2627, + "eval_samples_per_second": 620.559, + "eval_steps_per_second": 3.807, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6930 + }, + { + "epoch": 386.0, + "eval_accuracy": 0.9251576164327843, + "eval_auc": 0.9530828926474026, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7659033078880407, + "eval_f1_macro": 0.8606810172942987, + "eval_loss": 0.22846029698848724, + "eval_pr_auc": 0.781765942321457, + "eval_precision": 0.7555695010982115, + "eval_precision_macro": 0.8567605408530922, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.864751257667579, + "eval_runtime": 0.2616, + "eval_samples_per_second": 623.151, + "eval_steps_per_second": 3.823, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6948 + }, + { + "epoch": 387.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.9530829315773984, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7665184243964421, + "eval_f1_macro": 0.8610134494863566, + "eval_loss": 0.22847168147563934, + "eval_pr_auc": 0.7818060776753192, + "eval_precision": 0.7552425665101722, + "eval_precision_macro": 0.8567386267869261, + "eval_pred_class_0": 16473, + "eval_pred_class_1": 3195, + "eval_predicted_binding_ratio": 0.16244661378889566, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8654669077808935, + "eval_runtime": 0.2089, + "eval_samples_per_second": 780.429, + "eval_steps_per_second": 4.788, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6966 + }, + { + "epoch": 388.0, + "eval_accuracy": 0.9254118364856619, + "eval_auc": 0.953101403860419, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7668838391863976, + "eval_f1_macro": 0.861243571985536, + "eval_loss": 0.2284410148859024, + "eval_pr_auc": 0.7818375459402719, + "eval_precision": 0.7559523809523809, + "eval_precision_macro": 0.8570973363853918, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8655574492186915, + "eval_runtime": 0.2357, + "eval_samples_per_second": 691.702, + "eval_steps_per_second": 4.244, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6984 + }, + { + "epoch": 388.8888888888889, + "grad_norm": 17393.9921875, + "learning_rate": 1.4317094954644378e-07, + "loss": 0.1876, + "step": 7000 + }, + { + "epoch": 389.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9531112726143616, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671537484116899, + "eval_f1_macro": 0.8613916441816318, + "eval_loss": 0.22843530774116516, + "eval_pr_auc": 0.7818710932290109, + "eval_precision": 0.755868544600939, + "eval_precision_macro": 0.8571123212290193, + "eval_pred_class_0": 16473, + "eval_pred_class_1": 3195, + "eval_predicted_binding_ratio": 0.16244661378889566, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8658497453598706, + "eval_runtime": 0.2628, + "eval_samples_per_second": 620.309, + "eval_steps_per_second": 3.806, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7002 + }, + { + "epoch": 390.0, + "eval_accuracy": 0.9253101484645109, + "eval_auc": 0.9531541734697645, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7664175544601686, + "eval_f1_macro": 0.8609828565716282, + "eval_loss": 0.2283545583486557, + "eval_pr_auc": 0.7820972392670548, + "eval_precision": 0.7559598494353826, + "eval_precision_macro": 0.8570151188924486, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8651039147672901, + "eval_runtime": 0.2594, + "eval_samples_per_second": 628.253, + "eval_steps_per_second": 3.854, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7020 + }, + { + "epoch": 391.0, + "eval_accuracy": 0.9253609924750864, + "eval_auc": 0.9531671274258764, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7665394402035624, + "eval_f1_macro": 0.8610596014864338, + "eval_loss": 0.22832486033439636, + "eval_pr_auc": 0.7821584295330316, + "eval_precision": 0.7561970505177282, + "eval_precision_macro": 0.8571349914927091, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.865134095246556, + "eval_runtime": 0.2425, + "eval_samples_per_second": 672.118, + "eval_steps_per_second": 4.123, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7038 + }, + { + "epoch": 392.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.953186076601346, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7661469933184856, + "eval_f1_macro": 0.8608344648891975, + "eval_loss": 0.22829268872737885, + "eval_pr_auc": 0.7822636305739935, + "eval_precision": 0.756043956043956, + "eval_precision_macro": 0.8570003193433394, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.864811618626111, + "eval_runtime": 0.241, + "eval_samples_per_second": 676.357, + "eval_steps_per_second": 4.149, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7056 + }, + { + "epoch": 393.0, + "eval_accuracy": 0.9252084604433598, + "eval_auc": 0.9532213958400613, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.765950676213206, + "eval_f1_macro": 0.860721851071415, + "eval_loss": 0.2282164841890335, + "eval_pr_auc": 0.782458419213003, + "eval_precision": 0.7559673366834171, + "eval_precision_macro": 0.8569329524960401, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8646503803158885, + "eval_runtime": 0.21, + "eval_samples_per_second": 776.053, + "eval_steps_per_second": 4.761, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7074 + }, + { + "epoch": 394.0, + "eval_accuracy": 0.9251576164327843, + "eval_auc": 0.9532409095004704, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7657542966263526, + "eval_f1_macro": 0.8606092068875439, + "eval_loss": 0.22819304466247559, + "eval_pr_auc": 0.7825471573946872, + "eval_precision": 0.7558906691800189, + "eval_precision_macro": 0.8568655651025967, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.864489142005666, + "eval_runtime": 0.1952, + "eval_samples_per_second": 835.24, + "eval_steps_per_second": 5.124, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7092 + }, + { + "epoch": 395.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.9532573574237078, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7659980897803247, + "eval_f1_macro": 0.8607627043564902, + "eval_loss": 0.2281719297170639, + "eval_pr_auc": 0.7826366824043385, + "eval_precision": 0.7563659226658284, + "eval_precision_macro": 0.8571057489837905, + "eval_pred_class_0": 16487, + "eval_pred_class_1": 3181, + "eval_predicted_binding_ratio": 0.1617347976408379, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8645495029641981, + "eval_runtime": 0.2003, + "eval_samples_per_second": 813.649, + "eval_steps_per_second": 4.992, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7110 + }, + { + "epoch": 396.0, + "eval_accuracy": 0.9252084604433598, + "eval_auc": 0.9532628173556228, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.765950676213206, + "eval_f1_macro": 0.860721851071415, + "eval_loss": 0.22817298769950867, + "eval_pr_auc": 0.7826246984855115, + "eval_precision": 0.7559673366834171, + "eval_precision_macro": 0.8569329524960401, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8646503803158885, + "eval_runtime": 0.2265, + "eval_samples_per_second": 719.545, + "eval_steps_per_second": 4.414, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7128 + }, + { + "epoch": 397.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.9532649585053934, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7662213740458015, + "eval_f1_macro": 0.8608703093903662, + "eval_loss": 0.22816696763038635, + "eval_pr_auc": 0.7826996340764835, + "eval_precision": 0.7558832758079699, + "eval_precision_macro": 0.8569477661729006, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7768461786520477, + "eval_recall_macro": 0.8649426764570676, + "eval_runtime": 0.2363, + "eval_samples_per_second": 689.822, + "eval_steps_per_second": 4.232, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7146 + }, + { + "epoch": 398.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.9532878980054353, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7661469933184856, + "eval_f1_macro": 0.8608344648891975, + "eval_loss": 0.22812943160533905, + "eval_pr_auc": 0.7828570635819191, + "eval_precision": 0.756043956043956, + "eval_precision_macro": 0.8570003193433394, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.864811618626111, + "eval_runtime": 0.2572, + "eval_samples_per_second": 633.707, + "eval_steps_per_second": 3.888, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7164 + }, + { + "epoch": 399.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.9533086671582098, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7660725652450668, + "eval_f1_macro": 0.860798596552099, + "eval_loss": 0.2281065434217453, + "eval_pr_auc": 0.7829274286728394, + "eval_precision": 0.7562048382029531, + "eval_precision_macro": 0.8570529802176428, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8646805607951544, + "eval_runtime": 0.2483, + "eval_samples_per_second": 656.497, + "eval_steps_per_second": 4.028, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7182 + }, + { + "epoch": 400.0, + "eval_accuracy": 0.9253101484645109, + "eval_auc": 0.953327003186245, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7661944930765557, + "eval_f1_macro": 0.8608753604764983, + "eval_loss": 0.22807644307613373, + "eval_pr_auc": 0.7830440116061308, + "eval_precision": 0.7564424890006285, + "eval_precision_macro": 0.8571730824234005, + "eval_pred_class_0": 16486, + "eval_pred_class_1": 3182, + "eval_predicted_binding_ratio": 0.16178564165141346, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8647107412744205, + "eval_runtime": 0.221, + "eval_samples_per_second": 737.532, + "eval_steps_per_second": 4.525, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7200 + }, + { + "epoch": 401.0, + "eval_accuracy": 0.9253101484645109, + "eval_auc": 0.9533311297658027, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7664175544601686, + "eval_f1_macro": 0.8609828565716282, + "eval_loss": 0.22808308899402618, + "eval_pr_auc": 0.7830491617637381, + "eval_precision": 0.7559598494353826, + "eval_precision_macro": 0.8570151188924486, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8651039147672901, + "eval_runtime": 0.2636, + "eval_samples_per_second": 618.406, + "eval_steps_per_second": 3.794, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7218 + }, + { + "epoch": 402.0, + "eval_accuracy": 0.9253609924750864, + "eval_auc": 0.9533512371086481, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.766390833863781, + "eval_f1_macro": 0.8609879862166538, + "eval_loss": 0.2280474752187729, + "eval_pr_auc": 0.7831121530747555, + "eval_precision": 0.7565190072258875, + "eval_precision_macro": 0.857240395332689, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.864871979584643, + "eval_runtime": 0.2425, + "eval_samples_per_second": 672.294, + "eval_steps_per_second": 4.125, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7236 + }, + { + "epoch": 403.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9533569987480307, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.766783328030544, + "eval_f1_macro": 0.86121314661739, + "eval_loss": 0.22804181277751923, + "eval_pr_auc": 0.7831234273165448, + "eval_precision": 0.7566718995290423, + "eval_precision_macro": 0.8573749596534976, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8651944562050881, + "eval_runtime": 0.256, + "eval_samples_per_second": 636.798, + "eval_steps_per_second": 3.907, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7254 + }, + { + "epoch": 404.0, + "eval_accuracy": 0.9255135245068131, + "eval_auc": 0.9533826536152816, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7669053301511536, + "eval_f1_macro": 0.861289946852225, + "eval_loss": 0.2279965728521347, + "eval_pr_auc": 0.78327266335354, + "eval_precision": 0.7569095477386935, + "eval_precision_macro": 0.8574950553544232, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8652246366843541, + "eval_runtime": 0.2614, + "eval_samples_per_second": 623.619, + "eval_steps_per_second": 3.826, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7272 + }, + { + "epoch": 405.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9533846682425657, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.766783328030544, + "eval_f1_macro": 0.86121314661739, + "eval_loss": 0.22799374163150787, + "eval_pr_auc": 0.7832776890714148, + "eval_precision": 0.7566718995290423, + "eval_precision_macro": 0.8573749596534976, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8651944562050881, + "eval_runtime": 0.2631, + "eval_samples_per_second": 619.577, + "eval_steps_per_second": 3.801, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7290 + }, + { + "epoch": 406.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9533931647141554, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.766783328030544, + "eval_f1_macro": 0.86121314661739, + "eval_loss": 0.2279902696609497, + "eval_pr_auc": 0.7833151045367318, + "eval_precision": 0.7566718995290423, + "eval_precision_macro": 0.8573749596534976, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8651944562050881, + "eval_runtime": 0.2498, + "eval_samples_per_second": 652.534, + "eval_steps_per_second": 4.003, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7308 + }, + { + "epoch": 407.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9533809990904591, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7673235855054037, + "eval_f1_macro": 0.8615095109466251, + "eval_loss": 0.22801372408866882, + "eval_pr_auc": 0.7832695956978404, + "eval_precision": 0.7565026637417738, + "eval_precision_macro": 0.8574040902613707, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8657790484874461, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.354, + "eval_steps_per_second": 3.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7326 + }, + { + "epoch": 408.0, + "eval_accuracy": 0.9254118364856619, + "eval_auc": 0.9533805708605049, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671058898237816, + "eval_f1_macro": 0.8613505657612415, + "eval_loss": 0.2280135303735733, + "eval_pr_auc": 0.7832983031509244, + "eval_precision": 0.7554721701063164, + "eval_precision_macro": 0.8569406995036744, + "eval_pred_class_0": 16470, + "eval_pred_class_1": 3198, + "eval_predicted_binding_ratio": 0.16259914582062232, + "eval_recall": 0.7791035149951628, + "eval_recall_macro": 0.865950622711561, + "eval_runtime": 0.2668, + "eval_samples_per_second": 610.9, + "eval_steps_per_second": 3.748, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7344 + }, + { + "epoch": 409.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9533953253289238, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7672276913305811, + "eval_f1_macro": 0.8614272726281818, + "eval_loss": 0.22799338400363922, + "eval_pr_auc": 0.7833637706556547, + "eval_precision": 0.7557084766969033, + "eval_precision_macro": 0.857060115344384, + "eval_pred_class_0": 16471, + "eval_pred_class_1": 3197, + "eval_predicted_binding_ratio": 0.16254830181004679, + "eval_recall": 0.7791035149951628, + "eval_recall_macro": 0.865980803190827, + "eval_runtime": 0.2998, + "eval_samples_per_second": 543.699, + "eval_steps_per_second": 3.336, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7362 + }, + { + "epoch": 410.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9534269948805303, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7672496025437202, + "eval_f1_macro": 0.8614738601594714, + "eval_loss": 0.22793784737586975, + "eval_pr_auc": 0.783492627588611, + "eval_precision": 0.7566635308874256, + "eval_precision_macro": 0.8574567123458305, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8656479906564896, + "eval_runtime": 0.2396, + "eval_samples_per_second": 680.309, + "eval_steps_per_second": 4.174, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7380 + }, + { + "epoch": 411.0, + "eval_accuracy": 0.9255135245068131, + "eval_auc": 0.9534421775789035, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7670535856256957, + "eval_f1_macro": 0.8613613920200376, + "eval_loss": 0.22790838778018951, + "eval_pr_auc": 0.7835790311478947, + "eval_precision": 0.7565872020075283, + "eval_precision_macro": 0.8573894747901719, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.865486752346267, + "eval_runtime": 0.2625, + "eval_samples_per_second": 620.852, + "eval_steps_per_second": 3.809, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7398 + }, + { + "epoch": 412.0, + "eval_accuracy": 0.9255135245068131, + "eval_auc": 0.9534503918080233, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7670535856256957, + "eval_f1_macro": 0.8613613920200376, + "eval_loss": 0.22789432108402252, + "eval_pr_auc": 0.7836065049249683, + "eval_precision": 0.7565872020075283, + "eval_precision_macro": 0.8573894747901719, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.865486752346267, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.102, + "eval_steps_per_second": 3.829, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7416 + }, + { + "epoch": 413.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9534621486667634, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7669316375198728, + "eval_f1_macro": 0.8612846167990333, + "eval_loss": 0.2278737723827362, + "eval_pr_auc": 0.7836530325514856, + "eval_precision": 0.7563499529633114, + "eval_precision_macro": 0.857269581736829, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8654565718670011, + "eval_runtime": 0.2015, + "eval_samples_per_second": 809.132, + "eval_steps_per_second": 4.964, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7434 + }, + { + "epoch": 414.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9534808739947569, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7668575063613231, + "eval_f1_macro": 0.8612488935825013, + "eval_loss": 0.22784681618213654, + "eval_pr_auc": 0.7837271248134856, + "eval_precision": 0.7565108252274867, + "eval_precision_macro": 0.8573222168125176, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653255140360445, + "eval_runtime": 0.2677, + "eval_samples_per_second": 608.938, + "eval_steps_per_second": 3.736, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7452 + }, + { + "epoch": 415.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535030056973854, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.2278076857328415, + "eval_pr_auc": 0.783829043372685, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2388, + "eval_samples_per_second": 682.596, + "eval_steps_per_second": 4.188, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7470 + }, + { + "epoch": 416.0, + "eval_accuracy": 0.9255135245068131, + "eval_auc": 0.9535103634665969, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7669053301511536, + "eval_f1_macro": 0.861289946852225, + "eval_loss": 0.22778868675231934, + "eval_pr_auc": 0.7838714010488458, + "eval_precision": 0.7569095477386935, + "eval_precision_macro": 0.8574950553544232, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8652246366843541, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.742, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7488 + }, + { + "epoch": 416.6666666666667, + "grad_norm": 16683.39453125, + "learning_rate": 8.236268949930852e-08, + "loss": 0.186, + "step": 7500 + }, + { + "epoch": 417.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535206409854957, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.22777557373046875, + "eval_pr_auc": 0.7839160806088992, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.595, + "eval_steps_per_second": 3.936, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7506 + }, + { + "epoch": 418.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.953516261360965, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.22778432071208954, + "eval_pr_auc": 0.7838825807566469, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2552, + "eval_samples_per_second": 638.836, + "eval_steps_per_second": 3.919, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7524 + }, + { + "epoch": 419.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.953526694599847, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.22777114808559418, + "eval_pr_auc": 0.783922204343384, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2628, + "eval_samples_per_second": 620.269, + "eval_steps_per_second": 3.805, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7542 + }, + { + "epoch": 420.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535399113334307, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.22775039076805115, + "eval_pr_auc": 0.7839806097795337, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2677, + "eval_samples_per_second": 608.883, + "eval_steps_per_second": 3.735, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7560 + }, + { + "epoch": 421.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535487289774859, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.22773513197898865, + "eval_pr_auc": 0.7840321361391863, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2732, + "eval_samples_per_second": 596.635, + "eval_steps_per_second": 3.66, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7578 + }, + { + "epoch": 422.0, + "eval_accuracy": 0.9254118364856619, + "eval_auc": 0.9535455951128217, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7668097281831188, + "eval_f1_macro": 0.8612078600062212, + "eval_loss": 0.22774243354797363, + "eval_pr_auc": 0.7840612126983214, + "eval_precision": 0.7561128526645768, + "eval_precision_macro": 0.8571497629022605, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.865426391387735, + "eval_runtime": 0.1892, + "eval_samples_per_second": 861.347, + "eval_steps_per_second": 5.284, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7596 + }, + { + "epoch": 423.0, + "eval_accuracy": 0.9254118364856619, + "eval_auc": 0.9535476389376027, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7668097281831188, + "eval_f1_macro": 0.8612078600062212, + "eval_loss": 0.22774267196655273, + "eval_pr_auc": 0.7840628987467491, + "eval_precision": 0.7561128526645768, + "eval_precision_macro": 0.8571497629022605, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.865426391387735, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.36, + "eval_steps_per_second": 3.935, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7614 + }, + { + "epoch": 424.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9535528458245448, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7670057215511761, + "eval_f1_macro": 0.8613203162894483, + "eval_loss": 0.22773417830467224, + "eval_pr_auc": 0.7840834648119666, + "eval_precision": 0.756189282356628, + "eval_precision_macro": 0.8572170542389439, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8655876296979576, + "eval_runtime": 0.2571, + "eval_samples_per_second": 633.927, + "eval_steps_per_second": 3.889, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7632 + }, + { + "epoch": 425.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9535637559558758, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7669316375198728, + "eval_f1_macro": 0.8612846167990333, + "eval_loss": 0.22771182656288147, + "eval_pr_auc": 0.7841366739811415, + "eval_precision": 0.7563499529633114, + "eval_precision_macro": 0.857269581736829, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8654565718670011, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.859, + "eval_steps_per_second": 3.895, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7650 + }, + { + "epoch": 426.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535682523703939, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7673235855054037, + "eval_f1_macro": 0.8615095109466251, + "eval_loss": 0.22770953178405762, + "eval_pr_auc": 0.7841596739317596, + "eval_precision": 0.7565026637417738, + "eval_precision_macro": 0.8574040902613707, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8657790484874461, + "eval_runtime": 0.2634, + "eval_samples_per_second": 618.83, + "eval_steps_per_second": 3.797, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7668 + }, + { + "epoch": 427.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535677852104438, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7674714104193139, + "eval_f1_macro": 0.8615807415292696, + "eval_loss": 0.22770845890045166, + "eval_pr_auc": 0.7841620948509175, + "eval_precision": 0.7561815336463223, + "eval_precision_macro": 0.8572991684500658, + "eval_pred_class_0": 16473, + "eval_pred_class_1": 3195, + "eval_predicted_binding_ratio": 0.16244661378889566, + "eval_recall": 0.7791035149951628, + "eval_recall_macro": 0.866041164149359, + "eval_runtime": 0.2571, + "eval_samples_per_second": 633.948, + "eval_steps_per_second": 3.889, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7686 + }, + { + "epoch": 428.0, + "eval_accuracy": 0.9256152125279642, + "eval_auc": 0.9535825980738566, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.767445557145128, + "eval_f1_macro": 0.8615862980157476, + "eval_loss": 0.22767424583435059, + "eval_pr_auc": 0.7842346625186999, + "eval_precision": 0.7567398119122257, + "eval_precision_macro": 0.8575239295026598, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8658092289667121, + "eval_runtime": 0.254, + "eval_samples_per_second": 641.669, + "eval_steps_per_second": 3.937, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7704 + }, + { + "epoch": 429.0, + "eval_accuracy": 0.9256152125279642, + "eval_auc": 0.9535961651774029, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.767445557145128, + "eval_f1_macro": 0.8615862980157476, + "eval_loss": 0.22765418887138367, + "eval_pr_auc": 0.7843006481928805, + "eval_precision": 0.7567398119122257, + "eval_precision_macro": 0.8575239295026598, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8658092289667121, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.28, + "eval_steps_per_second": 3.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7722 + }, + { + "epoch": 430.0, + "eval_accuracy": 0.9256660565385397, + "eval_auc": 0.9536132165155758, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7674936386768448, + "eval_f1_macro": 0.8616274777746364, + "eval_loss": 0.22762420773506165, + "eval_pr_auc": 0.7844000578756268, + "eval_precision": 0.7571383746470034, + "eval_precision_macro": 0.8576966674521347, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657083516150216, + "eval_runtime": 0.2691, + "eval_samples_per_second": 605.732, + "eval_steps_per_second": 3.716, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7740 + }, + { + "epoch": 431.0, + "eval_accuracy": 0.9256660565385397, + "eval_auc": 0.9536164866352254, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7674936386768448, + "eval_f1_macro": 0.8616274777746364, + "eval_loss": 0.22761479020118713, + "eval_pr_auc": 0.7844231309752159, + "eval_precision": 0.7571383746470034, + "eval_precision_macro": 0.8576966674521347, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657083516150216, + "eval_runtime": 0.2572, + "eval_samples_per_second": 633.783, + "eval_steps_per_second": 3.888, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7758 + }, + { + "epoch": 432.0, + "eval_accuracy": 0.9256660565385397, + "eval_auc": 0.9536227348995558, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7674936386768448, + "eval_f1_macro": 0.8616274777746364, + "eval_loss": 0.22760987281799316, + "eval_pr_auc": 0.7844561314285999, + "eval_precision": 0.7571383746470034, + "eval_precision_macro": 0.8576966674521347, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657083516150216, + "eval_runtime": 0.2144, + "eval_samples_per_second": 760.142, + "eval_steps_per_second": 4.663, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7776 + }, + { + "epoch": 433.0, + "eval_accuracy": 0.9256660565385397, + "eval_auc": 0.9536278736590051, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7674936386768448, + "eval_f1_macro": 0.8616274777746364, + "eval_loss": 0.22760248184204102, + "eval_pr_auc": 0.7844848515258783, + "eval_precision": 0.7571383746470034, + "eval_precision_macro": 0.8576966674521347, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657083516150216, + "eval_runtime": 0.2745, + "eval_samples_per_second": 593.759, + "eval_steps_per_second": 3.643, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7794 + }, + { + "epoch": 434.0, + "eval_accuracy": 0.9257169005491153, + "eval_auc": 0.9536346669432771, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676896167912227, + "eval_f1_macro": 0.861739927468447, + "eval_loss": 0.22759221494197845, + "eval_pr_auc": 0.7845111553142384, + "eval_precision": 0.7572145545796738, + "eval_precision_macro": 0.8577638306878952, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8658695899252441, + "eval_runtime": 0.216, + "eval_samples_per_second": 754.683, + "eval_steps_per_second": 4.63, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7812 + }, + { + "epoch": 435.0, + "eval_accuracy": 0.9257169005491153, + "eval_auc": 0.9536369248830353, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676896167912227, + "eval_f1_macro": 0.861739927468447, + "eval_loss": 0.2275882065296173, + "eval_pr_auc": 0.7845220230482824, + "eval_precision": 0.7572145545796738, + "eval_precision_macro": 0.8577638306878952, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8658695899252441, + "eval_runtime": 0.262, + "eval_samples_per_second": 622.083, + "eval_steps_per_second": 3.816, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7830 + }, + { + "epoch": 436.0, + "eval_accuracy": 0.9257677445596909, + "eval_auc": 0.9536390660328059, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7678855325914149, + "eval_f1_macro": 0.8618523468803471, + "eval_loss": 0.2275806963443756, + "eval_pr_auc": 0.7845372505398349, + "eval_precision": 0.7572906867356538, + "eval_precision_macro": 0.8578309735638339, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660308282354665, + "eval_runtime": 0.2506, + "eval_samples_per_second": 650.493, + "eval_steps_per_second": 3.991, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7848 + }, + { + "epoch": 437.0, + "eval_accuracy": 0.9257169005491153, + "eval_auc": 0.9536480004668485, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676896167912227, + "eval_f1_macro": 0.861739927468447, + "eval_loss": 0.2275666743516922, + "eval_pr_auc": 0.7845807540266186, + "eval_precision": 0.7572145545796738, + "eval_precision_macro": 0.8577638306878952, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8658695899252441, + "eval_runtime": 0.1853, + "eval_samples_per_second": 879.496, + "eval_steps_per_second": 5.396, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7866 + }, + { + "epoch": 438.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9536552025160767, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7679338317162399, + "eval_f1_macro": 0.8618936307360285, + "eval_loss": 0.22755169868469238, + "eval_pr_auc": 0.7846223392518402, + "eval_precision": 0.7576898932831136, + "eval_precision_macro": 0.8580040292771594, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8659299508837761, + "eval_runtime": 0.2108, + "eval_samples_per_second": 773.198, + "eval_steps_per_second": 4.744, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7884 + }, + { + "epoch": 439.0, + "eval_accuracy": 0.9257677445596909, + "eval_auc": 0.95366627809989, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676639083386378, + "eval_f1_macro": 0.8617455448748739, + "eval_loss": 0.2275334894657135, + "eval_pr_auc": 0.784667391259121, + "eval_precision": 0.7577756833176249, + "eval_precision_macro": 0.8579900557928737, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8656376547425971, + "eval_runtime": 0.259, + "eval_samples_per_second": 629.311, + "eval_steps_per_second": 3.861, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7902 + }, + { + "epoch": 440.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9536672708147835, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7679338317162399, + "eval_f1_macro": 0.8618936307360285, + "eval_loss": 0.22753211855888367, + "eval_pr_auc": 0.7846795397266301, + "eval_precision": 0.7576898932831136, + "eval_precision_macro": 0.8580040292771594, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8659299508837761, + "eval_runtime": 0.2424, + "eval_samples_per_second": 672.466, + "eval_steps_per_second": 4.126, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7920 + }, + { + "epoch": 441.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9536694119645541, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7679338317162399, + "eval_f1_macro": 0.8618936307360285, + "eval_loss": 0.22753164172172546, + "eval_pr_auc": 0.7846730527925044, + "eval_precision": 0.7576898932831136, + "eval_precision_macro": 0.8580040292771594, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8659299508837761, + "eval_runtime": 0.2647, + "eval_samples_per_second": 615.862, + "eval_steps_per_second": 3.778, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7938 + }, + { + "epoch": 442.0, + "eval_accuracy": 0.9257677445596909, + "eval_auc": 0.9536803123633861, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7677378300986318, + "eval_f1_macro": 0.8617811692096791, + "eval_loss": 0.22751472890377045, + "eval_pr_auc": 0.7847227034562287, + "eval_precision": 0.7576138147566719, + "eval_precision_macro": 0.8579369201187351, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657687125735536, + "eval_runtime": 0.2667, + "eval_samples_per_second": 611.172, + "eval_steps_per_second": 3.75, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7956 + }, + { + "epoch": 443.0, + "eval_accuracy": 0.9257677445596909, + "eval_auc": 0.9536880691650549, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676639083386378, + "eval_f1_macro": 0.8617455448748739, + "eval_loss": 0.22749866545200348, + "eval_pr_auc": 0.7847641543874231, + "eval_precision": 0.7577756833176249, + "eval_precision_macro": 0.8579900557928737, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8656376547425971, + "eval_runtime": 0.2405, + "eval_samples_per_second": 677.868, + "eval_steps_per_second": 4.159, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7974 + }, + { + "epoch": 444.0, + "eval_accuracy": 0.9257677445596909, + "eval_auc": 0.9536937626769448, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676639083386378, + "eval_f1_macro": 0.8617455448748739, + "eval_loss": 0.2274913638830185, + "eval_pr_auc": 0.7847876605630844, + "eval_precision": 0.7577756833176249, + "eval_precision_macro": 0.8579900557928737, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8656376547425971, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.896, + "eval_steps_per_second": 3.895, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7992 + }, + { + "epoch": 444.44444444444446, + "grad_norm": 19008.333984375, + "learning_rate": 3.72113927636733e-08, + "loss": 0.1854, + "step": 8000 + }, + { + "epoch": 445.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.953696935471605, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.767859984089101, + "eval_f1_macro": 0.861858042633035, + "eval_loss": 0.22748790681362152, + "eval_pr_auc": 0.7848011703648987, + "eval_precision": 0.7578517587939698, + "eval_precision_macro": 0.8580571582128063, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657988930528197, + "eval_runtime": 0.2651, + "eval_samples_per_second": 614.861, + "eval_steps_per_second": 3.772, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8010 + }, + { + "epoch": 446.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9536995243163275, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.767859984089101, + "eval_f1_macro": 0.861858042633035, + "eval_loss": 0.22748683393001556, + "eval_pr_auc": 0.7848132010793348, + "eval_precision": 0.7578517587939698, + "eval_precision_macro": 0.8580571582128063, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657988930528197, + "eval_runtime": 0.2471, + "eval_samples_per_second": 659.648, + "eval_steps_per_second": 4.047, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8028 + }, + { + "epoch": 447.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9537016265361022, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.767859984089101, + "eval_f1_macro": 0.861858042633035, + "eval_loss": 0.22748340666294098, + "eval_pr_auc": 0.7848243277439235, + "eval_precision": 0.7578517587939698, + "eval_precision_macro": 0.8580571582128063, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657988930528197, + "eval_runtime": 0.2047, + "eval_samples_per_second": 796.254, + "eval_steps_per_second": 4.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8046 + }, + { + "epoch": 448.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9537030474809498, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7679338317162399, + "eval_f1_macro": 0.8618936307360285, + "eval_loss": 0.2274865061044693, + "eval_pr_auc": 0.7848313481583303, + "eval_precision": 0.7576898932831136, + "eval_precision_macro": 0.8580040292771594, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8659299508837761, + "eval_runtime": 0.2613, + "eval_samples_per_second": 623.69, + "eval_steps_per_second": 3.826, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8064 + }, + { + "epoch": 449.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9537053443507039, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7679338317162399, + "eval_f1_macro": 0.8618936307360285, + "eval_loss": 0.22748111188411713, + "eval_pr_auc": 0.7848426067264029, + "eval_precision": 0.7576898932831136, + "eval_precision_macro": 0.8580040292771594, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8659299508837761, + "eval_runtime": 0.2031, + "eval_samples_per_second": 802.733, + "eval_steps_per_second": 4.925, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8082 + }, + { + "epoch": 450.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537081765079003, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7681297709923665, + "eval_f1_macro": 0.8620060619667715, + "eval_loss": 0.22747540473937988, + "eval_pr_auc": 0.7848573104950377, + "eval_precision": 0.7577659240665202, + "eval_precision_macro": 0.8580711180917517, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660911891939986, + "eval_runtime": 0.2266, + "eval_samples_per_second": 719.273, + "eval_steps_per_second": 4.413, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8100 + }, + { + "epoch": 451.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537093638727732, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7681297709923665, + "eval_f1_macro": 0.8620060619667715, + "eval_loss": 0.22747638821601868, + "eval_pr_auc": 0.7848599055730325, + "eval_precision": 0.7577659240665202, + "eval_precision_macro": 0.8580711180917517, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660911891939986, + "eval_runtime": 0.2522, + "eval_samples_per_second": 646.395, + "eval_steps_per_second": 3.966, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8118 + }, + { + "epoch": 452.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.953713694834809, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7681297709923665, + "eval_f1_macro": 0.8620060619667715, + "eval_loss": 0.22746768593788147, + "eval_pr_auc": 0.7848753398216984, + "eval_precision": 0.7577659240665202, + "eval_precision_macro": 0.8580711180917517, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660911891939986, + "eval_runtime": 0.2493, + "eval_samples_per_second": 653.726, + "eval_steps_per_second": 4.011, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8136 + }, + { + "epoch": 453.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537144345047297, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7681297709923665, + "eval_f1_macro": 0.8620060619667715, + "eval_loss": 0.22746726870536804, + "eval_pr_auc": 0.7848749928109487, + "eval_precision": 0.7577659240665202, + "eval_precision_macro": 0.8580711180917517, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660911891939986, + "eval_runtime": 0.2607, + "eval_samples_per_second": 625.248, + "eval_steps_per_second": 3.836, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8154 + }, + { + "epoch": 454.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537162252845378, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7681297709923665, + "eval_f1_macro": 0.8620060619667715, + "eval_loss": 0.22746579349040985, + "eval_pr_auc": 0.7848822771158814, + "eval_precision": 0.7577659240665202, + "eval_precision_macro": 0.8580711180917517, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660911891939986, + "eval_runtime": 0.2463, + "eval_samples_per_second": 661.74, + "eval_steps_per_second": 4.06, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8172 + }, + { + "epoch": 455.0, + "eval_accuracy": 0.9260219646125687, + "eval_auc": 0.9537124685399404, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7687172150691464, + "eval_f1_macro": 0.8623431740348002, + "eval_loss": 0.22747564315795898, + "eval_pr_auc": 0.7848568499525364, + "eval_precision": 0.7579937304075235, + "eval_precision_macro": 0.8582722627034582, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665749041246662, + "eval_runtime": 0.2569, + "eval_samples_per_second": 634.446, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8190 + }, + { + "epoch": 456.0, + "eval_accuracy": 0.9260219646125687, + "eval_auc": 0.9537148724671828, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7687172150691464, + "eval_f1_macro": 0.8623431740348002, + "eval_loss": 0.22747227549552917, + "eval_pr_auc": 0.7848699531009984, + "eval_precision": 0.7579937304075235, + "eval_precision_macro": 0.8582722627034582, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665749041246662, + "eval_runtime": 0.2171, + "eval_samples_per_second": 750.821, + "eval_steps_per_second": 4.606, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8208 + }, + { + "epoch": 457.0, + "eval_accuracy": 0.925971120601993, + "eval_auc": 0.9537148043396901, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.768595041322314, + "eval_f1_macro": 0.8622662895753321, + "eval_loss": 0.22747254371643066, + "eval_pr_auc": 0.784871702941944, + "eval_precision": 0.7577561892823567, + "eval_precision_macro": 0.858152234351077, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665447236454001, + "eval_runtime": 0.2395, + "eval_samples_per_second": 680.611, + "eval_steps_per_second": 4.176, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8226 + }, + { + "epoch": 458.0, + "eval_accuracy": 0.925971120601993, + "eval_auc": 0.9537153006971368, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.768595041322314, + "eval_f1_macro": 0.8622662895753321, + "eval_loss": 0.22747036814689636, + "eval_pr_auc": 0.7848700532008381, + "eval_precision": 0.7577561892823567, + "eval_precision_macro": 0.858152234351077, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665447236454001, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.347, + "eval_steps_per_second": 3.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8244 + }, + { + "epoch": 459.0, + "eval_accuracy": 0.925971120601993, + "eval_auc": 0.9537154856146172, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.768595041322314, + "eval_f1_macro": 0.8622662895753321, + "eval_loss": 0.22747208178043365, + "eval_pr_auc": 0.7848674676035483, + "eval_precision": 0.7577561892823567, + "eval_precision_macro": 0.858152234351077, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665447236454001, + "eval_runtime": 0.2162, + "eval_samples_per_second": 753.78, + "eval_steps_per_second": 4.624, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8262 + }, + { + "epoch": 460.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537164783295108, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22747138142585754, + "eval_pr_auc": 0.7848698987106607, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2554, + "eval_samples_per_second": 638.29, + "eval_steps_per_second": 3.916, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8280 + }, + { + "epoch": 461.0, + "eval_accuracy": 0.925971120601993, + "eval_auc": 0.9537188335942584, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.768595041322314, + "eval_f1_macro": 0.8622662895753321, + "eval_loss": 0.2274673730134964, + "eval_pr_auc": 0.7848777447449568, + "eval_precision": 0.7577561892823567, + "eval_precision_macro": 0.858152234351077, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665447236454001, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.971, + "eval_steps_per_second": 3.896, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8298 + }, + { + "epoch": 462.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537204881190811, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22746412456035614, + "eval_pr_auc": 0.7848871874390749, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2183, + "eval_samples_per_second": 746.701, + "eval_steps_per_second": 4.581, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8316 + }, + { + "epoch": 463.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537223956888766, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274623066186905, + "eval_pr_auc": 0.784896188141376, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2673, + "eval_samples_per_second": 609.736, + "eval_steps_per_second": 3.741, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8334 + }, + { + "epoch": 464.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537243227236701, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274596393108368, + "eval_pr_auc": 0.7849067717237345, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2647, + "eval_samples_per_second": 615.711, + "eval_steps_per_second": 3.777, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8352 + }, + { + "epoch": 465.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537252959735659, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22745810449123383, + "eval_pr_auc": 0.7849091375774293, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2588, + "eval_samples_per_second": 629.717, + "eval_steps_per_second": 3.863, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8370 + }, + { + "epoch": 466.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537274565883342, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274550497531891, + "eval_pr_auc": 0.7849133909574995, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2576, + "eval_samples_per_second": 632.839, + "eval_steps_per_second": 3.882, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8388 + }, + { + "epoch": 467.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537294225531237, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274521142244339, + "eval_pr_auc": 0.7849233363238722, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2621, + "eval_samples_per_second": 621.858, + "eval_steps_per_second": 3.815, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8406 + }, + { + "epoch": 468.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537300065030612, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.227451354265213, + "eval_pr_auc": 0.7849296985834374, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.266, + "eval_samples_per_second": 612.691, + "eval_steps_per_second": 3.759, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8424 + }, + { + "epoch": 469.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.953731164670437, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22744858264923096, + "eval_pr_auc": 0.7849332304496031, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.1831, + "eval_samples_per_second": 890.399, + "eval_steps_per_second": 5.463, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8442 + }, + { + "epoch": 470.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537301622230444, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22745059430599213, + "eval_pr_auc": 0.7849302221742441, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.282, + "eval_samples_per_second": 577.963, + "eval_steps_per_second": 3.546, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8460 + }, + { + "epoch": 471.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.95373253695279, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274470180273056, + "eval_pr_auc": 0.7849404295185345, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2547, + "eval_samples_per_second": 639.882, + "eval_steps_per_second": 3.926, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8478 + }, + { + "epoch": 472.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537337243176629, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22744475305080414, + "eval_pr_auc": 0.7849440080112278, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2516, + "eval_samples_per_second": 647.737, + "eval_steps_per_second": 3.974, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8496 + }, + { + "epoch": 472.22222222222223, + "grad_norm": 16415.080078125, + "learning_rate": 9.409753403698373e-09, + "loss": 0.185, + "step": 8500 + }, + { + "epoch": 473.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537344250575877, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22744259238243103, + "eval_pr_auc": 0.7849494179086262, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2539, + "eval_samples_per_second": 642.078, + "eval_steps_per_second": 3.939, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8514 + }, + { + "epoch": 474.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537351647275085, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22744259238243103, + "eval_pr_auc": 0.7849503915002546, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.1794, + "eval_samples_per_second": 908.686, + "eval_steps_per_second": 5.575, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8532 + }, + { + "epoch": 475.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537357876074417, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22744180262088776, + "eval_pr_auc": 0.7849530625634429, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.1819, + "eval_samples_per_second": 895.935, + "eval_steps_per_second": 5.497, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8550 + }, + { + "epoch": 476.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537367997873333, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274399697780609, + "eval_pr_auc": 0.7849568926584508, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2575, + "eval_samples_per_second": 633.091, + "eval_steps_per_second": 3.884, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8568 + }, + { + "epoch": 477.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537370139023102, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274392694234848, + "eval_pr_auc": 0.784958754067368, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2669, + "eval_samples_per_second": 610.802, + "eval_steps_per_second": 3.747, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8586 + }, + { + "epoch": 478.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537376562472413, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22743819653987885, + "eval_pr_auc": 0.7849582401594454, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2266, + "eval_samples_per_second": 719.444, + "eval_steps_per_second": 4.414, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8604 + }, + { + "epoch": 479.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537377925022268, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743773460388184, + "eval_pr_auc": 0.7849574280187238, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2575, + "eval_samples_per_second": 632.921, + "eval_steps_per_second": 3.883, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8622 + }, + { + "epoch": 480.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537384932421518, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743669152259827, + "eval_pr_auc": 0.7849621773766102, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2648, + "eval_samples_per_second": 615.492, + "eval_steps_per_second": 3.776, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8640 + }, + { + "epoch": 481.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537382012671831, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743773460388184, + "eval_pr_auc": 0.7849637957606732, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2656, + "eval_samples_per_second": 613.647, + "eval_steps_per_second": 3.765, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8658 + }, + { + "epoch": 482.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537380066172038, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.227437824010849, + "eval_pr_auc": 0.7849623241007161, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.1901, + "eval_samples_per_second": 857.378, + "eval_steps_per_second": 5.26, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8676 + }, + { + "epoch": 483.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537382791271747, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743763029575348, + "eval_pr_auc": 0.7849636397572854, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2674, + "eval_samples_per_second": 609.618, + "eval_steps_per_second": 3.74, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8694 + }, + { + "epoch": 484.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537387365546257, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743697464466095, + "eval_pr_auc": 0.7849651242159179, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2636, + "eval_samples_per_second": 618.398, + "eval_steps_per_second": 3.794, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8712 + }, + { + "epoch": 485.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537391745170787, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.2274360954761505, + "eval_pr_auc": 0.7849672499360805, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.1892, + "eval_samples_per_second": 861.673, + "eval_steps_per_second": 5.286, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8730 + }, + { + "epoch": 486.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.953739330237062, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743603587150574, + "eval_pr_auc": 0.784968007514094, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2358, + "eval_samples_per_second": 691.342, + "eval_steps_per_second": 4.241, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8748 + }, + { + "epoch": 487.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.95373934970206, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743603587150574, + "eval_pr_auc": 0.7849684807581643, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2101, + "eval_samples_per_second": 775.889, + "eval_steps_per_second": 4.76, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8766 + }, + { + "epoch": 488.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537393886320558, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.2274361550807953, + "eval_pr_auc": 0.7849697577570401, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.25, + "eval_samples_per_second": 652.0, + "eval_steps_per_second": 4.0, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8784 + }, + { + "epoch": 489.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537394859570454, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743597626686096, + "eval_pr_auc": 0.784969480914725, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.1914, + "eval_samples_per_second": 851.633, + "eval_steps_per_second": 5.225, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8802 + }, + { + "epoch": 490.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537395540845381, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.2274358570575714, + "eval_pr_auc": 0.784970228255774, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2687, + "eval_samples_per_second": 606.685, + "eval_steps_per_second": 3.722, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8820 + }, + { + "epoch": 491.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537395443520391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743581235408783, + "eval_pr_auc": 0.7849691356929127, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.272, + "eval_samples_per_second": 599.26, + "eval_steps_per_second": 3.676, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8838 + }, + { + "epoch": 492.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537396222120308, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743573784828186, + "eval_pr_auc": 0.7849709563979171, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2531, + "eval_samples_per_second": 644.069, + "eval_steps_per_second": 3.951, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8856 + } + ], + "logging_steps": 500, + "max_steps": 9000, + "num_input_tokens_seen": 0, + "num_train_epochs": 500, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6657228093507264.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/training_args.bin b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8903358100d3be09ad49078090c6e572b3ddef68 --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:499ba8a39afec206dd7194e2d216bf0be2633330bfcda3d90a12ddcbc04cdaca +size 5368 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/config.json b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a424582b16776120730808f810ad9081375d580e --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/config.json @@ -0,0 +1,52 @@ +{ + "architectures": [ + "GloMeModelForTokenClassification" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "bos_token_id": 28, + "cdr_weight": 0.0, + "class_weights": [ + 0.1, + 0.9 + ], + "classifier_activation": "gelu", + "classifier_bias": false, + "classifier_dropout": 0.1, + "classifier_pooling": "cls", + "cls_token_id": 28, + "compress_block_size": 16, + "compress_block_sliding_stride": 16, + "decoder_bias": true, + "dice_weight": 0.1, + "embedding_dropout": 0.1, + "eos_token_id": 29, + "hidden_activation": "gelu", + "hidden_size": 320, + "inner_rank": 32, + "intermediate_size": 1280, + "kv_heads": 10, + "mask_token_id": 31, + "mlp_bias": false, + "mlp_dropout": 0.1, + "model_size": "tiny", + "model_type": "glome", + "norm_bias": false, + "norm_eps": 1e-05, + "num_attention_heads": 20, + "num_hidden_layers": 6, + "num_selected_blocks": 8, + "num_slots": 64, + "pad_token_id": 30, + "reference_compile": null, + "selection_block_size": 16, + "sep_token_id": 29, + "sliding_window_size": 0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.52.3", + "unk_token_id": 27, + "use_glome": true, + "use_nsa": true, + "vocab_size": 36 +} diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/model.safetensors b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..344739af9ae8101f3177280930f96088f9f9607b --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd6b4e359b5d6cb87fd11e6785513fb93cd33e2621e530c3d5d0372c61965079 +size 61385376 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/optimizer.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8eb584ea45d85a13f7833c0d1ca2a4c9b5ce620 --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd7657ee28cb041646b9be46a93baffab2afaf1966b7a9f81a83df51bd473eff +size 122881658 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/rng_state.pth b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ea4a281732ef8791236faff0159461d69c164230 --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae123764811139e2cb14efaf37527d6fd81efe423a4a92fa24d82e5d54b5501c +size 14244 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/scaler.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363 +size 988 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/scheduler.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a57622d23c8c6d1195ffb892b5e96972f71046de --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2cddf9c2f8aa06961889d4d18e579770b9648558e3b3dd47f47b9be5707aaa0 +size 1064 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/trainer_state.json b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bbfb4e2ce03cd348800695619849bfc23008822e --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/trainer_state.json @@ -0,0 +1,12129 @@ +{ + "best_global_step": 8856, + "best_metric": 0.7849709563979171, + "best_model_checkpoint": "./results/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856", + "epoch": 499.0, + "eval_steps": 500, + "global_step": 8982, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy": 0.3401972747610332, + "eval_auc": 0.39064302367564674, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25900759435847653, + "eval_f1_macro": 0.3321798728791878, + "eval_loss": 1.0617570877075195, + "eval_pr_auc": 0.1212308124824295, + "eval_precision": 0.15736885928393005, + "eval_precision_macro": 0.49944165947453734, + "eval_pred_class_0": 5256, + "eval_pred_class_1": 14412, + "eval_predicted_binding_ratio": 0.7327638804148872, + "eval_recall": 0.7313769751693002, + "eval_recall_macro": 0.4991767473782156, + "eval_runtime": 0.304, + "eval_samples_per_second": 536.239, + "eval_steps_per_second": 3.29, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 18 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.341010778930242, + "eval_auc": 0.39081343973238586, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2590750585948665, + "eval_f1_macro": 0.33285209231533375, + "eval_loss": 1.0604556798934937, + "eval_pr_auc": 0.12126612292918731, + "eval_precision": 0.1574485825458588, + "eval_precision_macro": 0.4995923731531417, + "eval_pred_class_0": 5276, + "eval_pred_class_1": 14392, + "eval_predicted_binding_ratio": 0.731747000203376, + "eval_recall": 0.7307320219284101, + "eval_recall_macro": 0.4993975193845588, + "eval_runtime": 0.2793, + "eval_samples_per_second": 583.516, + "eval_steps_per_second": 3.58, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 36 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.34223103518405534, + "eval_auc": 0.3911369382652214, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2591765446944969, + "eval_f1_macro": 0.33385837704253485, + "eval_loss": 1.058252215385437, + "eval_pr_auc": 0.12133107613942488, + "eval_precision": 0.15756858376270713, + "eval_precision_macro": 0.4998170849458089, + "eval_pred_class_0": 5306, + "eval_pred_class_1": 14362, + "eval_predicted_binding_ratio": 0.7302216798861094, + "eval_recall": 0.7297645920670751, + "eval_recall_macro": 0.49972867739407356, + "eval_runtime": 0.2676, + "eval_samples_per_second": 609.165, + "eval_steps_per_second": 3.737, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 54 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.3441122635753508, + "eval_auc": 0.3915867840995182, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.259556882103088, + "eval_f1_macro": 0.33544605079873757, + "eval_loss": 1.0551481246948242, + "eval_pr_auc": 0.12142208631760734, + "eval_precision": 0.15788003631031353, + "eval_precision_macro": 0.500391299247358, + "eval_pred_class_0": 5347, + "eval_pred_class_1": 14321, + "eval_predicted_binding_ratio": 0.7281370754525117, + "eval_recall": 0.7291196388261851, + "eval_recall_macro": 0.5005832394650029, + "eval_runtime": 0.2336, + "eval_samples_per_second": 697.796, + "eval_steps_per_second": 4.281, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 72 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.3457392719137686, + "eval_auc": 0.39218283153314865, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2597791072250345, + "eval_f1_macro": 0.3367955302889021, + "eval_loss": 1.0511513948440552, + "eval_pr_auc": 0.12154600341235242, + "eval_precision": 0.15809003710705033, + "eval_precision_macro": 0.5007720380521324, + "eval_pred_class_0": 5385, + "eval_pred_class_1": 14283, + "eval_predicted_binding_ratio": 0.7262050030506406, + "eval_recall": 0.72815220896485, + "eval_recall_macro": 0.5011558413086458, + "eval_runtime": 0.2629, + "eval_samples_per_second": 620.078, + "eval_steps_per_second": 3.804, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 90 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.34873906853772624, + "eval_auc": 0.39291782012189097, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.259894840238054, + "eval_f1_macro": 0.33921701928376435, + "eval_loss": 1.0462485551834106, + "eval_pr_auc": 0.1216940029412557, + "eval_precision": 0.1583133887089962, + "eval_precision_macro": 0.5011632853468087, + "eval_pred_class_0": 5462, + "eval_pred_class_1": 14206, + "eval_predicted_binding_ratio": 0.722290014236323, + "eval_recall": 0.7252499193808449, + "eval_recall_macro": 0.5017569691067321, + "eval_runtime": 0.2393, + "eval_samples_per_second": 681.219, + "eval_steps_per_second": 4.179, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 108 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.3517897091722595, + "eval_auc": 0.3937714770704174, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2599407906193766, + "eval_f1_macro": 0.3416488972772128, + "eval_loss": 1.0405118465423584, + "eval_pr_auc": 0.12187498322705145, + "eval_precision": 0.1585020529520034, + "eval_precision_macro": 0.5014812682659693, + "eval_pred_class_0": 5542, + "eval_pred_class_1": 14126, + "eval_predicted_binding_ratio": 0.7182224933902787, + "eval_recall": 0.7220251531763947, + "eval_recall_macro": 0.5022572195531276, + "eval_runtime": 0.2765, + "eval_samples_per_second": 589.571, + "eval_steps_per_second": 3.617, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 126 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.354586129753915, + "eval_auc": 0.3947741191129793, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2591338858410179, + "eval_f1_macro": 0.3436917965373002, + "eval_loss": 1.0338975191116333, + "eval_pr_auc": 0.12208733120990471, + "eval_precision": 0.1581985320316397, + "eval_precision_macro": 0.5009271275952343, + "eval_pred_class_0": 5635, + "eval_pred_class_1": 14033, + "eval_predicted_binding_ratio": 0.713494000406752, + "eval_recall": 0.7158980973879394, + "eval_recall_macro": 0.5014270471245847, + "eval_runtime": 0.2385, + "eval_samples_per_second": 683.567, + "eval_steps_per_second": 4.194, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 144 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.3598230628431971, + "eval_auc": 0.39592411118975185, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25913503971756396, + "eval_f1_macro": 0.3477761944928628, + "eval_loss": 1.0263975858688354, + "eval_pr_auc": 0.12233214426039367, + "eval_precision": 0.15848567727076435, + "eval_precision_macro": 0.5013938604573427, + "eval_pred_class_0": 5774, + "eval_pred_class_1": 13894, + "eval_predicted_binding_ratio": 0.70642668293675, + "eval_recall": 0.710093518219929, + "eval_recall_macro": 0.502176595531767, + "eval_runtime": 0.2745, + "eval_samples_per_second": 593.873, + "eval_steps_per_second": 3.643, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 162 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.3636872076469392, + "eval_auc": 0.3972021050928084, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25801861623288075, + "eval_f1_macro": 0.35051449303788773, + "eval_loss": 1.0180495977401733, + "eval_pr_auc": 0.12260540333611444, + "eval_precision": 0.15807060874618625, + "eval_precision_macro": 0.5006720376838353, + "eval_pred_class_0": 5902, + "eval_pred_class_1": 13766, + "eval_predicted_binding_ratio": 0.6999186495830791, + "eval_recall": 0.7017091260883586, + "eval_recall_macro": 0.5010628083511147, + "eval_runtime": 0.2708, + "eval_samples_per_second": 601.838, + "eval_steps_per_second": 3.692, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 180 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.3691275167785235, + "eval_auc": 0.39866621357342186, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.258604206500956, + "eval_f1_macro": 0.35478882891419483, + "eval_loss": 1.0087939500808716, + "eval_pr_auc": 0.12292033936081492, + "eval_precision": 0.1587092042537587, + "eval_precision_macro": 0.5016983780261003, + "eval_pred_class_0": 6033, + "eval_pred_class_1": 13635, + "eval_predicted_binding_ratio": 0.6932580841976815, + "eval_recall": 0.6978394066430184, + "eval_recall_macro": 0.5027194256611, + "eval_runtime": 0.2616, + "eval_samples_per_second": 623.126, + "eval_steps_per_second": 3.823, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 198 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.3740593858043523, + "eval_auc": 0.4002664991794433, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25823944086280654, + "eval_f1_macro": 0.35841740282728696, + "eval_loss": 0.9987770318984985, + "eval_pr_auc": 0.12326823892822446, + "eval_precision": 0.15878778897451096, + "eval_precision_macro": 0.5017853397238077, + "eval_pred_class_0": 6172, + "eval_pred_class_1": 13496, + "eval_predicted_binding_ratio": 0.6861907667276794, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.5028947176998165, + "eval_runtime": 0.258, + "eval_samples_per_second": 631.708, + "eval_steps_per_second": 3.876, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 216 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.37980475899938987, + "eval_auc": 0.40207323055334293, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25811945018854154, + "eval_f1_macro": 0.36265804779890953, + "eval_loss": 0.987876832485199, + "eval_pr_auc": 0.12366119818610516, + "eval_precision": 0.15905854133873024, + "eval_precision_macro": 0.5021624301446299, + "eval_pred_class_0": 6327, + "eval_pred_class_1": 13341, + "eval_predicted_binding_ratio": 0.6783099450884685, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.5035528974067893, + "eval_runtime": 0.254, + "eval_samples_per_second": 641.68, + "eval_steps_per_second": 3.937, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 234 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.3867703884482408, + "eval_auc": 0.40404318566725245, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2582866982350409, + "eval_f1_macro": 0.36779990383041317, + "eval_loss": 0.9760332107543945, + "eval_pr_auc": 0.12409453800524387, + "eval_precision": 0.1595744680851064, + "eval_precision_macro": 0.5028818867776484, + "eval_pred_class_0": 6508, + "eval_pred_class_1": 13160, + "eval_predicted_binding_ratio": 0.6691071791742933, + "eval_recall": 0.6772009029345373, + "eval_recall_macro": 0.5048043507851898, + "eval_runtime": 0.2717, + "eval_samples_per_second": 599.919, + "eval_steps_per_second": 3.68, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 252 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.39429530201342283, + "eval_auc": 0.40629005957398867, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2577107607950651, + "eval_f1_macro": 0.37306889008104693, + "eval_loss": 0.9632152915000916, + "eval_pr_auc": 0.12458802431940903, + "eval_precision": 0.15971578622181032, + "eval_precision_macro": 0.5029977740632862, + "eval_pred_class_0": 6720, + "eval_pred_class_1": 12948, + "eval_predicted_binding_ratio": 0.6583282489322758, + "eval_recall": 0.6668816510802967, + "eval_recall_macro": 0.5050772111259515, + "eval_runtime": 0.253, + "eval_samples_per_second": 644.248, + "eval_steps_per_second": 3.952, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 270 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.4020744356314826, + "eval_auc": 0.408681470822737, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25738822935084615, + "eval_f1_macro": 0.37848134871797623, + "eval_loss": 0.9496278166770935, + "eval_pr_auc": 0.12511500176534787, + "eval_precision": 0.16003140950137418, + "eval_precision_macro": 0.5033533652151325, + "eval_pred_class_0": 6933, + "eval_pred_class_1": 12735, + "eval_predicted_binding_ratio": 0.6474984746796827, + "eval_recall": 0.6572073524669462, + "eval_recall_macro": 0.5057630895249562, + "eval_runtime": 0.269, + "eval_samples_per_second": 606.014, + "eval_steps_per_second": 3.718, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 288 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.408989221069758, + "eval_auc": 0.4113766625614338, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.255349135169763, + "eval_f1_macro": 0.38271123621844805, + "eval_loss": 0.9350630640983582, + "eval_pr_auc": 0.1257149536327416, + "eval_precision": 0.15932528579422817, + "eval_precision_macro": 0.5022775332449281, + "eval_pred_class_0": 7159, + "eval_pred_class_1": 12509, + "eval_predicted_binding_ratio": 0.6360077282896075, + "eval_recall": 0.6426959045469204, + "eval_recall_macro": 0.5039700323120913, + "eval_runtime": 0.2687, + "eval_samples_per_second": 606.61, + "eval_steps_per_second": 3.722, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 306 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.4161582265609111, + "eval_auc": 0.41440477389195646, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2538176619663396, + "eval_f1_macro": 0.3871501973338609, + "eval_loss": 0.9196970462799072, + "eval_pr_auc": 0.12640556118775828, + "eval_precision": 0.158935546875, + "eval_precision_macro": 0.5016899956597223, + "eval_pred_class_0": 7380, + "eval_pred_class_1": 12288, + "eval_predicted_binding_ratio": 0.62477120195241, + "eval_recall": 0.6297968397291196, + "eval_recall_macro": 0.5029831666503388, + "eval_runtime": 0.2554, + "eval_samples_per_second": 638.105, + "eval_steps_per_second": 3.915, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 324 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.4237848281472443, + "eval_auc": 0.4176888499450513, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25100786464873437, + "eval_f1_macro": 0.3913994084656603, + "eval_loss": 0.9033117294311523, + "eval_pr_auc": 0.12714405405007598, + "eval_precision": 0.15785536159600996, + "eval_precision_macro": 0.5002421610284318, + "eval_pred_class_0": 7638, + "eval_pred_class_1": 12030, + "eval_predicted_binding_ratio": 0.611653447223917, + "eval_recall": 0.6123831022250886, + "eval_recall_macro": 0.5004331156685895, + "eval_runtime": 0.2637, + "eval_samples_per_second": 618.078, + "eval_steps_per_second": 3.792, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 342 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.4312080536912752, + "eval_auc": 0.4212995000006521, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24904343156340203, + "eval_f1_macro": 0.39564573885956833, + "eval_loss": 0.8857852220535278, + "eval_pr_auc": 0.12799421494868934, + "eval_precision": 0.1572566971854866, + "eval_precision_macro": 0.49948708843014167, + "eval_pred_class_0": 7872, + "eval_pred_class_1": 11796, + "eval_predicted_binding_ratio": 0.5997559487492373, + "eval_recall": 0.5981941309255079, + "eval_recall_macro": 0.4990729210793411, + "eval_runtime": 0.2709, + "eval_samples_per_second": 601.789, + "eval_steps_per_second": 3.692, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 360 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.4394956274150905, + "eval_auc": 0.42538333442304876, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24493150684931506, + "eval_f1_macro": 0.399632635701501, + "eval_loss": 0.8671084642410278, + "eval_pr_auc": 0.12894871744717554, + "eval_precision": 0.15549178189407775, + "eval_precision_macro": 0.4973810972146359, + "eval_pred_class_0": 8169, + "eval_pred_class_1": 11499, + "eval_predicted_binding_ratio": 0.5846552776082977, + "eval_recall": 0.5765881973556917, + "eval_recall_macro": 0.4952114645256155, + "eval_runtime": 0.2681, + "eval_samples_per_second": 607.95, + "eval_steps_per_second": 3.73, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 378 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.44824079723408583, + "eval_auc": 0.42976391273864795, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24080033580523297, + "eval_f1_macro": 0.4037241835563183, + "eval_loss": 0.8476783633232117, + "eval_pr_auc": 0.13001972671009082, + "eval_precision": 0.15375681229339766, + "eval_precision_macro": 0.495462476943159, + "eval_pred_class_0": 8475, + "eval_pred_class_1": 11193, + "eval_predicted_binding_ratio": 0.5690970103721782, + "eval_recall": 0.5549822637858756, + "eval_recall_macro": 0.491621632285284, + "eval_runtime": 0.262, + "eval_samples_per_second": 622.096, + "eval_steps_per_second": 3.817, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 396 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.46044335977221884, + "eval_auc": 0.4345819960798662, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.23819095477386934, + "eval_f1_macro": 0.4102471738365922, + "eval_loss": 0.8271914720535278, + "eval_pr_auc": 0.1312038077210987, + "eval_precision": 0.15319974143503556, + "eval_precision_macro": 0.49502955733365084, + "eval_pred_class_0": 8839, + "eval_pred_class_1": 10829, + "eval_predicted_binding_ratio": 0.5505897905226764, + "eval_recall": 0.5349887133182845, + "eval_recall_macro": 0.4907393617898237, + "eval_runtime": 0.2699, + "eval_samples_per_second": 603.817, + "eval_steps_per_second": 3.704, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 414 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.4719849501728696, + "eval_auc": 0.4399397854182523, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2337489854644728, + "eval_f1_macro": 0.4154821023975197, + "eval_loss": 0.8054794669151306, + "eval_pr_auc": 0.13253606290408437, + "eval_precision": 0.1515499425947187, + "eval_precision_macro": 0.4934724539362483, + "eval_pred_class_0": 9216, + "eval_pred_class_1": 10452, + "eval_predicted_binding_ratio": 0.5314215985356925, + "eval_recall": 0.5108029667849081, + "eval_recall_macro": 0.48776099326147077, + "eval_runtime": 0.2459, + "eval_samples_per_second": 662.968, + "eval_steps_per_second": 4.067, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 432 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.48713646532438476, + "eval_auc": 0.4457222328836341, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.23099794160250056, + "eval_f1_macro": 0.42313846887516615, + "eval_loss": 0.7826969027519226, + "eval_pr_auc": 0.13403779679155806, + "eval_precision": 0.15125798722044728, + "eval_precision_macro": 0.4934698556077371, + "eval_pred_class_0": 9652, + "eval_pred_class_1": 10016, + "eval_predicted_binding_ratio": 0.5092536099247509, + "eval_recall": 0.48855207997420186, + "eval_recall_macro": 0.48771178574674356, + "eval_runtime": 0.2651, + "eval_samples_per_second": 614.772, + "eval_steps_per_second": 3.772, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 450 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.506152125279642, + "eval_auc": 0.452125351005008, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22648721828462212, + "eval_f1_macro": 0.43188881620754876, + "eval_loss": 0.7587484121322632, + "eval_pr_auc": 0.13570124162691763, + "eval_precision": 0.15038071065989847, + "eval_precision_macro": 0.492983148122742, + "eval_pred_class_0": 10212, + "eval_pred_class_1": 9456, + "eval_predicted_binding_ratio": 0.4807809640024405, + "eval_recall": 0.4585617542728152, + "eval_recall_macro": 0.48681090671327726, + "eval_runtime": 0.2665, + "eval_samples_per_second": 611.566, + "eval_steps_per_second": 3.752, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 468 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.529997966239577, + "eval_auc": 0.4588746151842906, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22371514947934162, + "eval_f1_macro": 0.4433436473661838, + "eval_loss": 0.7342172861099243, + "eval_pr_auc": 0.13749280910612846, + "eval_precision": 0.15124332916997843, + "eval_precision_macro": 0.4941834913044441, + "eval_pred_class_0": 10861, + "eval_pred_class_1": 8807, + "eval_predicted_binding_ratio": 0.44778320113890585, + "eval_recall": 0.4295388584327636, + "eval_recall_macro": 0.4891703467029515, + "eval_runtime": 0.2591, + "eval_samples_per_second": 629.173, + "eval_steps_per_second": 3.86, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 486 + }, + { + "epoch": 27.77777777777778, + "grad_norm": 191838.453125, + "learning_rate": 5.544444444444443e-07, + "loss": 0.954, + "step": 500 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.5579621720561317, + "eval_auc": 0.46628288633295734, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22208303507516106, + "eval_f1_macro": 0.4566736198103078, + "eval_loss": 0.7085328102111816, + "eval_pr_auc": 0.1397221721421834, + "eval_precision": 0.15368421052631578, + "eval_precision_macro": 0.4966212823527809, + "eval_pred_class_0": 11593, + "eval_pred_class_1": 8075, + "eval_predicted_binding_ratio": 0.4105653853976002, + "eval_recall": 0.400193485972267, + "eval_recall_macro": 0.49384334768221605, + "eval_runtime": 0.245, + "eval_samples_per_second": 665.264, + "eval_steps_per_second": 4.081, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 504 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.5890278625177954, + "eval_auc": 0.47432292318642716, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21926011784023955, + "eval_f1_macro": 0.4701862470303913, + "eval_loss": 0.6820237636566162, + "eval_pr_auc": 0.1419831923592407, + "eval_precision": 0.15650854936569222, + "eval_precision_macro": 0.4990822386003719, + "eval_pred_class_0": 12416, + "eval_pred_class_1": 7252, + "eval_predicted_binding_ratio": 0.36872076469391907, + "eval_recall": 0.36601096420509516, + "eval_recall_macro": 0.49839149043235986, + "eval_runtime": 0.239, + "eval_samples_per_second": 681.884, + "eval_steps_per_second": 4.183, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 522 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.62385600976205, + "eval_auc": 0.4829859859606367, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21797040169133192, + "eval_f1_macro": 0.4851734455906117, + "eval_loss": 0.6552286148071289, + "eval_pr_auc": 0.144533301478986, + "eval_precision": 0.16213241075640825, + "eval_precision_macro": 0.5032992807407407, + "eval_pred_class_0": 13309, + "eval_pred_class_1": 6359, + "eval_predicted_binding_ratio": 0.32331706324994913, + "eval_recall": 0.3324733956788133, + "eval_recall_macro": 0.5054351043101014, + "eval_runtime": 0.2289, + "eval_samples_per_second": 712.13, + "eval_steps_per_second": 4.369, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 540 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.6593959731543624, + "eval_auc": 0.4923438323703967, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21308586867144366, + "eval_f1_macro": 0.49787408315316334, + "eval_loss": 0.6283431053161621, + "eval_pr_auc": 0.14738118302130468, + "eval_precision": 0.16759053954175906, + "eval_precision_macro": 0.5068452136541568, + "eval_pred_class_0": 14256, + "eval_pred_class_1": 5412, + "eval_predicted_binding_ratio": 0.2751677852348993, + "eval_recall": 0.2924862947436311, + "eval_recall_macro": 0.5102800882784371, + "eval_runtime": 0.2716, + "eval_samples_per_second": 600.185, + "eval_steps_per_second": 3.682, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 558 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.6967154769168192, + "eval_auc": 0.5026651961769109, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21316449017280042, + "eval_f1_macro": 0.5126600281126953, + "eval_loss": 0.6013967990875244, + "eval_pr_auc": 0.15068393162958252, + "eval_precision": 0.18035714285714285, + "eval_precision_macro": 0.5146913446706046, + "eval_pred_class_0": 15188, + "eval_pred_class_1": 4480, + "eval_predicted_binding_ratio": 0.2277811673784828, + "eval_recall": 0.2605611093195743, + "eval_recall_macro": 0.5194578347949956, + "eval_runtime": 0.2705, + "eval_samples_per_second": 602.56, + "eval_steps_per_second": 3.697, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 576 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.7287472035794184, + "eval_auc": 0.5136348320064595, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.20692730786383232, + "eval_f1_macro": 0.5216610840892347, + "eval_loss": 0.575495719909668, + "eval_pr_auc": 0.15441446935423722, + "eval_precision": 0.19194704908990623, + "eval_precision_macro": 0.5210140431835268, + "eval_pred_class_0": 16042, + "eval_pred_class_1": 3626, + "eval_predicted_binding_ratio": 0.18436038234695953, + "eval_recall": 0.22444372782973235, + "eval_recall_macro": 0.5237930596654548, + "eval_runtime": 0.2647, + "eval_samples_per_second": 615.887, + "eval_steps_per_second": 3.778, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 594 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.7577791336180598, + "eval_auc": 0.5256758602512032, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.20013431833445267, + "eval_f1_macro": 0.5287070633014385, + "eval_loss": 0.5502753853797913, + "eval_pr_auc": 0.15881070257620672, + "eval_precision": 0.20875656742556917, + "eval_precision_macro": 0.5298823579410603, + "eval_pred_class_0": 16813, + "eval_pred_class_1": 2855, + "eval_predicted_binding_ratio": 0.14515965019320723, + "eval_recall": 0.19219606578523057, + "eval_recall_macro": 0.5279203302306971, + "eval_runtime": 0.2621, + "eval_samples_per_second": 621.974, + "eval_steps_per_second": 3.816, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 612 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.7844213951596501, + "eval_auc": 0.5388391234856937, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.19575113808801214, + "eval_f1_macro": 0.5356397775926205, + "eval_loss": 0.5265588164329529, + "eval_pr_auc": 0.16395620275178963, + "eval_precision": 0.23767848917549517, + "eval_precision_macro": 0.5449694383352471, + "eval_pred_class_0": 17497, + "eval_pred_class_1": 2171, + "eval_predicted_binding_ratio": 0.11038234695952817, + "eval_recall": 0.16639793614962914, + "eval_recall_macro": 0.5332502748895668, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.866, + "eval_steps_per_second": 3.895, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 630 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.8094366483628228, + "eval_auc": 0.5531888075405533, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.19189305735230702, + "eval_f1_macro": 0.5419376520838427, + "eval_loss": 0.5044229626655579, + "eval_pr_auc": 0.16987983494600534, + "eval_precision": 0.28952504879635654, + "eval_precision_macro": 0.5715178054086024, + "eval_pred_class_0": 18131, + "eval_pred_class_1": 1537, + "eval_predicted_binding_ratio": 0.07814724425462681, + "eval_recall": 0.14350209609803288, + "eval_recall_macro": 0.5387939646905326, + "eval_runtime": 0.2636, + "eval_samples_per_second": 618.446, + "eval_steps_per_second": 3.794, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 648 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.827791336180598, + "eval_auc": 0.5689342779333475, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.18796451690242147, + "eval_f1_macro": 0.5458235779450257, + "eval_loss": 0.4842270016670227, + "eval_pr_auc": 0.177326879876991, + "eval_precision": 0.3663551401869159, + "eval_precision_macro": 0.6103471582212137, + "eval_pred_class_0": 18598, + "eval_pred_class_1": 1070, + "eval_predicted_binding_ratio": 0.05440309131584299, + "eval_recall": 0.12641083521444696, + "eval_recall_macro": 0.5427430526648682, + "eval_runtime": 0.2392, + "eval_samples_per_second": 681.483, + "eval_steps_per_second": 4.181, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 666 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.8386719544437665, + "eval_auc": 0.5868017348062602, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.1820056715648363, + "eval_f1_macro": 0.5462584975699354, + "eval_loss": 0.46564891934394836, + "eval_pr_auc": 0.18685168882837525, + "eval_precision": 0.4537275064267352, + "eval_precision_macro": 0.6541268553838282, + "eval_pred_class_0": 18890, + "eval_pred_class_1": 778, + "eval_predicted_binding_ratio": 0.039556640227781166, + "eval_recall": 0.11383424701709126, + "eval_recall_macro": 0.5440904198204911, + "eval_runtime": 0.2648, + "eval_samples_per_second": 615.584, + "eval_steps_per_second": 3.777, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 684 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.8441631075859264, + "eval_auc": 0.6057814605899876, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.17982338774417983, + "eval_f1_macro": 0.5468627318225942, + "eval_loss": 0.4492926001548767, + "eval_pr_auc": 0.19848375437748741, + "eval_precision": 0.5283018867924528, + "eval_precision_macro": 0.6915101279275421, + "eval_pred_class_0": 19032, + "eval_pred_class_1": 636, + "eval_predicted_binding_ratio": 0.03233679072605247, + "eval_recall": 0.10835214446952596, + "eval_recall_macro": 0.5451219284549598, + "eval_runtime": 0.2567, + "eval_samples_per_second": 635.049, + "eval_steps_per_second": 3.896, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 702 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.8456375838926175, + "eval_auc": 0.6262280880815292, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.1758957654723127, + "eval_f1_macro": 0.5453696262568565, + "eval_loss": 0.43461790680885315, + "eval_pr_auc": 0.21275175506055685, + "eval_precision": 0.5557461406518011, + "eval_precision_macro": 0.7051195990133514, + "eval_pred_class_0": 19085, + "eval_pred_class_1": 583, + "eval_predicted_binding_ratio": 0.0296420581655481, + "eval_recall": 0.10448242502418574, + "eval_recall_macro": 0.544424468382196, + "eval_runtime": 0.2668, + "eval_samples_per_second": 610.873, + "eval_steps_per_second": 3.748, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 720 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.8483323164531218, + "eval_auc": 0.6481986497247736, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.18519530183010108, + "eval_f1_macro": 0.5507896621273841, + "eval_loss": 0.42107364535331726, + "eval_pr_auc": 0.23051421419341214, + "eval_precision": 0.6053571428571428, + "eval_precision_macro": 0.730405178085469, + "eval_pred_class_0": 19108, + "eval_pred_class_1": 560, + "eval_predicted_binding_ratio": 0.02847264592231035, + "eval_recall": 0.10931957433086101, + "eval_recall_macro": 0.5479899012476421, + "eval_runtime": 0.2619, + "eval_samples_per_second": 622.385, + "eval_steps_per_second": 3.818, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 738 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.8502135448444174, + "eval_auc": 0.6709813300109956, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.20292207792207792, + "eval_f1_macro": 0.5601310726310726, + "eval_loss": 0.4084097743034363, + "eval_pr_auc": 0.25093797354762637, + "eval_precision": 0.6302521008403361, + "eval_precision_macro": 0.7436637739036264, + "eval_pred_class_0": 19073, + "eval_pred_class_1": 595, + "eval_predicted_binding_ratio": 0.03025218629245475, + "eval_recall": 0.12092873266688164, + "eval_recall_macro": 0.5538246608949184, + "eval_runtime": 0.2691, + "eval_samples_per_second": 605.746, + "eval_steps_per_second": 3.716, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 756 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.8526540573520439, + "eval_auc": 0.6936772353365158, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22263948497854077, + "eval_f1_macro": 0.5706266398157138, + "eval_loss": 0.39666271209716797, + "eval_pr_auc": 0.2738840395423864, + "eval_precision": 0.6618819776714514, + "eval_precision_macro": 0.7604089789622948, + "eval_pred_class_0": 19041, + "eval_pred_class_1": 627, + "eval_predicted_binding_ratio": 0.031879194630872486, + "eval_recall": 0.13382779748468235, + "eval_recall_macro": 0.5605156371379469, + "eval_runtime": 0.2268, + "eval_samples_per_second": 718.638, + "eval_steps_per_second": 4.409, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 774 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.8544844417327638, + "eval_auc": 0.7158095511124275, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24325753569539926, + "eval_f1_macro": 0.58138013196988, + "eval_loss": 0.38576817512512207, + "eval_pr_auc": 0.29905525248581355, + "eval_precision": 0.6754772393538914, + "eval_precision_macro": 0.7681910344870789, + "eval_pred_class_0": 18987, + "eval_pred_class_1": 681, + "eval_predicted_binding_ratio": 0.03462477120195241, + "eval_recall": 0.14833924540470816, + "eval_recall_macro": 0.5674997367845657, + "eval_runtime": 0.2567, + "eval_samples_per_second": 635.061, + "eval_steps_per_second": 3.896, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 792 + }, + { + "epoch": 45.0, + "eval_accuracy": 0.8575859263778727, + "eval_auc": 0.737084336405228, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2808729139922978, + "eval_f1_macro": 0.60092007766148, + "eval_loss": 0.37560486793518066, + "eval_pr_auc": 0.3260256629572295, + "eval_precision": 0.6889168765743073, + "eval_precision_macro": 0.7767992245539758, + "eval_pred_class_0": 18874, + "eval_pred_class_1": 794, + "eval_predicted_binding_ratio": 0.040370144396990035, + "eval_recall": 0.1763947113834247, + "eval_recall_macro": 0.5807427773130077, + "eval_runtime": 0.2685, + "eval_samples_per_second": 607.1, + "eval_steps_per_second": 3.725, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 810 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.8611450071181614, + "eval_auc": 0.7571642141385685, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.32451150136037593, + "eval_f1_macro": 0.623565358817779, + "eval_loss": 0.36611661314964294, + "eval_pr_auc": 0.3532860869347882, + "eval_precision": 0.6963906581740976, + "eval_precision_macro": 0.7829117661264593, + "eval_pred_class_0": 18726, + "eval_pred_class_1": 942, + "eval_predicted_binding_ratio": 0.047895057962172055, + "eval_recall": 0.21154466301193164, + "eval_recall_macro": 0.5971407144358867, + "eval_runtime": 0.2698, + "eval_samples_per_second": 604.115, + "eval_steps_per_second": 3.706, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 828 + }, + { + "epoch": 47.0, + "eval_accuracy": 0.8646532438478747, + "eval_auc": 0.7759390708192488, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.3694931312174325, + "eval_f1_macro": 0.6468414565354121, + "eval_loss": 0.3574466407299042, + "eval_pr_auc": 0.380672409235741, + "eval_precision": 0.695807314897413, + "eval_precision_macro": 0.7853328912870632, + "eval_pred_class_0": 18547, + "eval_pred_class_1": 1121, + "eval_predicted_binding_ratio": 0.05699613585519626, + "eval_recall": 0.25153176394711385, + "eval_recall_macro": 0.6154743385438473, + "eval_runtime": 0.2501, + "eval_samples_per_second": 651.868, + "eval_steps_per_second": 3.999, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 846 + }, + { + "epoch": 48.0, + "eval_accuracy": 0.8670937563555013, + "eval_auc": 0.7932697219796829, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.4004587155963303, + "eval_f1_macro": 0.6628608765538834, + "eval_loss": 0.34963178634643555, + "eval_pr_auc": 0.40879055918048346, + "eval_precision": 0.69340746624305, + "eval_precision_macro": 0.7861898540406407, + "eval_pred_class_0": 18409, + "eval_pred_class_1": 1259, + "eval_predicted_binding_ratio": 0.06401260931462274, + "eval_recall": 0.2815220896485005, + "eval_recall_macro": 0.6291113798275701, + "eval_runtime": 0.2518, + "eval_samples_per_second": 647.281, + "eval_steps_per_second": 3.971, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 864 + }, + { + "epoch": 49.0, + "eval_accuracy": 0.8693308928208257, + "eval_auc": 0.8090460638591691, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.43491644678979774, + "eval_f1_macro": 0.6805201987887128, + "eval_loss": 0.3424255847930908, + "eval_pr_auc": 0.43548439720530613, + "eval_precision": 0.6834830684174154, + "eval_precision_macro": 0.783786427463743, + "eval_pred_class_0": 18221, + "eval_pred_class_1": 1447, + "eval_predicted_binding_ratio": 0.07357128330282693, + "eval_recall": 0.3189293776201225, + "eval_recall_macro": 0.6456420293062284, + "eval_runtime": 0.2567, + "eval_samples_per_second": 635.097, + "eval_steps_per_second": 3.896, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 882 + }, + { + "epoch": 50.0, + "eval_accuracy": 0.8711612772015457, + "eval_auc": 0.8231584209269593, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.46153846153846156, + "eval_f1_macro": 0.6941824562962304, + "eval_loss": 0.3358187675476074, + "eval_pr_auc": 0.46013674866792464, + "eval_precision": 0.6766355140186916, + "eval_precision_macro": 0.7825407542966181, + "eval_pred_class_0": 18063, + "eval_pred_class_1": 1605, + "eval_predicted_binding_ratio": 0.0816046369737645, + "eval_recall": 0.35020960980328925, + "eval_recall_macro": 0.659441136162585, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.797, + "eval_steps_per_second": 3.87, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 900 + }, + { + "epoch": 51.0, + "eval_accuracy": 0.8745169818995322, + "eval_auc": 0.8357514570475526, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.49363972096840375, + "eval_f1_macro": 0.7110123043354003, + "eval_loss": 0.32985639572143555, + "eval_pr_auc": 0.48277553791567623, + "eval_precision": 0.6785109983079526, + "eval_precision_macro": 0.7862239260888744, + "eval_pred_class_0": 17895, + "eval_pred_class_1": 1773, + "eval_predicted_binding_ratio": 0.0901464307504576, + "eval_recall": 0.38793937439535636, + "eval_recall_macro": 0.6767668140160521, + "eval_runtime": 0.2496, + "eval_samples_per_second": 653.0, + "eval_steps_per_second": 4.006, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 918 + }, + { + "epoch": 52.0, + "eval_accuracy": 0.8772117144600367, + "eval_auc": 0.8463606400457255, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.523574669560071, + "eval_f1_macro": 0.7265493507137326, + "eval_loss": 0.3246362507343292, + "eval_pr_auc": 0.5014434788718165, + "eval_precision": 0.6742886178861789, + "eval_precision_macro": 0.787031314592807, + "eval_pred_class_0": 17700, + "eval_pred_class_1": 1968, + "eval_predicted_binding_ratio": 0.10006101281269067, + "eval_recall": 0.4279264753305385, + "eval_recall_macro": 0.6946175504557563, + "eval_runtime": 0.2301, + "eval_samples_per_second": 708.248, + "eval_steps_per_second": 4.345, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 936 + }, + { + "epoch": 53.0, + "eval_accuracy": 0.878991254830181, + "eval_auc": 0.8556500280578212, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5445847684653655, + "eval_f1_macro": 0.7374052543587455, + "eval_loss": 0.3201504647731781, + "eval_pr_auc": 0.5184804467620471, + "eval_precision": 0.6696470588235294, + "eval_precision_macro": 0.786998185969936, + "eval_pred_class_0": 17543, + "eval_pred_class_1": 2125, + "eval_predicted_binding_ratio": 0.10804352247305267, + "eval_recall": 0.45888423089326025, + "eval_recall_macro": 0.7082554190018906, + "eval_runtime": 0.2666, + "eval_samples_per_second": 611.344, + "eval_steps_per_second": 3.751, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 954 + }, + { + "epoch": 54.0, + "eval_accuracy": 0.8792454748830588, + "eval_auc": 0.8636336358823378, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5594509367464292, + "eval_f1_macro": 0.744742407539513, + "eval_loss": 0.31616976857185364, + "eval_pr_auc": 0.5331175601979875, + "eval_precision": 0.6585152838427948, + "eval_precision_macro": 0.7834238290545543, + "eval_pred_class_0": 17378, + "eval_pred_class_1": 2290, + "eval_predicted_binding_ratio": 0.11643278421801911, + "eval_recall": 0.48629474363108677, + "eval_recall_macro": 0.719546237029523, + "eval_runtime": 0.2694, + "eval_samples_per_second": 604.964, + "eval_steps_per_second": 3.711, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 972 + }, + { + "epoch": 55.0, + "eval_accuracy": 0.8814317673378076, + "eval_auc": 0.8703512791725087, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.578757225433526, + "eval_f1_macro": 0.7548815712966447, + "eval_loss": 0.31280621886253357, + "eval_pr_auc": 0.5453871030590061, + "eval_precision": 0.657905544147844, + "eval_precision_macro": 0.7854606348952531, + "eval_pred_class_0": 17233, + "eval_pred_class_1": 2435, + "eval_predicted_binding_ratio": 0.12380516575147447, + "eval_recall": 0.5166075459529185, + "eval_recall_macro": 0.7331634337478723, + "eval_runtime": 0.3648, + "eval_samples_per_second": 446.874, + "eval_steps_per_second": 2.742, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 990 + }, + { + "epoch": 55.55555555555556, + "grad_norm": 18517.669921875, + "learning_rate": 9.996314582053105e-07, + "loss": 0.4604, + "step": 1000 + }, + { + "epoch": 56.0, + "eval_accuracy": 0.8834146837502542, + "eval_auc": 0.8759527216222862, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5956621407159232, + "eval_f1_macro": 0.7637749289648232, + "eval_loss": 0.3100614845752716, + "eval_pr_auc": 0.5551596710183998, + "eval_precision": 0.6571984435797665, + "eval_precision_macro": 0.7873078426812156, + "eval_pred_class_0": 17098, + "eval_pred_class_1": 2570, + "eval_predicted_binding_ratio": 0.1306691071791743, + "eval_recall": 0.5446630119316349, + "eval_recall_macro": 0.745742503732462, + "eval_runtime": 0.2507, + "eval_samples_per_second": 650.302, + "eval_steps_per_second": 3.99, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1008 + }, + { + "epoch": 57.0, + "eval_accuracy": 0.8840756558877364, + "eval_auc": 0.8809651824326759, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6064894718674491, + "eval_f1_macro": 0.7692574960553631, + "eval_loss": 0.30748215317726135, + "eval_pr_auc": 0.5634298069700459, + "eval_precision": 0.6524322317118455, + "eval_precision_macro": 0.7866284869899434, + "eval_pred_class_0": 16975, + "eval_pred_class_1": 2693, + "eval_predicted_binding_ratio": 0.13692292047996746, + "eval_recall": 0.5665914221218962, + "eval_recall_macro": 0.7550467824679621, + "eval_runtime": 0.265, + "eval_samples_per_second": 615.163, + "eval_steps_per_second": 3.774, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1026 + }, + { + "epoch": 58.0, + "eval_accuracy": 0.8849908480780964, + "eval_auc": 0.8853361353068065, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6121399176954733, + "eval_f1_macro": 0.7723127955239544, + "eval_loss": 0.30511021614074707, + "eval_pr_auc": 0.5712324508006517, + "eval_precision": 0.6536067374588063, + "eval_precision_macro": 0.7879535133831199, + "eval_pred_class_0": 16937, + "eval_pred_class_1": 2731, + "eval_predicted_binding_ratio": 0.13885499288183853, + "eval_recall": 0.5756207674943566, + "eval_recall_macro": 0.7592596503615321, + "eval_runtime": 0.2627, + "eval_samples_per_second": 620.515, + "eval_steps_per_second": 3.807, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1044 + }, + { + "epoch": 59.0, + "eval_accuracy": 0.8853467561521253, + "eval_auc": 0.8892084338643703, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.616822429906542, + "eval_f1_macro": 0.7747051971959543, + "eval_loss": 0.3030014634132385, + "eval_pr_auc": 0.5778370115776272, + "eval_precision": 0.6519396551724138, + "eval_precision_macro": 0.7878864350252024, + "eval_pred_class_0": 16884, + "eval_pred_class_1": 2784, + "eval_predicted_binding_ratio": 0.1415497254423429, + "eval_recall": 0.5852950661077072, + "eval_recall_macro": 0.7634026486450891, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.008, + "eval_steps_per_second": 3.828, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1062 + }, + { + "epoch": 60.0, + "eval_accuracy": 0.884685784014643, + "eval_auc": 0.8924710108272688, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6202277294038848, + "eval_f1_macro": 0.7761251343338811, + "eval_loss": 0.3011925220489502, + "eval_pr_auc": 0.5832812236308141, + "eval_precision": 0.6450714036920934, + "eval_precision_macro": 0.7853564436451774, + "eval_pred_class_0": 16797, + "eval_pred_class_1": 2871, + "eval_predicted_binding_ratio": 0.14597315436241612, + "eval_recall": 0.5972267010641729, + "eval_recall_macro": 0.7678594421600216, + "eval_runtime": 0.2437, + "eval_samples_per_second": 668.98, + "eval_steps_per_second": 4.104, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1080 + }, + { + "epoch": 61.0, + "eval_accuracy": 0.8856518202155786, + "eval_auc": 0.8954398707041407, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6242272347535506, + "eval_f1_macro": 0.7783964874556335, + "eval_loss": 0.29942846298217773, + "eval_pr_auc": 0.5886815510653964, + "eval_precision": 0.6477115117891817, + "eval_precision_macro": 0.7871243450270979, + "eval_pred_class_0": 16784, + "eval_pred_class_1": 2884, + "eval_predicted_binding_ratio": 0.14663412649989832, + "eval_recall": 0.6023863269912931, + "eval_recall_macro": 0.7705297965613797, + "eval_runtime": 0.2695, + "eval_samples_per_second": 604.921, + "eval_steps_per_second": 3.711, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1098 + }, + { + "epoch": 62.0, + "eval_accuracy": 0.8865161683953631, + "eval_auc": 0.8978366542923133, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6288659793814433, + "eval_f1_macro": 0.7809416026191173, + "eval_loss": 0.29785510897636414, + "eval_pr_auc": 0.593021329597711, + "eval_precision": 0.6491589426707861, + "eval_precision_macro": 0.7884708470441367, + "eval_pred_class_0": 16755, + "eval_pred_class_1": 2913, + "eval_predicted_binding_ratio": 0.14810860280658938, + "eval_recall": 0.6098032892615285, + "eval_recall_macro": 0.7740571948209012, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.719, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1116 + }, + { + "epoch": 63.0, + "eval_accuracy": 0.8871262965222697, + "eval_auc": 0.8998632136201572, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6315964155326916, + "eval_f1_macro": 0.7824748814379159, + "eval_loss": 0.2964444160461426, + "eval_pr_auc": 0.5970041919243015, + "eval_precision": 0.6505982905982906, + "eval_precision_macro": 0.789523000044412, + "eval_pred_class_0": 16743, + "eval_pred_class_1": 2925, + "eval_predicted_binding_ratio": 0.14871873093349602, + "eval_recall": 0.6136730087068688, + "eval_recall_macro": 0.7759920545435715, + "eval_runtime": 0.2695, + "eval_samples_per_second": 604.792, + "eval_steps_per_second": 3.71, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1134 + }, + { + "epoch": 64.0, + "eval_accuracy": 0.8876855806386008, + "eval_auc": 0.9017429582012333, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6356589147286822, + "eval_f1_macro": 0.7846343742639293, + "eval_loss": 0.2951850891113281, + "eval_pr_auc": 0.6005268804358049, + "eval_precision": 0.650573936529372, + "eval_precision_macro": 0.7901498917652248, + "eval_pred_class_0": 16706, + "eval_pred_class_1": 2962, + "eval_predicted_binding_ratio": 0.15059995932479153, + "eval_recall": 0.6214124475975492, + "eval_recall_macro": 0.7794694277584535, + "eval_runtime": 0.2675, + "eval_samples_per_second": 609.376, + "eval_steps_per_second": 3.739, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1152 + }, + { + "epoch": 65.0, + "eval_accuracy": 0.8881940207443563, + "eval_auc": 0.9033021142666618, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6377861966727063, + "eval_f1_macro": 0.785840340182137, + "eval_loss": 0.2939698398113251, + "eval_pr_auc": 0.6035211605243039, + "eval_precision": 0.6518518518518519, + "eval_precision_macro": 0.7910415086304414, + "eval_pred_class_0": 16698, + "eval_pred_class_1": 2970, + "eval_predicted_binding_ratio": 0.15100671140939598, + "eval_recall": 0.6243147371815544, + "eval_recall_macro": 0.7809507530297222, + "eval_runtime": 0.2684, + "eval_samples_per_second": 607.415, + "eval_steps_per_second": 3.726, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1170 + }, + { + "epoch": 66.0, + "eval_accuracy": 0.8882448647549319, + "eval_auc": 0.9048048023731414, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6389618922470434, + "eval_f1_macro": 0.7864263262967652, + "eval_loss": 0.29283198714256287, + "eval_pr_auc": 0.6066927627742578, + "eval_precision": 0.6511550050217609, + "eval_precision_macro": 0.7909273016835919, + "eval_pred_class_0": 16681, + "eval_pred_class_1": 2987, + "eval_predicted_binding_ratio": 0.1518710595891804, + "eval_recall": 0.6272170267655595, + "eval_recall_macro": 0.7821604539875966, + "eval_runtime": 0.2764, + "eval_samples_per_second": 589.68, + "eval_steps_per_second": 3.618, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1188 + }, + { + "epoch": 67.0, + "eval_accuracy": 0.888346552776083, + "eval_auc": 0.9061457752769495, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6402359108781127, + "eval_f1_macro": 0.7870775124924988, + "eval_loss": 0.29169291257858276, + "eval_pr_auc": 0.6096183698390041, + "eval_precision": 0.6506826506826506, + "eval_precision_macro": 0.7909278839971909, + "eval_pred_class_0": 16665, + "eval_pred_class_1": 3003, + "eval_predicted_binding_ratio": 0.15268456375838926, + "eval_recall": 0.6301193163495646, + "eval_recall_macro": 0.783400335424737, + "eval_runtime": 0.2635, + "eval_samples_per_second": 618.516, + "eval_steps_per_second": 3.795, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1206 + }, + { + "epoch": 68.0, + "eval_accuracy": 0.8887533048606874, + "eval_auc": 0.9074730837522218, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.641781270464964, + "eval_f1_macro": 0.7879665952661885, + "eval_loss": 0.2904839515686035, + "eval_pr_auc": 0.6127271933864005, + "eval_precision": 0.6518124376454939, + "eval_precision_macro": 0.7916645766644131, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6320541760722348, + "eval_recall_macro": 0.7844281262446042, + "eval_runtime": 0.26, + "eval_samples_per_second": 626.958, + "eval_steps_per_second": 3.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1224 + }, + { + "epoch": 69.0, + "eval_accuracy": 0.889261744966443, + "eval_auc": 0.9085174295528148, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6431847968545217, + "eval_f1_macro": 0.788822778783544, + "eval_loss": 0.28959015011787415, + "eval_pr_auc": 0.6152976575518759, + "eval_precision": 0.6536796536796536, + "eval_precision_macro": 0.7926964124983926, + "eval_pred_class_0": 16665, + "eval_pred_class_1": 3003, + "eval_predicted_binding_ratio": 0.15268456375838926, + "eval_recall": 0.6330216059335698, + "eval_recall_macro": 0.7851231045301337, + "eval_runtime": 0.2452, + "eval_samples_per_second": 664.894, + "eval_steps_per_second": 4.079, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1242 + }, + { + "epoch": 70.0, + "eval_accuracy": 0.889363432987594, + "eval_auc": 0.9095093658465239, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6446766819072501, + "eval_f1_macro": 0.7895790973067505, + "eval_loss": 0.2887136936187744, + "eval_pr_auc": 0.6176593727552148, + "eval_precision": 0.6529937148527952, + "eval_precision_macro": 0.7926428472131204, + "eval_pred_class_0": 16645, + "eval_pred_class_1": 3023, + "eval_predicted_binding_ratio": 0.15370144396990035, + "eval_recall": 0.636568848758465, + "eval_recall_macro": 0.7866251016291872, + "eval_runtime": 0.2634, + "eval_samples_per_second": 618.817, + "eval_steps_per_second": 3.796, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1260 + }, + { + "epoch": 71.0, + "eval_accuracy": 0.889821029082774, + "eval_auc": 0.9104295930879169, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6464349812367434, + "eval_f1_macro": 0.7905888279869988, + "eval_loss": 0.28783899545669556, + "eval_pr_auc": 0.619972501272285, + "eval_precision": 0.6542272126816381, + "eval_precision_macro": 0.7934597601869728, + "eval_pred_class_0": 16640, + "eval_pred_class_1": 3028, + "eval_predicted_binding_ratio": 0.1539556640227781, + "eval_recall": 0.6388261851015802, + "eval_recall_macro": 0.7878141307592769, + "eval_runtime": 0.2615, + "eval_samples_per_second": 623.334, + "eval_steps_per_second": 3.824, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1278 + }, + { + "epoch": 72.0, + "eval_accuracy": 0.8899735611145008, + "eval_auc": 0.911281293804153, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.646289637136319, + "eval_f1_macro": 0.7905721170208057, + "eval_loss": 0.28697267174720764, + "eval_pr_auc": 0.6225153426830469, + "eval_precision": 0.6552867086509778, + "eval_precision_macro": 0.7938916277024632, + "eval_pred_class_0": 16651, + "eval_pred_class_1": 3017, + "eval_predicted_binding_ratio": 0.15339637990644703, + "eval_recall": 0.6375362786198001, + "eval_recall_macro": 0.7873804408732489, + "eval_runtime": 0.2764, + "eval_samples_per_second": 589.762, + "eval_steps_per_second": 3.618, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1296 + }, + { + "epoch": 73.0, + "eval_accuracy": 0.890736221273134, + "eval_auc": 0.911925575502615, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6505122784192552, + "eval_f1_macro": 0.7928790035842321, + "eval_loss": 0.2863345444202423, + "eval_pr_auc": 0.6235765349975187, + "eval_precision": 0.6561679790026247, + "eval_precision_macro": 0.7949612458190018, + "eval_pred_class_0": 16620, + "eval_pred_class_1": 3048, + "eval_predicted_binding_ratio": 0.1549725442342892, + "eval_recall": 0.6449532408900355, + "eval_recall_macro": 0.7908474781742385, + "eval_runtime": 0.2631, + "eval_samples_per_second": 619.574, + "eval_steps_per_second": 3.801, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1314 + }, + { + "epoch": 74.0, + "eval_accuracy": 0.891193817368314, + "eval_auc": 0.9126060328997004, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6506692784851453, + "eval_f1_macro": 0.793115428161573, + "eval_loss": 0.28558436036109924, + "eval_pr_auc": 0.625919370718976, + "eval_precision": 0.6588429752066116, + "eval_precision_macro": 0.7961342196828587, + "eval_pred_class_0": 16643, + "eval_pred_class_1": 3025, + "eval_predicted_binding_ratio": 0.15380313199105144, + "eval_recall": 0.6426959045469204, + "eval_recall_macro": 0.7902016976709372, + "eval_runtime": 0.265, + "eval_samples_per_second": 615.167, + "eval_steps_per_second": 3.774, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1332 + }, + { + "epoch": 75.0, + "eval_accuracy": 0.8916005694529184, + "eval_auc": 0.9132873078266985, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6526555881394591, + "eval_f1_macro": 0.794217425975266, + "eval_loss": 0.28493690490722656, + "eval_pr_auc": 0.6278320531638758, + "eval_precision": 0.6595324333223576, + "eval_precision_macro": 0.7967555738856391, + "eval_pred_class_0": 16631, + "eval_pred_class_1": 3037, + "eval_predicted_binding_ratio": 0.1544132601179581, + "eval_recall": 0.6459206707513705, + "eval_recall_macro": 0.7917537198146302, + "eval_runtime": 0.2564, + "eval_samples_per_second": 635.773, + "eval_steps_per_second": 3.9, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1350 + }, + { + "epoch": 76.0, + "eval_accuracy": 0.8920073215375229, + "eval_auc": 0.9139370494570753, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6551948051948052, + "eval_f1_macro": 0.7955863102414826, + "eval_loss": 0.28430166840553284, + "eval_pr_auc": 0.6292546024902547, + "eval_precision": 0.6596927100359594, + "eval_precision_macro": 0.7972435493102309, + "eval_pred_class_0": 16609, + "eval_pred_class_1": 3059, + "eval_predicted_binding_ratio": 0.1555318283506203, + "eval_recall": 0.6507578200580458, + "eval_recall_macro": 0.7939610311131058, + "eval_runtime": 0.2569, + "eval_samples_per_second": 634.459, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1368 + }, + { + "epoch": 77.0, + "eval_accuracy": 0.8921598535692495, + "eval_auc": 0.9146080371326758, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6542787286063569, + "eval_f1_macro": 0.7951975553215214, + "eval_loss": 0.283497154712677, + "eval_pr_auc": 0.6315022943889131, + "eval_precision": 0.6615029663810151, + "eval_precision_macro": 0.7978670296615908, + "eval_pred_class_0": 16634, + "eval_pred_class_1": 3034, + "eval_predicted_binding_ratio": 0.15426072808623145, + "eval_recall": 0.6472105772331506, + "eval_recall_macro": 0.7926099364103822, + "eval_runtime": 0.2203, + "eval_samples_per_second": 740.067, + "eval_steps_per_second": 4.54, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1386 + }, + { + "epoch": 78.0, + "eval_accuracy": 0.8924140736221273, + "eval_auc": 0.9151027497871649, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.655037495924356, + "eval_f1_macro": 0.7956531976941219, + "eval_loss": 0.2829034626483917, + "eval_pr_auc": 0.6331226154536788, + "eval_precision": 0.6623804813715793, + "eval_precision_macro": 0.7983678781970611, + "eval_pred_class_0": 16635, + "eval_pred_class_1": 3033, + "eval_predicted_binding_ratio": 0.1542098840756559, + "eval_recall": 0.6478555304740407, + "eval_recall_macro": 0.7930229544686254, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.571, + "eval_steps_per_second": 3.899, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1404 + }, + { + "epoch": 79.0, + "eval_accuracy": 0.8925157616432784, + "eval_auc": 0.9156585533376076, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6559244791666666, + "eval_f1_macro": 0.7961172166862497, + "eval_loss": 0.28232645988464355, + "eval_pr_auc": 0.6343326075351273, + "eval_precision": 0.6621754847190273, + "eval_precision_macro": 0.7984260882241754, + "eval_pred_class_0": 16625, + "eval_pred_class_1": 3043, + "eval_predicted_binding_ratio": 0.15471832418141143, + "eval_recall": 0.6497903901967107, + "eval_recall_macro": 0.7938696624128962, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.665, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1422 + }, + { + "epoch": 80.0, + "eval_accuracy": 0.8929733577384584, + "eval_auc": 0.9162826303682348, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6572219508223416, + "eval_f1_macro": 0.7969043930945569, + "eval_loss": 0.28162533044815063, + "eval_pr_auc": 0.6363164977912346, + "eval_precision": 0.6638157894736842, + "eval_precision_macro": 0.7993423426560147, + "eval_pred_class_0": 16628, + "eval_pred_class_1": 3040, + "eval_predicted_binding_ratio": 0.15456579214968477, + "eval_recall": 0.6507578200580458, + "eval_recall_macro": 0.79453446021916, + "eval_runtime": 0.272, + "eval_samples_per_second": 599.266, + "eval_steps_per_second": 3.676, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1440 + }, + { + "epoch": 81.0, + "eval_accuracy": 0.8928208257067317, + "eval_auc": 0.9167705886684476, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.657124268054652, + "eval_f1_macro": 0.7968036671115732, + "eval_loss": 0.2811121940612793, + "eval_pr_auc": 0.6378571407612313, + "eval_precision": 0.6629471611421069, + "eval_precision_macro": 0.7989544782306408, + "eval_pred_class_0": 16621, + "eval_pred_class_1": 3047, + "eval_predicted_binding_ratio": 0.15492170022371365, + "eval_recall": 0.6514027732989358, + "eval_recall_macro": 0.7947060344432748, + "eval_runtime": 0.2602, + "eval_samples_per_second": 626.342, + "eval_steps_per_second": 3.843, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1458 + }, + { + "epoch": 82.0, + "eval_accuracy": 0.893125889770185, + "eval_auc": 0.9172343422437541, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6581001951854262, + "eval_f1_macro": 0.7973820247953165, + "eval_loss": 0.2804972231388092, + "eval_pr_auc": 0.6394259907419034, + "eval_precision": 0.6639317361339022, + "eval_precision_macro": 0.7995370130040789, + "eval_pred_class_0": 16621, + "eval_pred_class_1": 3047, + "eval_predicted_binding_ratio": 0.15492170022371365, + "eval_recall": 0.6523702031602708, + "eval_recall_macro": 0.7952802908117405, + "eval_runtime": 0.2613, + "eval_samples_per_second": 623.86, + "eval_steps_per_second": 3.827, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1476 + }, + { + "epoch": 83.0, + "eval_accuracy": 0.8933801098230628, + "eval_auc": 0.9176662705474707, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6597436313483693, + "eval_f1_macro": 0.7982647858607822, + "eval_loss": 0.28001976013183594, + "eval_pr_auc": 0.6407525699560299, + "eval_precision": 0.6639451338994121, + "eval_precision_macro": 0.7998155152816343, + "eval_pred_class_0": 16606, + "eval_pred_class_1": 3062, + "eval_predicted_binding_ratio": 0.15568436038234695, + "eval_recall": 0.6555949693647211, + "eval_recall_macro": 0.7967417715176355, + "eval_runtime": 0.2563, + "eval_samples_per_second": 635.967, + "eval_steps_per_second": 3.902, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1494 + }, + { + "epoch": 83.33333333333333, + "grad_norm": 11845.0048828125, + "learning_rate": 9.86567120987093e-07, + "loss": 0.2741, + "step": 1500 + }, + { + "epoch": 84.0, + "eval_accuracy": 0.8932275777913362, + "eval_auc": 0.9181069872977458, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6592015579357352, + "eval_f1_macro": 0.7979494857864604, + "eval_loss": 0.2794816195964813, + "eval_pr_auc": 0.6421665674351047, + "eval_precision": 0.6635086573015354, + "eval_precision_macro": 0.799538997766201, + "eval_pred_class_0": 16607, + "eval_pred_class_1": 3061, + "eval_predicted_binding_ratio": 0.15563351637177142, + "eval_recall": 0.654950016123831, + "eval_recall_macro": 0.7963891144179245, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.121, + "eval_steps_per_second": 3.841, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1512 + }, + { + "epoch": 85.0, + "eval_accuracy": 0.8932275777913362, + "eval_auc": 0.9184344761551537, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6595330739299611, + "eval_f1_macro": 0.7981095181516664, + "eval_loss": 0.27904370427131653, + "eval_pr_auc": 0.6429990318434126, + "eval_precision": 0.6631887838278449, + "eval_precision_macro": 0.7994577736379149, + "eval_pred_class_0": 16601, + "eval_pred_class_1": 3067, + "eval_predicted_binding_ratio": 0.15593858043522474, + "eval_recall": 0.6559174459851661, + "eval_recall_macro": 0.796782287910794, + "eval_runtime": 0.2526, + "eval_samples_per_second": 645.288, + "eval_steps_per_second": 3.959, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1530 + }, + { + "epoch": 86.0, + "eval_accuracy": 0.8933292658124873, + "eval_auc": 0.9189190864757253, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6585286458333334, + "eval_f1_macro": 0.797660321952579, + "eval_loss": 0.27837634086608887, + "eval_pr_auc": 0.6447520419594072, + "eval_precision": 0.664804469273743, + "eval_precision_macro": 0.7999811820052926, + "eval_pred_class_0": 16625, + "eval_pred_class_1": 3043, + "eval_predicted_binding_ratio": 0.15471832418141143, + "eval_recall": 0.6523702031602708, + "eval_recall_macro": 0.7954010127288045, + "eval_runtime": 0.2401, + "eval_samples_per_second": 678.873, + "eval_steps_per_second": 4.165, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1548 + }, + { + "epoch": 87.0, + "eval_accuracy": 0.8936851738865161, + "eval_auc": 0.9193183038504471, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6602761982128351, + "eval_f1_macro": 0.7986291029941847, + "eval_loss": 0.27788689732551575, + "eval_pr_auc": 0.6462039154555116, + "eval_precision": 0.6653569089718402, + "eval_precision_macro": 0.8005067920325675, + "eval_pred_class_0": 16614, + "eval_pred_class_1": 3054, + "eval_predicted_binding_ratio": 0.15527760829774254, + "eval_recall": 0.655272492744276, + "eval_recall_macro": 0.7967917965622751, + "eval_runtime": 0.2547, + "eval_samples_per_second": 640.0, + "eval_steps_per_second": 3.926, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1566 + }, + { + "epoch": 88.0, + "eval_accuracy": 0.8941427699816962, + "eval_auc": 0.9197055599839506, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.662012987012987, + "eval_f1_macro": 0.7996283888525269, + "eval_loss": 0.2774609923362732, + "eval_pr_auc": 0.6474319229516793, + "eval_precision": 0.6665576985943119, + "eval_precision_macro": 0.8013082309577014, + "eval_pred_class_0": 16609, + "eval_pred_class_1": 3059, + "eval_predicted_binding_ratio": 0.1555318283506203, + "eval_recall": 0.6575298290873912, + "eval_recall_macro": 0.7979808256923646, + "eval_runtime": 0.2497, + "eval_samples_per_second": 652.753, + "eval_steps_per_second": 4.005, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1584 + }, + { + "epoch": 89.0, + "eval_accuracy": 0.8946003660768761, + "eval_auc": 0.9201319018332661, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6636378387149116, + "eval_f1_macro": 0.8005736295133055, + "eval_loss": 0.27695581316947937, + "eval_pr_auc": 0.6490255599400047, + "eval_precision": 0.6678641410842586, + "eval_precision_macro": 0.8021363340613392, + "eval_pred_class_0": 16606, + "eval_pred_class_1": 3062, + "eval_predicted_binding_ratio": 0.15568436038234695, + "eval_recall": 0.6594646888100613, + "eval_recall_macro": 0.7990387969914977, + "eval_runtime": 0.269, + "eval_samples_per_second": 606.045, + "eval_steps_per_second": 3.718, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1602 + }, + { + "epoch": 90.0, + "eval_accuracy": 0.8950579621720561, + "eval_auc": 0.920403555344157, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6648262422864566, + "eval_f1_macro": 0.8013081720805965, + "eval_loss": 0.27657878398895264, + "eval_pr_auc": 0.6500340451314033, + "eval_precision": 0.6696107294733399, + "eval_precision_macro": 0.8030794000144978, + "eval_pred_class_0": 16611, + "eval_pred_class_1": 3057, + "eval_predicted_binding_ratio": 0.1554301403294692, + "eval_recall": 0.6601096420509514, + "eval_recall_macro": 0.7995725369668047, + "eval_runtime": 0.2609, + "eval_samples_per_second": 624.642, + "eval_steps_per_second": 3.832, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1620 + }, + { + "epoch": 91.0, + "eval_accuracy": 0.8953630262355095, + "eval_auc": 0.9207465675374016, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6654746423927178, + "eval_f1_macro": 0.8017284012349317, + "eval_loss": 0.27611449360847473, + "eval_pr_auc": 0.6512249430736048, + "eval_precision": 0.6709275647328745, + "eval_precision_macro": 0.8037492731289094, + "eval_pred_class_0": 16617, + "eval_pred_class_1": 3051, + "eval_predicted_binding_ratio": 0.15512507626601588, + "eval_recall": 0.6601096420509514, + "eval_recall_macro": 0.7997536198424009, + "eval_runtime": 0.2605, + "eval_samples_per_second": 625.736, + "eval_steps_per_second": 3.839, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1638 + }, + { + "epoch": 92.0, + "eval_accuracy": 0.8954647142566605, + "eval_auc": 0.9211146506479597, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6662337662337663, + "eval_f1_macro": 0.802130627992697, + "eval_loss": 0.275691956281662, + "eval_pr_auc": 0.6524138573777828, + "eval_precision": 0.6708074534161491, + "eval_precision_macro": 0.8038244624537546, + "eval_pred_class_0": 16609, + "eval_pred_class_1": 3059, + "eval_predicted_binding_ratio": 0.1555318283506203, + "eval_recall": 0.6617220251531764, + "eval_recall_macro": 0.8004692699557154, + "eval_runtime": 0.2702, + "eval_samples_per_second": 603.318, + "eval_steps_per_second": 3.701, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1656 + }, + { + "epoch": 93.0, + "eval_accuracy": 0.8956680902989628, + "eval_auc": 0.9214137790034065, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6666666666666666, + "eval_f1_macro": 0.8024110910186859, + "eval_loss": 0.27531710267066956, + "eval_pr_auc": 0.6535015844647576, + "eval_precision": 0.6716857610474631, + "eval_precision_macro": 0.8042712197761243, + "eval_pred_class_0": 16613, + "eval_pred_class_1": 3055, + "eval_predicted_binding_ratio": 0.15532845230831807, + "eval_recall": 0.6617220251531764, + "eval_recall_macro": 0.8005899918727795, + "eval_runtime": 0.2643, + "eval_samples_per_second": 616.638, + "eval_steps_per_second": 3.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1674 + }, + { + "epoch": 94.0, + "eval_accuracy": 0.8956172462883872, + "eval_auc": 0.9217489565349906, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6664500406173842, + "eval_f1_macro": 0.802288641055505, + "eval_loss": 0.27483227849006653, + "eval_pr_auc": 0.6546621751515824, + "eval_precision": 0.6715782580222659, + "eval_precision_macro": 0.8041892734676155, + "eval_pred_class_0": 16614, + "eval_pred_class_1": 3054, + "eval_predicted_binding_ratio": 0.15527760829774254, + "eval_recall": 0.6613995485327314, + "eval_recall_macro": 0.8004287535625569, + "eval_runtime": 0.2382, + "eval_samples_per_second": 684.396, + "eval_steps_per_second": 4.199, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1692 + }, + { + "epoch": 95.0, + "eval_accuracy": 0.8959731543624161, + "eval_auc": 0.9220882801111304, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6672088484059857, + "eval_f1_macro": 0.8027800298435859, + "eval_loss": 0.27437010407447815, + "eval_pr_auc": 0.6561534178394561, + "eval_precision": 0.6731211027239908, + "eval_precision_macro": 0.8049740042228342, + "eval_pred_class_0": 16621, + "eval_pred_class_1": 3047, + "eval_predicted_binding_ratio": 0.15492170022371365, + "eval_recall": 0.6613995485327314, + "eval_recall_macro": 0.800640016917419, + "eval_runtime": 0.2619, + "eval_samples_per_second": 622.273, + "eval_steps_per_second": 3.818, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1710 + }, + { + "epoch": 96.0, + "eval_accuracy": 0.8958206223306895, + "eval_auc": 0.9223952917907324, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6666666666666666, + "eval_f1_macro": 0.8024646720298894, + "eval_loss": 0.27396437525749207, + "eval_pr_auc": 0.6571939169041311, + "eval_precision": 0.6726854891661195, + "eval_precision_macro": 0.8046979364973901, + "eval_pred_class_0": 16622, + "eval_pred_class_1": 3046, + "eval_predicted_binding_ratio": 0.1548708562131381, + "eval_recall": 0.6607545952918413, + "eval_recall_macro": 0.8002873598177079, + "eval_runtime": 0.2627, + "eval_samples_per_second": 620.423, + "eval_steps_per_second": 3.806, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1728 + }, + { + "epoch": 97.0, + "eval_accuracy": 0.8960239983729916, + "eval_auc": 0.9226829844599047, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6679655788277318, + "eval_f1_macro": 0.8031632457541016, + "eval_loss": 0.2735843360424042, + "eval_pr_auc": 0.6581443956834908, + "eval_precision": 0.6726618705035972, + "eval_precision_macro": 0.8049040839573975, + "eval_pred_class_0": 16610, + "eval_pred_class_1": 3058, + "eval_predicted_binding_ratio": 0.15548098434004473, + "eval_recall": 0.6633344082554015, + "eval_recall_macro": 0.801456544382424, + "eval_runtime": 0.2325, + "eval_samples_per_second": 701.163, + "eval_steps_per_second": 4.302, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1746 + }, + { + "epoch": 98.0, + "eval_accuracy": 0.8961765304047183, + "eval_auc": 0.9230679047936586, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.668075422626788, + "eval_f1_macro": 0.8032698713905396, + "eval_loss": 0.2730555534362793, + "eval_pr_auc": 0.6597232694357793, + "eval_precision": 0.6735496558505408, + "eval_precision_macro": 0.8053010360254088, + "eval_pred_class_0": 16617, + "eval_pred_class_1": 3051, + "eval_predicted_binding_ratio": 0.15512507626601588, + "eval_recall": 0.6626894550145115, + "eval_recall_macro": 0.801284970158309, + "eval_runtime": 0.2564, + "eval_samples_per_second": 635.817, + "eval_steps_per_second": 3.901, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1764 + }, + { + "epoch": 99.0, + "eval_accuracy": 0.8961256863941428, + "eval_auc": 0.9233850285396773, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6689353427321342, + "eval_f1_macro": 0.8036671286252258, + "eval_loss": 0.272703617811203, + "eval_pr_auc": 0.6608360801532023, + "eval_precision": 0.6723127035830619, + "eval_precision_macro": 0.8049176483332829, + "eval_pred_class_0": 16598, + "eval_pred_class_1": 3070, + "eval_predicted_binding_ratio": 0.1560911124669514, + "eval_recall": 0.6655917445985166, + "eval_recall_macro": 0.8024343101576514, + "eval_runtime": 0.2609, + "eval_samples_per_second": 624.703, + "eval_steps_per_second": 3.833, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1782 + }, + { + "epoch": 100.0, + "eval_accuracy": 0.8962782184258694, + "eval_auc": 0.9236704438040937, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6696891191709845, + "eval_f1_macro": 0.804084607836397, + "eval_loss": 0.27227067947387695, + "eval_pr_auc": 0.661859432748859, + "eval_precision": 0.672520325203252, + "eval_precision_macro": 0.8051325786806955, + "eval_pred_class_0": 16593, + "eval_pred_class_1": 3075, + "eval_predicted_binding_ratio": 0.15634533251982916, + "eval_recall": 0.6668816510802967, + "eval_recall_macro": 0.8030490829192756, + "eval_runtime": 0.2555, + "eval_samples_per_second": 638.071, + "eval_steps_per_second": 3.915, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1800 + }, + { + "epoch": 101.0, + "eval_accuracy": 0.8962782184258694, + "eval_auc": 0.9239191675474416, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6695821185617103, + "eval_f1_macro": 0.8040329626642457, + "eval_loss": 0.2719270884990692, + "eval_pr_auc": 0.6626733311213273, + "eval_precision": 0.6726326065733811, + "eval_precision_macro": 0.8051623412499325, + "eval_pred_class_0": 16595, + "eval_pred_class_1": 3073, + "eval_predicted_binding_ratio": 0.15624364449867806, + "eval_recall": 0.6665591744598517, + "eval_recall_macro": 0.802918025088319, + "eval_runtime": 0.2736, + "eval_samples_per_second": 595.794, + "eval_steps_per_second": 3.655, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1818 + }, + { + "epoch": 102.0, + "eval_accuracy": 0.896786658531625, + "eval_auc": 0.9242257996595844, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6714146973130463, + "eval_f1_macro": 0.805096334723234, + "eval_loss": 0.271486759185791, + "eval_pr_auc": 0.6637007390734015, + "eval_precision": 0.6740331491712708, + "eval_precision_macro": 0.8060660592459934, + "eval_pred_class_0": 16591, + "eval_pred_class_1": 3077, + "eval_predicted_binding_ratio": 0.15644702054098028, + "eval_recall": 0.6688165108029668, + "eval_recall_macro": 0.8041372346976746, + "eval_runtime": 0.2489, + "eval_samples_per_second": 654.758, + "eval_steps_per_second": 4.017, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1836 + }, + { + "epoch": 103.0, + "eval_accuracy": 0.8973459426479561, + "eval_auc": 0.9244978230054357, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.673248098397799, + "eval_f1_macro": 0.8061779895433214, + "eval_loss": 0.2711206376552582, + "eval_pr_auc": 0.6646142778873767, + "eval_precision": 0.6757634827810266, + "eval_precision_macro": 0.8071101922645338, + "eval_pred_class_0": 16590, + "eval_pred_class_1": 3078, + "eval_predicted_binding_ratio": 0.15649786455155582, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.8052555669553397, + "eval_runtime": 0.2242, + "eval_samples_per_second": 727.107, + "eval_steps_per_second": 4.461, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1854 + }, + { + "epoch": 104.0, + "eval_accuracy": 0.8971934106162294, + "eval_auc": 0.9248346842593396, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6732385261797027, + "eval_f1_macro": 0.8061196854381076, + "eval_loss": 0.2707342207431793, + "eval_pr_auc": 0.6659244139495173, + "eval_precision": 0.6747651441528992, + "eval_precision_macro": 0.8066847854532062, + "eval_pred_class_0": 16581, + "eval_pred_class_1": 3087, + "eval_predicted_binding_ratio": 0.15695546064673582, + "eval_recall": 0.671718800386972, + "eval_recall_macro": 0.8055581990104113, + "eval_runtime": 0.255, + "eval_samples_per_second": 639.197, + "eval_steps_per_second": 3.921, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1872 + }, + { + "epoch": 105.0, + "eval_accuracy": 0.8975493186902583, + "eval_auc": 0.9250955152313902, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6746326497658647, + "eval_f1_macro": 0.8069177490147248, + "eval_loss": 0.2704195976257324, + "eval_pr_auc": 0.666907608407933, + "eval_precision": 0.675614489003881, + "eval_precision_macro": 0.8072811827258788, + "eval_pred_class_0": 16576, + "eval_pred_class_1": 3092, + "eval_predicted_binding_ratio": 0.15720968069961358, + "eval_recall": 0.673653660109642, + "eval_recall_macro": 0.8065558093510122, + "eval_runtime": 0.2579, + "eval_samples_per_second": 631.977, + "eval_steps_per_second": 3.877, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1890 + }, + { + "epoch": 106.0, + "eval_accuracy": 0.8977018507219849, + "eval_auc": 0.9254069649305167, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6749596122778675, + "eval_f1_macro": 0.8071292359343842, + "eval_loss": 0.26996490359306335, + "eval_pr_auc": 0.6681450085536085, + "eval_precision": 0.6762706377468436, + "eval_precision_macro": 0.8076147808433838, + "eval_pred_class_0": 16579, + "eval_pred_class_1": 3089, + "eval_predicted_binding_ratio": 0.15705714866788692, + "eval_recall": 0.673653660109642, + "eval_recall_macro": 0.8066463507888102, + "eval_runtime": 0.2667, + "eval_samples_per_second": 611.086, + "eval_steps_per_second": 3.749, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1908 + }, + { + "epoch": 107.0, + "eval_accuracy": 0.8980577587960138, + "eval_auc": 0.9256414889578861, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6758286176232822, + "eval_f1_macro": 0.8076738937412058, + "eval_loss": 0.2696084678173065, + "eval_pr_auc": 0.6691135839215693, + "eval_precision": 0.6776913099870299, + "eval_precision_macro": 0.8083644683075526, + "eval_pred_class_0": 16584, + "eval_pred_class_1": 3084, + "eval_predicted_binding_ratio": 0.15680292861500916, + "eval_recall": 0.673976136730087, + "eval_recall_macro": 0.8069886719746289, + "eval_runtime": 0.2591, + "eval_samples_per_second": 629.158, + "eval_steps_per_second": 3.86, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1926 + }, + { + "epoch": 108.0, + "eval_accuracy": 0.8980069147854383, + "eval_auc": 0.9258192627838369, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6769726247987118, + "eval_f1_macro": 0.8082079811791663, + "eval_loss": 0.26944610476493835, + "eval_pr_auc": 0.6696389857739906, + "eval_precision": 0.676101640398842, + "eval_precision_macro": 0.8078859551713395, + "eval_pred_class_0": 16559, + "eval_pred_class_1": 3109, + "eval_predicted_binding_ratio": 0.158074028879398, + "eval_recall": 0.6778458561754273, + "eval_recall_macro": 0.8085311854668409, + "eval_runtime": 0.261, + "eval_samples_per_second": 624.512, + "eval_steps_per_second": 3.831, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1944 + }, + { + "epoch": 109.0, + "eval_accuracy": 0.898159446817165, + "eval_auc": 0.9261110139050743, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.676779086654833, + "eval_f1_macro": 0.8081683537924276, + "eval_loss": 0.26896923780441284, + "eval_pr_auc": 0.6708410126864026, + "eval_precision": 0.6773255813953488, + "eval_precision_macro": 0.8083707318031536, + "eval_pred_class_0": 16572, + "eval_pred_class_1": 3096, + "eval_predicted_binding_ratio": 0.1574130567419158, + "eval_recall": 0.6762334730732021, + "eval_recall_macro": 0.8079664377498563, + "eval_runtime": 0.2579, + "eval_samples_per_second": 632.096, + "eval_steps_per_second": 3.878, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1962 + }, + { + "epoch": 110.0, + "eval_accuracy": 0.8985153548911938, + "eval_auc": 0.9263511243868452, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6783757653883339, + "eval_f1_macro": 0.809064127789247, + "eval_loss": 0.26861709356307983, + "eval_pr_auc": 0.6718712574127733, + "eval_precision": 0.677938808373591, + "eval_precision_macro": 0.808902387342021, + "eval_pred_class_0": 16563, + "eval_pred_class_1": 3105, + "eval_predicted_binding_ratio": 0.15787065283709578, + "eval_recall": 0.6788132860367624, + "eval_recall_macro": 0.8092261637523704, + "eval_runtime": 0.2543, + "eval_samples_per_second": 640.936, + "eval_steps_per_second": 3.932, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1980 + }, + { + "epoch": 111.0, + "eval_accuracy": 0.8989729509863738, + "eval_auc": 0.9265832055569767, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6794644297467334, + "eval_f1_macro": 0.8097506232989936, + "eval_loss": 0.2682516574859619, + "eval_pr_auc": 0.6727910026400046, + "eval_precision": 0.6797934151065204, + "eval_precision_macro": 0.8098725675411902, + "eval_pred_class_0": 16570, + "eval_pred_class_1": 3098, + "eval_predicted_binding_ratio": 0.1575147447630669, + "eval_recall": 0.6791357626572073, + "eval_recall_macro": 0.809628845896721, + "eval_runtime": 0.2091, + "eval_samples_per_second": 779.644, + "eval_steps_per_second": 4.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1998 + }, + { + "epoch": 111.11111111111111, + "grad_norm": 13330.4609375, + "learning_rate": 9.552616846852138e-07, + "loss": 0.252, + "step": 2000 + }, + { + "epoch": 112.0, + "eval_accuracy": 0.899176327028676, + "eval_auc": 0.9269119888367457, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6793856103476152, + "eval_f1_macro": 0.8097842051315767, + "eval_loss": 0.2677896022796631, + "eval_pr_auc": 0.6743175064299574, + "eval_precision": 0.6812581063553826, + "eval_precision_macro": 0.8104795114507255, + "eval_pred_class_0": 16584, + "eval_pred_class_1": 3084, + "eval_predicted_binding_ratio": 0.15680292861500916, + "eval_recall": 0.6775233795549823, + "eval_recall_macro": 0.8090942786590025, + "eval_runtime": 0.2454, + "eval_samples_per_second": 664.193, + "eval_steps_per_second": 4.075, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2016 + }, + { + "epoch": 113.0, + "eval_accuracy": 0.8994813910921293, + "eval_auc": 0.927058521341044, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6812832500403031, + "eval_f1_macro": 0.8108073208521016, + "eval_loss": 0.26760444045066833, + "eval_pr_auc": 0.6746134464200654, + "eval_precision": 0.6811734364925854, + "eval_precision_macro": 0.8107666047608406, + "eval_pred_class_0": 16566, + "eval_pred_class_1": 3102, + "eval_predicted_binding_ratio": 0.15771812080536912, + "eval_recall": 0.6813930990003225, + "eval_recall_macro": 0.8108480555060766, + "eval_runtime": 0.2588, + "eval_samples_per_second": 629.901, + "eval_steps_per_second": 3.864, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2034 + }, + { + "epoch": 114.0, + "eval_accuracy": 0.8993797030709783, + "eval_auc": 0.9272620862892311, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6811664250040277, + "eval_f1_macro": 0.81071512110173, + "eval_loss": 0.267299622297287, + "eval_pr_auc": 0.6753372489001316, + "eval_precision": 0.6806181584030908, + "eval_precision_macro": 0.8105119532505733, + "eval_pred_class_0": 16562, + "eval_pred_class_1": 3106, + "eval_predicted_binding_ratio": 0.15792149684767134, + "eval_recall": 0.6817155756207675, + "eval_recall_macro": 0.8109187523785011, + "eval_runtime": 0.2924, + "eval_samples_per_second": 557.453, + "eval_steps_per_second": 3.42, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2052 + }, + { + "epoch": 115.0, + "eval_accuracy": 0.8994813910921293, + "eval_auc": 0.9274648142425077, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6817962337035248, + "eval_f1_macro": 0.8110548055574955, + "eval_loss": 0.2670327126979828, + "eval_pr_auc": 0.6759104665767571, + "eval_precision": 0.6805912596401028, + "eval_precision_macro": 0.8106085073266955, + "eval_pred_class_0": 16556, + "eval_pred_class_1": 3112, + "eval_predicted_binding_ratio": 0.15822656091112466, + "eval_recall": 0.6830054821025475, + "eval_recall_macro": 0.811503344660859, + "eval_runtime": 0.2501, + "eval_samples_per_second": 651.841, + "eval_steps_per_second": 3.999, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2070 + }, + { + "epoch": 116.0, + "eval_accuracy": 0.8995830791132805, + "eval_auc": 0.927707601161492, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6824248271426274, + "eval_f1_macro": 0.8113938913621764, + "eval_loss": 0.2667410373687744, + "eval_pr_auc": 0.676645416517246, + "eval_precision": 0.6805644644002565, + "eval_precision_macro": 0.8107051929252038, + "eval_pred_class_0": 16550, + "eval_pred_class_1": 3118, + "eval_predicted_binding_ratio": 0.15853162497457798, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.812087936943217, + "eval_runtime": 0.2676, + "eval_samples_per_second": 609.094, + "eval_steps_per_second": 3.737, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2088 + }, + { + "epoch": 117.0, + "eval_accuracy": 0.8998881431767338, + "eval_auc": 0.927944061956154, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6828796907714608, + "eval_f1_macro": 0.8117208850210732, + "eval_loss": 0.26635268330574036, + "eval_pr_auc": 0.6777081363329963, + "eval_precision": 0.6821106821106822, + "eval_precision_macro": 0.8114357758379498, + "eval_pred_class_0": 16560, + "eval_pred_class_1": 3108, + "eval_predicted_binding_ratio": 0.15802318486882244, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8120069041569002, + "eval_runtime": 0.2624, + "eval_samples_per_second": 621.137, + "eval_steps_per_second": 3.811, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2106 + }, + { + "epoch": 118.0, + "eval_accuracy": 0.9001932072401871, + "eval_auc": 0.9282739839383012, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6831315577078289, + "eval_f1_macro": 0.8119498952052617, + "eval_loss": 0.2659379541873932, + "eval_pr_auc": 0.6790830976589266, + "eval_precision": 0.6839043309631545, + "eval_precision_macro": 0.8122369488772572, + "eval_pred_class_0": 16574, + "eval_pred_class_1": 3094, + "eval_predicted_binding_ratio": 0.1573113687207647, + "eval_recall": 0.6823605288616575, + "eval_recall_macro": 0.8116637557086703, + "eval_runtime": 0.2592, + "eval_samples_per_second": 628.745, + "eval_steps_per_second": 3.857, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2124 + }, + { + "epoch": 119.0, + "eval_accuracy": 0.9001932072401871, + "eval_auc": 0.9284926050623749, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6838460299565148, + "eval_f1_macro": 0.812294615183528, + "eval_loss": 0.2656570076942444, + "eval_pr_auc": 0.6798372835892805, + "eval_precision": 0.6830759330759331, + "eval_precision_macro": 0.8120089810307202, + "eval_pred_class_0": 16560, + "eval_pred_class_1": 3108, + "eval_predicted_binding_ratio": 0.15802318486882244, + "eval_recall": 0.6846178652047726, + "eval_recall_macro": 0.8125811605253657, + "eval_runtime": 0.2617, + "eval_samples_per_second": 622.824, + "eval_steps_per_second": 3.821, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2142 + }, + { + "epoch": 120.0, + "eval_accuracy": 0.9001932072401871, + "eval_auc": 0.9287595480437707, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6841512469831054, + "eval_f1_macro": 0.8124418563951485, + "eval_loss": 0.26531943678855896, + "eval_pr_auc": 0.6808986584956077, + "eval_precision": 0.682723185613359, + "eval_precision_macro": 0.8119125170545953, + "eval_pred_class_0": 16554, + "eval_pred_class_1": 3114, + "eval_predicted_binding_ratio": 0.1583282489322758, + "eval_recall": 0.6855852950661077, + "eval_recall_macro": 0.8129743340182352, + "eval_runtime": 0.2686, + "eval_samples_per_second": 606.857, + "eval_steps_per_second": 3.723, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2160 + }, + { + "epoch": 121.0, + "eval_accuracy": 0.9005999593247915, + "eval_auc": 0.9289936341086871, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6851344822032533, + "eval_f1_macro": 0.8130595887334677, + "eval_loss": 0.26495063304901123, + "eval_pr_auc": 0.6818525990139518, + "eval_precision": 0.6843629343629344, + "eval_precision_macro": 0.8127732546210807, + "eval_pred_class_0": 16560, + "eval_pred_class_1": 3108, + "eval_predicted_binding_ratio": 0.15802318486882244, + "eval_recall": 0.6859077716865527, + "eval_recall_macro": 0.8133468356833198, + "eval_runtime": 0.262, + "eval_samples_per_second": 622.159, + "eval_steps_per_second": 3.817, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2178 + }, + { + "epoch": 122.0, + "eval_accuracy": 0.9010067114093959, + "eval_auc": 0.9291471740122346, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6880307643005928, + "eval_f1_macro": 0.8146000626156236, + "eval_loss": 0.2648627460002899, + "eval_pr_auc": 0.6821768129155847, + "eval_precision": 0.6837579617834395, + "eval_precision_macro": 0.8130188647252145, + "eval_pred_class_0": 16528, + "eval_pred_class_1": 3140, + "eval_predicted_binding_ratio": 0.1596501932072402, + "eval_recall": 0.692357304095453, + "eval_recall_macro": 0.8162094361365779, + "eval_runtime": 0.2598, + "eval_samples_per_second": 627.373, + "eval_steps_per_second": 3.849, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2196 + }, + { + "epoch": 123.0, + "eval_accuracy": 0.9011083994305471, + "eval_auc": 0.9293803062922532, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.687851067244423, + "eval_f1_macro": 0.8145493064661928, + "eval_loss": 0.26444998383522034, + "eval_pr_auc": 0.6832369783380787, + "eval_precision": 0.6846645367412141, + "eval_precision_macro": 0.8133686693864494, + "eval_pred_class_0": 16538, + "eval_pred_class_1": 3130, + "eval_predicted_binding_ratio": 0.15914175310148465, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.8157455657712839, + "eval_runtime": 0.2527, + "eval_samples_per_second": 645.052, + "eval_steps_per_second": 3.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2214 + }, + { + "epoch": 124.0, + "eval_accuracy": 0.9017185275574537, + "eval_auc": 0.929644952403895, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6885774125986789, + "eval_f1_macro": 0.815114870687036, + "eval_loss": 0.26401567459106445, + "eval_pr_auc": 0.6842971435384069, + "eval_precision": 0.6880231809401159, + "eval_precision_macro": 0.8149088251035563, + "eval_pred_class_0": 16562, + "eval_pred_class_1": 3106, + "eval_predicted_binding_ratio": 0.15792149684767134, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8153213845367371, + "eval_runtime": 0.2233, + "eval_samples_per_second": 729.933, + "eval_steps_per_second": 4.478, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2232 + }, + { + "epoch": 125.0, + "eval_accuracy": 0.9017185275574537, + "eval_auc": 0.9298876614628875, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6892782510850346, + "eval_f1_macro": 0.8154529561328843, + "eval_loss": 0.26374292373657227, + "eval_pr_auc": 0.6852698748697685, + "eval_precision": 0.6871794871794872, + "eval_precision_macro": 0.8146738625165021, + "eval_pred_class_0": 16548, + "eval_pred_class_1": 3120, + "eval_predicted_binding_ratio": 0.1586333129957291, + "eval_recall": 0.691389874234118, + "eval_recall_macro": 0.8162387893534325, + "eval_runtime": 0.2352, + "eval_samples_per_second": 692.981, + "eval_steps_per_second": 4.251, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2250 + }, + { + "epoch": 126.0, + "eval_accuracy": 0.9020235916209071, + "eval_auc": 0.9300761410376911, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6909382518043304, + "eval_f1_macro": 0.8163612439650636, + "eval_loss": 0.2635449767112732, + "eval_pr_auc": 0.685931037905785, + "eval_precision": 0.6873005743458839, + "eval_precision_macro": 0.815012329026093, + "eval_pred_class_0": 16534, + "eval_pred_class_1": 3134, + "eval_predicted_binding_ratio": 0.15934512914378687, + "eval_recall": 0.6946146404385682, + "eval_recall_macro": 0.8177304505385936, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.44, + "eval_steps_per_second": 3.88, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2268 + }, + { + "epoch": 127.0, + "eval_accuracy": 0.902837095790116, + "eval_auc": 0.9303049910181687, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6925181013676589, + "eval_f1_macro": 0.8174102840403102, + "eval_loss": 0.263118714094162, + "eval_pr_auc": 0.6869422132706717, + "eval_precision": 0.691072575465639, + "eval_precision_macro": 0.8168725206674576, + "eval_pred_class_0": 16554, + "eval_pred_class_1": 3114, + "eval_predicted_binding_ratio": 0.1583282489322758, + "eval_recall": 0.6939696871976782, + "eval_recall_macro": 0.8179512225449368, + "eval_runtime": 0.2583, + "eval_samples_per_second": 630.976, + "eval_steps_per_second": 3.871, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2286 + }, + { + "epoch": 128.0, + "eval_accuracy": 0.9027354077689648, + "eval_auc": 0.9304756990498765, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6929866795056974, + "eval_f1_macro": 0.8176004232749752, + "eval_loss": 0.2629205286502838, + "eval_pr_auc": 0.6875064110834537, + "eval_precision": 0.689776357827476, + "eval_precision_macro": 0.8164083143593783, + "eval_pred_class_0": 16538, + "eval_pred_class_1": 3130, + "eval_predicted_binding_ratio": 0.15914175310148465, + "eval_recall": 0.6962270235407932, + "eval_recall_macro": 0.8188082664031002, + "eval_runtime": 0.2527, + "eval_samples_per_second": 644.934, + "eval_steps_per_second": 3.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2304 + }, + { + "epoch": 129.0, + "eval_accuracy": 0.9030404718324181, + "eval_auc": 0.9306633221647718, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6937530110807772, + "eval_f1_macro": 0.818077689508494, + "eval_loss": 0.2625824213027954, + "eval_pr_auc": 0.6881187823465719, + "eval_precision": 0.690978886756238, + "eval_precision_macro": 0.8170466915947796, + "eval_pred_class_0": 16542, + "eval_pred_class_1": 3126, + "eval_predicted_binding_ratio": 0.15893837705918243, + "eval_recall": 0.6965495001612383, + "eval_recall_macro": 0.8191204071096527, + "eval_runtime": 0.2718, + "eval_samples_per_second": 599.717, + "eval_steps_per_second": 3.679, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2322 + }, + { + "epoch": 130.0, + "eval_accuracy": 0.902938783811267, + "eval_auc": 0.9308322783466673, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6933333333333334, + "eval_f1_macro": 0.817839388722781, + "eval_loss": 0.2622954547405243, + "eval_pr_auc": 0.6887035254510873, + "eval_precision": 0.6907810499359796, + "eval_precision_macro": 0.816890766747487, + "eval_pred_class_0": 16544, + "eval_pred_class_1": 3124, + "eval_predicted_binding_ratio": 0.15883668903803133, + "eval_recall": 0.6959045469203483, + "eval_recall_macro": 0.8187979304892078, + "eval_runtime": 0.2604, + "eval_samples_per_second": 625.881, + "eval_steps_per_second": 3.84, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2340 + }, + { + "epoch": 131.0, + "eval_accuracy": 0.9027354077689648, + "eval_auc": 0.9309924460820045, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6929866795056974, + "eval_f1_macro": 0.8176004232749752, + "eval_loss": 0.26203182339668274, + "eval_pr_auc": 0.6893090005690568, + "eval_precision": 0.689776357827476, + "eval_precision_macro": 0.8164083143593783, + "eval_pred_class_0": 16538, + "eval_pred_class_1": 3130, + "eval_predicted_binding_ratio": 0.15914175310148465, + "eval_recall": 0.6962270235407932, + "eval_recall_macro": 0.8188082664031002, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.443, + "eval_steps_per_second": 3.898, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2358 + }, + { + "epoch": 132.0, + "eval_accuracy": 0.902938783811267, + "eval_auc": 0.9312000694822565, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6936286310383566, + "eval_f1_macro": 0.817981812876073, + "eval_loss": 0.26177045702934265, + "eval_pr_auc": 0.6902838377634022, + "eval_precision": 0.6904153354632588, + "eval_precision_macro": 0.8167882699809945, + "eval_pred_class_0": 16538, + "eval_pred_class_1": 3130, + "eval_predicted_binding_ratio": 0.15914175310148465, + "eval_recall": 0.6968719767816833, + "eval_recall_macro": 0.8191911039820772, + "eval_runtime": 0.2756, + "eval_samples_per_second": 591.34, + "eval_steps_per_second": 3.628, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2376 + }, + { + "epoch": 133.0, + "eval_accuracy": 0.902938783811267, + "eval_auc": 0.9314264084780033, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6933333333333334, + "eval_f1_macro": 0.817839388722781, + "eval_loss": 0.2614164650440216, + "eval_pr_auc": 0.6912123921690412, + "eval_precision": 0.6907810499359796, + "eval_precision_macro": 0.816890766747487, + "eval_pred_class_0": 16544, + "eval_pred_class_1": 3124, + "eval_predicted_binding_ratio": 0.15883668903803133, + "eval_recall": 0.6959045469203483, + "eval_recall_macro": 0.8187979304892078, + "eval_runtime": 0.2552, + "eval_samples_per_second": 638.769, + "eval_steps_per_second": 3.919, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2394 + }, + { + "epoch": 134.0, + "eval_accuracy": 0.9030404718324181, + "eval_auc": 0.9316330780933575, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6934576434656807, + "eval_f1_macro": 0.8179352236654993, + "eval_loss": 0.26109954714775085, + "eval_pr_auc": 0.6919150376493911, + "eval_precision": 0.6913461538461538, + "eval_precision_macro": 0.817149992562429, + "eval_pred_class_0": 16548, + "eval_pred_class_1": 3120, + "eval_predicted_binding_ratio": 0.1586333129957291, + "eval_recall": 0.6955820702999033, + "eval_recall_macro": 0.8187272336167832, + "eval_runtime": 0.2602, + "eval_samples_per_second": 626.557, + "eval_steps_per_second": 3.844, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2412 + }, + { + "epoch": 135.0, + "eval_accuracy": 0.9030404718324181, + "eval_auc": 0.9318176062735843, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.694733472066592, + "eval_f1_macro": 0.8185505131193367, + "eval_loss": 0.2609698474407196, + "eval_pr_auc": 0.69244594350898, + "eval_precision": 0.6897647806738716, + "eval_precision_macro": 0.8167078351983328, + "eval_pred_class_0": 16522, + "eval_pred_class_1": 3146, + "eval_predicted_binding_ratio": 0.1599552572706935, + "eval_recall": 0.6997742663656885, + "eval_recall_macro": 0.8204309854192178, + "eval_runtime": 0.2491, + "eval_samples_per_second": 654.333, + "eval_steps_per_second": 4.014, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2430 + }, + { + "epoch": 136.0, + "eval_accuracy": 0.9034472239170226, + "eval_auc": 0.9320372006475538, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6941536479304236, + "eval_f1_macro": 0.8184144035830462, + "eval_loss": 0.2604828178882599, + "eval_pr_auc": 0.6935318303087233, + "eval_precision": 0.6933719433719434, + "eval_precision_macro": 0.8181231697536046, + "eval_pred_class_0": 16560, + "eval_pred_class_1": 3108, + "eval_predicted_binding_ratio": 0.15802318486882244, + "eval_recall": 0.6949371170590132, + "eval_recall_macro": 0.8187065617889984, + "eval_runtime": 0.2485, + "eval_samples_per_second": 655.847, + "eval_steps_per_second": 4.024, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2448 + }, + { + "epoch": 137.0, + "eval_accuracy": 0.9033963799064471, + "eval_auc": 0.9321443165310757, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6960972488803583, + "eval_f1_macro": 0.8193338378363828, + "eval_loss": 0.2604370415210724, + "eval_pr_auc": 0.693643099537468, + "eval_precision": 0.6905744208187877, + "eval_precision_macro": 0.8172857573610195, + "eval_pred_class_0": 16517, + "eval_pred_class_1": 3151, + "eval_predicted_binding_ratio": 0.16020947732357127, + "eval_recall": 0.7017091260883586, + "eval_recall_macro": 0.8214285957598189, + "eval_runtime": 0.2215, + "eval_samples_per_second": 736.024, + "eval_steps_per_second": 4.515, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2466 + }, + { + "epoch": 138.0, + "eval_accuracy": 0.9034980679275981, + "eval_auc": 0.9323432098797633, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.695638229634381, + "eval_f1_macro": 0.8191484199531422, + "eval_loss": 0.2601032257080078, + "eval_pr_auc": 0.694473981068256, + "eval_precision": 0.691866028708134, + "eval_precision_macro": 0.817746962215919, + "eval_pred_class_0": 16533, + "eval_pred_class_1": 3135, + "eval_predicted_binding_ratio": 0.1593959731543624, + "eval_recall": 0.6994517897452435, + "eval_recall_macro": 0.8205715519016554, + "eval_runtime": 0.2592, + "eval_samples_per_second": 628.928, + "eval_steps_per_second": 3.858, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2484 + }, + { + "epoch": 138.88888888888889, + "grad_norm": 12954.3583984375, + "learning_rate": 9.068887706579789e-07, + "loss": 0.2385, + "step": 2500 + }, + { + "epoch": 139.0, + "eval_accuracy": 0.904311572096807, + "eval_auc": 0.9325454122780963, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6969404186795491, + "eval_f1_macro": 0.8200635197304043, + "eval_loss": 0.25969693064689636, + "eval_pr_auc": 0.6954050242581626, + "eval_precision": 0.6960437439691219, + "eval_precision_macro": 0.819729100681946, + "eval_pred_class_0": 16559, + "eval_pred_class_1": 3109, + "eval_predicted_binding_ratio": 0.158074028879398, + "eval_recall": 0.6978394066430184, + "eval_recall_macro": 0.820399150415129, + "eval_runtime": 0.2657, + "eval_samples_per_second": 613.439, + "eval_steps_per_second": 3.763, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2502 + }, + { + "epoch": 140.0, + "eval_accuracy": 0.903853976001627, + "eval_auc": 0.9326487714170208, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6971977582065653, + "eval_f1_macro": 0.8200261554019741, + "eval_loss": 0.2596379518508911, + "eval_pr_auc": 0.6956168134976223, + "eval_precision": 0.6924300254452926, + "eval_precision_macro": 0.8182556808417458, + "eval_pred_class_0": 16524, + "eval_pred_class_1": 3144, + "eval_predicted_binding_ratio": 0.15985356924954242, + "eval_recall": 0.7020316027088036, + "eval_recall_macro": 0.8218312779041694, + "eval_runtime": 0.2604, + "eval_samples_per_second": 626.028, + "eval_steps_per_second": 3.841, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2520 + }, + { + "epoch": 141.0, + "eval_accuracy": 0.904311572096807, + "eval_auc": 0.9329799294265357, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6962556488056811, + "eval_f1_macro": 0.819733135205496, + "eval_loss": 0.25904589891433716, + "eval_pr_auc": 0.6970902220334548, + "eval_precision": 0.6969305331179322, + "eval_precision_macro": 0.8199852086334245, + "eval_pred_class_0": 16573, + "eval_pred_class_1": 3095, + "eval_predicted_binding_ratio": 0.15736221273134024, + "eval_recall": 0.6955820702999033, + "eval_recall_macro": 0.8194817455984336, + "eval_runtime": 0.2524, + "eval_samples_per_second": 645.771, + "eval_steps_per_second": 3.962, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2538 + }, + { + "epoch": 142.0, + "eval_accuracy": 0.9047691681919869, + "eval_auc": 0.9330630060376336, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994061948322902, + "eval_f1_macro": 0.8214143192859533, + "eval_loss": 0.2590080201625824, + "eval_pr_auc": 0.697179333543334, + "eval_precision": 0.6961661341853035, + "eval_precision_macro": 0.8202078705755396, + "eval_pred_class_0": 16538, + "eval_pred_class_1": 3130, + "eval_predicted_binding_ratio": 0.15914175310148465, + "eval_recall": 0.7026765559496937, + "eval_recall_macro": 0.8226366421928706, + "eval_runtime": 0.2495, + "eval_samples_per_second": 653.345, + "eval_steps_per_second": 4.008, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2556 + }, + { + "epoch": 143.0, + "eval_accuracy": 0.9047691681919869, + "eval_auc": 0.9332962551076398, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6991164658634538, + "eval_f1_macro": 0.8212745809731632, + "eval_loss": 0.25865858793258667, + "eval_pr_auc": 0.6981950669404262, + "eval_precision": 0.6965428937259923, + "eval_precision_macro": 0.8203156925109651, + "eval_pred_class_0": 16544, + "eval_pred_class_1": 3124, + "eval_predicted_binding_ratio": 0.15883668903803133, + "eval_recall": 0.7017091260883586, + "eval_recall_macro": 0.8222434687000011, + "eval_runtime": 0.2633, + "eval_samples_per_second": 619.149, + "eval_steps_per_second": 3.798, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2574 + }, + { + "epoch": 144.0, + "eval_accuracy": 0.9050233882448647, + "eval_auc": 0.9334749729859892, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995818591186876, + "eval_f1_macro": 0.8215887434369935, + "eval_loss": 0.2584296464920044, + "eval_pr_auc": 0.6986932637956595, + "eval_precision": 0.697786333012512, + "eval_precision_macro": 0.8209190259709409, + "eval_pred_class_0": 16551, + "eval_pred_class_1": 3117, + "eval_predicted_binding_ratio": 0.15848078096400245, + "eval_recall": 0.7013866494679136, + "eval_recall_macro": 0.8222633132653747, + "eval_runtime": 0.2769, + "eval_samples_per_second": 588.703, + "eval_steps_per_second": 3.612, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2592 + }, + { + "epoch": 145.0, + "eval_accuracy": 0.9050742322554403, + "eval_auc": 0.9336868786857826, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6991136180499598, + "eval_f1_macro": 0.8213807805320277, + "eval_loss": 0.25803840160369873, + "eval_pr_auc": 0.699660351667112, + "eval_precision": 0.6987757731958762, + "eval_precision_macro": 0.8212545854629465, + "eval_pred_class_0": 16564, + "eval_pred_class_1": 3104, + "eval_predicted_binding_ratio": 0.15781980882652025, + "eval_recall": 0.6994517897452435, + "eval_recall_macro": 0.8215071467589017, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.163, + "eval_steps_per_second": 3.891, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2610 + }, + { + "epoch": 146.0, + "eval_accuracy": 0.9047183241814114, + "eval_auc": 0.9337996783486955, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995831997435076, + "eval_f1_macro": 0.8214817322060338, + "eval_loss": 0.2579362094402313, + "eval_pr_auc": 0.6999214829412527, + "eval_precision": 0.6955690149824674, + "eval_precision_macro": 0.8199882459220607, + "eval_pred_class_0": 16531, + "eval_pred_class_1": 3137, + "eval_predicted_binding_ratio": 0.15949766117551353, + "eval_recall": 0.7036439858110287, + "eval_recall_macro": 0.8229996352064741, + "eval_runtime": 0.2538, + "eval_samples_per_second": 642.317, + "eval_steps_per_second": 3.941, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2628 + }, + { + "epoch": 147.0, + "eval_accuracy": 0.9051759202765914, + "eval_auc": 0.9339161666287131, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.701647736362182, + "eval_f1_macro": 0.8226388900943448, + "eval_loss": 0.2578524649143219, + "eval_pr_auc": 0.7001081914566338, + "eval_precision": 0.6961904761904761, + "eval_precision_macro": 0.820610070399391, + "eval_pred_class_0": 16518, + "eval_pred_class_1": 3150, + "eval_predicted_binding_ratio": 0.16015863331299574, + "eval_recall": 0.7071912286359239, + "eval_recall_macro": 0.8247128956603896, + "eval_runtime": 0.2529, + "eval_samples_per_second": 644.43, + "eval_steps_per_second": 3.954, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2646 + }, + { + "epoch": 148.0, + "eval_accuracy": 0.9050233882448647, + "eval_auc": 0.9341703697689739, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995818591186876, + "eval_f1_macro": 0.8215887434369935, + "eval_loss": 0.25731131434440613, + "eval_pr_auc": 0.7012902373057504, + "eval_precision": 0.697786333012512, + "eval_precision_macro": 0.8209190259709409, + "eval_pred_class_0": 16551, + "eval_pred_class_1": 3117, + "eval_predicted_binding_ratio": 0.15848078096400245, + "eval_recall": 0.7013866494679136, + "eval_recall_macro": 0.8222633132653747, + "eval_runtime": 0.2512, + "eval_samples_per_second": 648.858, + "eval_steps_per_second": 3.981, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2664 + }, + { + "epoch": 149.0, + "eval_accuracy": 0.9052776082977425, + "eval_auc": 0.9343408636857047, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003377834968635, + "eval_f1_macro": 0.8220430425380088, + "eval_loss": 0.25706911087036133, + "eval_pr_auc": 0.7018656885391451, + "eval_precision": 0.6986521181001284, + "eval_precision_macro": 0.8214140242506442, + "eval_pred_class_0": 16552, + "eval_pred_class_1": 3116, + "eval_predicted_binding_ratio": 0.1584299369534269, + "eval_recall": 0.7020316027088036, + "eval_recall_macro": 0.8226763313236177, + "eval_runtime": 0.2513, + "eval_samples_per_second": 648.672, + "eval_steps_per_second": 3.98, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2682 + }, + { + "epoch": 150.0, + "eval_accuracy": 0.9054301403294692, + "eval_auc": 0.9345392996069414, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7006758931445124, + "eval_f1_macro": 0.822259931959612, + "eval_loss": 0.2567782402038574, + "eval_pr_auc": 0.7026441101697649, + "eval_precision": 0.6993254095727593, + "eval_precision_macro": 0.8217557280421937, + "eval_pred_class_0": 16555, + "eval_pred_class_1": 3113, + "eval_predicted_binding_ratio": 0.15827740492170023, + "eval_recall": 0.7020316027088036, + "eval_recall_macro": 0.8227668727614157, + "eval_runtime": 0.2591, + "eval_samples_per_second": 629.188, + "eval_steps_per_second": 3.86, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2700 + }, + { + "epoch": 151.0, + "eval_accuracy": 0.9051250762660159, + "eval_auc": 0.934685296823797, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003853564547207, + "eval_f1_macro": 0.8220121780461352, + "eval_loss": 0.25664329528808594, + "eval_pr_auc": 0.7029644506395569, + "eval_precision": 0.6974736168851935, + "eval_precision_macro": 0.8209271234175075, + "eval_pred_class_0": 16541, + "eval_pred_class_1": 3127, + "eval_predicted_binding_ratio": 0.158989221069758, + "eval_recall": 0.7033215091905837, + "eval_recall_macro": 0.8231100212096456, + "eval_runtime": 0.2408, + "eval_samples_per_second": 676.856, + "eval_steps_per_second": 4.152, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2718 + }, + { + "epoch": 152.0, + "eval_accuracy": 0.9055826723611958, + "eval_auc": 0.9349076649599691, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7008216529724505, + "eval_f1_macro": 0.8223840221758023, + "eval_loss": 0.25626465678215027, + "eval_pr_auc": 0.7040942732775656, + "eval_precision": 0.7002575660012879, + "eval_precision_macro": 0.82217322207805, + "eval_pred_class_0": 16562, + "eval_pred_class_1": 3106, + "eval_predicted_binding_ratio": 0.15792149684767134, + "eval_recall": 0.7013866494679136, + "eval_recall_macro": 0.8225952985373008, + "eval_runtime": 0.2564, + "eval_samples_per_second": 635.699, + "eval_steps_per_second": 3.9, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2736 + }, + { + "epoch": 153.0, + "eval_accuracy": 0.905226764287167, + "eval_auc": 0.9350594919437002, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7014734144778988, + "eval_f1_macro": 0.8225728005545544, + "eval_loss": 0.2562600076198578, + "eval_pr_auc": 0.7046022469135804, + "eval_precision": 0.6967865097041044, + "eval_precision_macro": 0.8208289583316286, + "eval_pred_class_0": 16525, + "eval_pred_class_1": 3143, + "eval_predicted_binding_ratio": 0.15980272523896685, + "eval_recall": 0.7062237987745889, + "eval_recall_macro": 0.8243499026467862, + "eval_runtime": 0.2358, + "eval_samples_per_second": 691.279, + "eval_steps_per_second": 4.241, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2754 + }, + { + "epoch": 154.0, + "eval_accuracy": 0.9054809843400448, + "eval_auc": 0.9352728672508359, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7010773436243769, + "eval_f1_macro": 0.8224715159707777, + "eval_loss": 0.25581732392311096, + "eval_pr_auc": 0.7056629719926804, + "eval_precision": 0.6991661321359846, + "eval_precision_macro": 0.821758292654095, + "eval_pred_class_0": 16550, + "eval_pred_class_1": 3118, + "eval_predicted_binding_ratio": 0.15853162497457798, + "eval_recall": 0.7029990325701386, + "eval_recall_macro": 0.8231902267335512, + "eval_runtime": 0.2602, + "eval_samples_per_second": 626.5, + "eval_steps_per_second": 3.844, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2772 + }, + { + "epoch": 155.0, + "eval_accuracy": 0.9057860484034981, + "eval_auc": 0.9354356530283926, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7025204687750842, + "eval_f1_macro": 0.8232752161134611, + "eval_loss": 0.25560733675956726, + "eval_pr_auc": 0.7062858637533224, + "eval_precision": 0.6994884910485933, + "eval_precision_macro": 0.8221444873622652, + "eval_pred_class_0": 16540, + "eval_pred_class_1": 3128, + "eval_predicted_binding_ratio": 0.15904006508033353, + "eval_recall": 0.7055788455336988, + "eval_recall_macro": 0.8244197722567993, + "eval_runtime": 0.2671, + "eval_samples_per_second": 610.27, + "eval_steps_per_second": 3.744, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2790 + }, + { + "epoch": 156.0, + "eval_accuracy": 0.9058877364246491, + "eval_auc": 0.9356189159837551, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7028415475999358, + "eval_f1_macro": 0.8234659606184656, + "eval_loss": 0.2553412616252899, + "eval_pr_auc": 0.7070636650718337, + "eval_precision": 0.6998081841432225, + "eval_precision_macro": 0.8223345636556499, + "eval_pred_class_0": 16540, + "eval_pred_class_1": 3128, + "eval_predicted_binding_ratio": 0.15904006508033353, + "eval_recall": 0.7059013221541438, + "eval_recall_macro": 0.8246111910462879, + "eval_runtime": 0.2539, + "eval_samples_per_second": 642.017, + "eval_steps_per_second": 3.939, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2808 + }, + { + "epoch": 157.0, + "eval_accuracy": 0.9061419564775269, + "eval_auc": 0.9357918624902231, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7031199742682535, + "eval_f1_macro": 0.8236899466727462, + "eval_loss": 0.25509998202323914, + "eval_pr_auc": 0.7077594222055618, + "eval_precision": 0.7013153673403913, + "eval_precision_macro": 0.8230158493399438, + "eval_pred_class_0": 16551, + "eval_pred_class_1": 3117, + "eval_predicted_binding_ratio": 0.15848078096400245, + "eval_recall": 0.7049338922928088, + "eval_recall_macro": 0.8243689199497484, + "eval_runtime": 0.255, + "eval_samples_per_second": 639.309, + "eval_steps_per_second": 3.922, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2826 + }, + { + "epoch": 158.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9359727409833409, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7034593724859212, + "eval_f1_macro": 0.8239074586532139, + "eval_loss": 0.2548539340496063, + "eval_pr_auc": 0.7084539339673516, + "eval_precision": 0.7019910083493899, + "eval_precision_macro": 0.8233586792381238, + "eval_pred_class_0": 16554, + "eval_pred_class_1": 3114, + "eval_predicted_binding_ratio": 0.1583282489322758, + "eval_recall": 0.7049338922928088, + "eval_recall_macro": 0.8244594613875464, + "eval_runtime": 0.2566, + "eval_samples_per_second": 635.242, + "eval_steps_per_second": 3.897, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2844 + }, + { + "epoch": 159.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9360812778117108, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7043380822794941, + "eval_f1_macro": 0.8242594639388645, + "eval_loss": 0.2548294961452484, + "eval_pr_auc": 0.708809734771939, + "eval_precision": 0.6993006993006993, + "eval_precision_macro": 0.822383674913635, + "eval_pred_class_0": 16522, + "eval_pred_class_1": 3146, + "eval_predicted_binding_ratio": 0.1599552572706935, + "eval_recall": 0.709448564979039, + "eval_recall_macro": 0.8261735491038733, + "eval_runtime": 0.27, + "eval_samples_per_second": 603.758, + "eval_steps_per_second": 3.704, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2862 + }, + { + "epoch": 160.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9362697476540152, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7040308334671591, + "eval_f1_macro": 0.82418310527748, + "eval_loss": 0.25451889634132385, + "eval_pr_auc": 0.70979034812957, + "eval_precision": 0.7012156110044786, + "eval_precision_macro": 0.8231322886360805, + "eval_pred_class_0": 16542, + "eval_pred_class_1": 3126, + "eval_predicted_binding_ratio": 0.15893837705918243, + "eval_recall": 0.7068687520154788, + "eval_recall_macro": 0.8252458083732854, + "eval_runtime": 0.2554, + "eval_samples_per_second": 638.302, + "eval_steps_per_second": 3.916, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2880 + }, + { + "epoch": 161.0, + "eval_accuracy": 0.9065487085621314, + "eval_auc": 0.9364340127714132, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7047863796980405, + "eval_f1_macro": 0.824637224883753, + "eval_loss": 0.25425252318382263, + "eval_pr_auc": 0.7104452483887299, + "eval_precision": 0.70208, + "eval_precision_macro": 0.8236265925164723, + "eval_pred_class_0": 16543, + "eval_pred_class_1": 3125, + "eval_predicted_binding_ratio": 0.15888753304860687, + "eval_recall": 0.7075137052563689, + "eval_recall_macro": 0.8256588264315284, + "eval_runtime": 0.2609, + "eval_samples_per_second": 624.767, + "eval_steps_per_second": 3.833, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2898 + }, + { + "epoch": 162.0, + "eval_accuracy": 0.9065487085621314, + "eval_auc": 0.9366581522223957, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7045965927354548, + "eval_f1_macro": 0.8245456841795291, + "eval_loss": 0.25392982363700867, + "eval_pr_auc": 0.7114160919166963, + "eval_precision": 0.7023389939122077, + "eval_precision_macro": 0.8237022823552698, + "eval_pred_class_0": 16547, + "eval_pred_class_1": 3121, + "eval_predicted_binding_ratio": 0.15868415700630464, + "eval_recall": 0.7068687520154788, + "eval_recall_macro": 0.8253967107696154, + "eval_runtime": 0.2487, + "eval_samples_per_second": 655.473, + "eval_steps_per_second": 4.021, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2916 + }, + { + "epoch": 163.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.936698911928028, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7069544364508393, + "eval_f1_macro": 0.8257724934589374, + "eval_loss": 0.25399070978164673, + "eval_pr_auc": 0.7113652786898896, + "eval_precision": 0.7010145846544071, + "eval_precision_macro": 0.8235604593370134, + "eval_pred_class_0": 16514, + "eval_pred_class_1": 3154, + "eval_predicted_binding_ratio": 0.16036200935529796, + "eval_recall": 0.7129958078039342, + "eval_recall_macro": 0.8280377119541189, + "eval_runtime": 0.2519, + "eval_samples_per_second": 647.019, + "eval_steps_per_second": 3.969, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2934 + }, + { + "epoch": 164.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9369248810888143, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7062780269058296, + "eval_f1_macro": 0.8254283885284618, + "eval_loss": 0.25362086296081543, + "eval_pr_auc": 0.7123203296069245, + "eval_precision": 0.7015590200445434, + "eval_precision_macro": 0.8236690712930735, + "eval_pred_class_0": 16525, + "eval_pred_class_1": 3143, + "eval_predicted_binding_ratio": 0.15980272523896685, + "eval_recall": 0.7110609480812641, + "eval_recall_macro": 0.827221184489114, + "eval_runtime": 0.261, + "eval_samples_per_second": 624.444, + "eval_steps_per_second": 3.831, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2952 + }, + { + "epoch": 165.0, + "eval_accuracy": 0.9072096806996136, + "eval_auc": 0.9371562809840187, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7070155723230053, + "eval_f1_macro": 0.8259456391835222, + "eval_loss": 0.2532632350921631, + "eval_pr_auc": 0.7133697274093473, + "eval_precision": 0.7039641943734015, + "eval_precision_macro": 0.8248055554696512, + "eval_pred_class_0": 16540, + "eval_pred_class_1": 3128, + "eval_predicted_binding_ratio": 0.15904006508033353, + "eval_recall": 0.710093518219929, + "eval_recall_macro": 0.8270996353096386, + "eval_runtime": 0.2545, + "eval_samples_per_second": 640.417, + "eval_steps_per_second": 3.929, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2970 + }, + { + "epoch": 166.0, + "eval_accuracy": 0.9071588366890381, + "eval_auc": 0.9372476886142239, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7072779737095223, + "eval_f1_macro": 0.8260542385315996, + "eval_loss": 0.2531469762325287, + "eval_pr_auc": 0.7136781835058345, + "eval_precision": 0.7032196365954734, + "eval_precision_macro": 0.8245394656269969, + "eval_pred_class_0": 16531, + "eval_pred_class_1": 3137, + "eval_predicted_binding_ratio": 0.15949766117551353, + "eval_recall": 0.7113834247017091, + "eval_recall_macro": 0.8275936861541986, + "eval_runtime": 0.2653, + "eval_samples_per_second": 614.378, + "eval_steps_per_second": 3.769, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2988 + }, + { + "epoch": 166.66666666666666, + "grad_norm": 14056.4111328125, + "learning_rate": 8.432618494003656e-07, + "loss": 0.2279, + "step": 3000 + }, + { + "epoch": 167.0, + "eval_accuracy": 0.9072605247101891, + "eval_auc": 0.9373718655684177, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7077859660365268, + "eval_f1_macro": 0.8263351175441593, + "eval_loss": 0.25298377871513367, + "eval_pr_auc": 0.7140728736694715, + "eval_precision": 0.7032792104425343, + "eval_precision_macro": 0.8246534613355044, + "eval_pred_class_0": 16527, + "eval_pred_class_1": 3141, + "eval_predicted_binding_ratio": 0.15970103721781573, + "eval_recall": 0.7123508545630441, + "eval_recall_macro": 0.8280472206056, + "eval_runtime": 0.2651, + "eval_samples_per_second": 614.943, + "eval_steps_per_second": 3.773, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3006 + }, + { + "epoch": 168.0, + "eval_accuracy": 0.9075655887736425, + "eval_auc": 0.9374749229998747, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7094918504314478, + "eval_f1_macro": 0.8272654245808608, + "eval_loss": 0.25285741686820984, + "eval_pr_auc": 0.7143683695359583, + "eval_precision": 0.7031992397846056, + "eval_precision_macro": 0.8249204363177162, + "eval_pred_class_0": 16511, + "eval_pred_class_1": 3157, + "eval_predicted_binding_ratio": 0.16051454138702462, + "eval_recall": 0.7158980973879394, + "eval_recall_macro": 0.8296699396217176, + "eval_runtime": 0.2383, + "eval_samples_per_second": 683.908, + "eval_steps_per_second": 4.196, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3024 + }, + { + "epoch": 169.0, + "eval_accuracy": 0.9076672767947935, + "eval_auc": 0.9377269168628721, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7087876844130853, + "eval_f1_macro": 0.8269618180373584, + "eval_loss": 0.25237372517585754, + "eval_pr_auc": 0.7154735567799367, + "eval_precision": 0.7049441786283892, + "eval_precision_macro": 0.8255259815297635, + "eval_pred_class_0": 16533, + "eval_pred_class_1": 3135, + "eval_predicted_binding_ratio": 0.1593959731543624, + "eval_recall": 0.7126733311834892, + "eval_recall_macro": 0.8284197222706846, + "eval_runtime": 0.2602, + "eval_samples_per_second": 626.364, + "eval_steps_per_second": 3.843, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3042 + }, + { + "epoch": 170.0, + "eval_accuracy": 0.9079214968476713, + "eval_auc": 0.9378333125414714, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7099151049175076, + "eval_f1_macro": 0.8275952704051472, + "eval_loss": 0.2522483766078949, + "eval_pr_auc": 0.7157732693940176, + "eval_precision": 0.7052832590706556, + "eval_precision_macro": 0.8258656401852128, + "eval_pred_class_0": 16526, + "eval_pred_class_1": 3142, + "eval_predicted_binding_ratio": 0.1597518812283913, + "eval_recall": 0.7146081909061593, + "eval_recall_macro": 0.8293569716527538, + "eval_runtime": 0.2212, + "eval_samples_per_second": 736.855, + "eval_steps_per_second": 4.521, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3060 + }, + { + "epoch": 171.0, + "eval_accuracy": 0.9079214968476713, + "eval_auc": 0.9379969158489405, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7098221438871976, + "eval_f1_macro": 0.8275504434498686, + "eval_loss": 0.25200438499450684, + "eval_pr_auc": 0.7163721445757975, + "eval_precision": 0.7054140127388535, + "eval_precision_macro": 0.8259040054013725, + "eval_pred_class_0": 16528, + "eval_pred_class_1": 3140, + "eval_predicted_binding_ratio": 0.1596501932072402, + "eval_recall": 0.7142857142857143, + "eval_recall_macro": 0.8292259138217972, + "eval_runtime": 0.2637, + "eval_samples_per_second": 618.089, + "eval_steps_per_second": 3.792, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3078 + }, + { + "epoch": 172.0, + "eval_accuracy": 0.9083282489322758, + "eval_auc": 0.9382153715205318, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7102683593122289, + "eval_f1_macro": 0.8279092226905722, + "eval_loss": 0.25164341926574707, + "eval_pr_auc": 0.7173810220120935, + "eval_precision": 0.7078795643818065, + "eval_precision_macro": 0.8270148456503497, + "eval_pred_class_0": 16546, + "eval_pred_class_1": 3122, + "eval_predicted_binding_ratio": 0.1587350010168802, + "eval_recall": 0.7126733311834892, + "eval_recall_macro": 0.8288120685011429, + "eval_runtime": 0.2387, + "eval_samples_per_second": 682.942, + "eval_steps_per_second": 4.19, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3096 + }, + { + "epoch": 173.0, + "eval_accuracy": 0.9084807809640024, + "eval_auc": 0.9383315970230777, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.711168164313222, + "eval_f1_macro": 0.8283970352740591, + "eval_loss": 0.2515103816986084, + "eval_pr_auc": 0.7177908159595219, + "eval_precision": 0.7077610986905142, + "eval_precision_macro": 0.8271223707155178, + "eval_pred_class_0": 16537, + "eval_pred_class_1": 3131, + "eval_predicted_binding_ratio": 0.15919259711206019, + "eval_recall": 0.7146081909061593, + "eval_recall_macro": 0.8296889569246799, + "eval_runtime": 0.2615, + "eval_samples_per_second": 623.31, + "eval_steps_per_second": 3.824, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3114 + }, + { + "epoch": 174.0, + "eval_accuracy": 0.9088366890380313, + "eval_auc": 0.9384487568455233, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7133493205435651, + "eval_f1_macro": 0.8295745121505045, + "eval_loss": 0.25142860412597656, + "eval_pr_auc": 0.7181535155231535, + "eval_precision": 0.7073557387444515, + "eval_precision_macro": 0.8273365831908039, + "eval_pred_class_0": 16514, + "eval_pred_class_1": 3154, + "eval_predicted_binding_ratio": 0.16036200935529796, + "eval_recall": 0.7194453402128346, + "eval_recall_macro": 0.8318660877438894, + "eval_runtime": 0.27, + "eval_samples_per_second": 603.694, + "eval_steps_per_second": 3.704, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3132 + }, + { + "epoch": 175.0, + "eval_accuracy": 0.9090400650803335, + "eval_auc": 0.9386484385266265, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7135308246597278, + "eval_f1_macro": 0.8297338931856857, + "eval_loss": 0.2510823905467987, + "eval_pr_auc": 0.7190368898285651, + "eval_precision": 0.7086513994910941, + "eval_precision_macro": 0.8279095777411898, + "eval_pred_class_0": 16524, + "eval_pred_class_1": 3144, + "eval_predicted_binding_ratio": 0.15985356924954242, + "eval_recall": 0.7184779103514995, + "eval_recall_macro": 0.8315936361680839, + "eval_runtime": 0.2642, + "eval_samples_per_second": 616.851, + "eval_steps_per_second": 3.784, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3150 + }, + { + "epoch": 176.0, + "eval_accuracy": 0.9090909090909091, + "eval_auc": 0.9388042461024309, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7136450992953235, + "eval_f1_macro": 0.8298069567551197, + "eval_loss": 0.25084683299064636, + "eval_pr_auc": 0.7196986972872739, + "eval_precision": 0.7088768692332167, + "eval_precision_macro": 0.8280239111672891, + "eval_pred_class_0": 16525, + "eval_pred_class_1": 3143, + "eval_predicted_binding_ratio": 0.15980272523896685, + "eval_recall": 0.7184779103514995, + "eval_recall_macro": 0.83162381664735, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.711, + "eval_steps_per_second": 3.869, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3168 + }, + { + "epoch": 177.0, + "eval_accuracy": 0.9093451291437868, + "eval_auc": 0.9389100189010969, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7144001281435207, + "eval_f1_macro": 0.8302608322100373, + "eval_loss": 0.25058600306510925, + "eval_pr_auc": 0.7201232901620662, + "eval_precision": 0.7097390197326544, + "eval_precision_macro": 0.8285170954889824, + "eval_pred_class_0": 16526, + "eval_pred_class_1": 3142, + "eval_predicted_binding_ratio": 0.1597518812283913, + "eval_recall": 0.7191228635923895, + "eval_recall_macro": 0.832036834705593, + "eval_runtime": 0.2429, + "eval_samples_per_second": 671.088, + "eval_steps_per_second": 4.117, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3186 + }, + { + "epoch": 178.0, + "eval_accuracy": 0.9091417531014846, + "eval_auc": 0.9390935349014322, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.713392141138733, + "eval_f1_macro": 0.8297029283682245, + "eval_loss": 0.2502758800983429, + "eval_pr_auc": 0.7210120001490467, + "eval_precision": 0.7096362476068921, + "eval_precision_macro": 0.8282970157836081, + "eval_pred_class_0": 16534, + "eval_pred_class_1": 3134, + "eval_predicted_binding_ratio": 0.15934512914378687, + "eval_recall": 0.7171880038697195, + "eval_recall_macro": 0.83112976580279, + "eval_runtime": 0.27, + "eval_samples_per_second": 603.763, + "eval_steps_per_second": 3.704, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3204 + }, + { + "epoch": 179.0, + "eval_accuracy": 0.9097518812283913, + "eval_auc": 0.939160767004228, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7162270183852918, + "eval_f1_macro": 0.8312854205617097, + "eval_loss": 0.2502012550830841, + "eval_pr_auc": 0.7211443774602971, + "eval_precision": 0.7102092580849715, + "eval_precision_macro": 0.8290358389250096, + "eval_pred_class_0": 16514, + "eval_pred_class_1": 3154, + "eval_predicted_binding_ratio": 0.16036200935529796, + "eval_recall": 0.7223476297968398, + "eval_recall_macro": 0.8335888568492861, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.383, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3222 + }, + { + "epoch": 180.0, + "eval_accuracy": 0.9099552572706935, + "eval_auc": 0.9392979952395233, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7171378374061651, + "eval_f1_macro": 0.8317964319305957, + "eval_loss": 0.2500424385070801, + "eval_pr_auc": 0.7216701546304439, + "eval_precision": 0.7104430379746836, + "eval_precision_macro": 0.8292946956289701, + "eval_pred_class_0": 16508, + "eval_pred_class_1": 3160, + "eval_predicted_binding_ratio": 0.16066707341875128, + "eval_recall": 0.7239600128990649, + "eval_recall_macro": 0.8343648679211326, + "eval_runtime": 0.2706, + "eval_samples_per_second": 602.263, + "eval_steps_per_second": 3.695, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3240 + }, + { + "epoch": 181.0, + "eval_accuracy": 0.910006101281269, + "eval_auc": 0.9394494621207929, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7173427020121367, + "eval_f1_macro": 0.8319131723763289, + "eval_loss": 0.24980410933494568, + "eval_pr_auc": 0.722382187322872, + "eval_precision": 0.7105346409364125, + "eval_precision_macro": 0.8293692166334694, + "eval_pred_class_0": 16507, + "eval_pred_class_1": 3161, + "eval_predicted_binding_ratio": 0.1607179174293268, + "eval_recall": 0.7242824895195098, + "eval_recall_macro": 0.8345261062313551, + "eval_runtime": 0.2696, + "eval_samples_per_second": 604.516, + "eval_steps_per_second": 3.709, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3258 + }, + { + "epoch": 182.0, + "eval_accuracy": 0.9102094773235713, + "eval_auc": 0.9395796926893379, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7179814755669115, + "eval_f1_macro": 0.8322930296138966, + "eval_loss": 0.2495713084936142, + "eval_pr_auc": 0.7229232947500771, + "eval_precision": 0.7111673521037646, + "eval_precision_macro": 0.8297461525769929, + "eval_pred_class_0": 16507, + "eval_pred_class_1": 3161, + "eval_predicted_binding_ratio": 0.1607179174293268, + "eval_recall": 0.7249274427603999, + "eval_recall_macro": 0.8349089438103321, + "eval_runtime": 0.2615, + "eval_samples_per_second": 623.403, + "eval_steps_per_second": 3.825, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3276 + }, + { + "epoch": 183.0, + "eval_accuracy": 0.910362009355298, + "eval_auc": 0.9396445500623882, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7187749242303397, + "eval_f1_macro": 0.8327294647159501, + "eval_loss": 0.2494269460439682, + "eval_pr_auc": 0.7232071190390562, + "eval_precision": 0.7111742424242424, + "eval_precision_macro": 0.8298901515151516, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.726539825862625, + "eval_recall_macro": 0.8356547744029127, + "eval_runtime": 0.2535, + "eval_samples_per_second": 643.015, + "eval_steps_per_second": 3.945, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3294 + }, + { + "epoch": 184.0, + "eval_accuracy": 0.9106670734187513, + "eval_auc": 0.9397990826808293, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7191945021575835, + "eval_f1_macro": 0.8330396163256251, + "eval_loss": 0.24917152523994446, + "eval_pr_auc": 0.7237798760580164, + "eval_precision": 0.7129277566539924, + "eval_precision_macro": 0.830694740730097, + "eval_pred_class_0": 16512, + "eval_pred_class_1": 3156, + "eval_predicted_binding_ratio": 0.16046369737644905, + "eval_recall": 0.7255723960012899, + "eval_recall_macro": 0.8354426837856392, + "eval_runtime": 0.2605, + "eval_samples_per_second": 625.837, + "eval_steps_per_second": 3.839, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3312 + }, + { + "epoch": 185.0, + "eval_accuracy": 0.9110738255033557, + "eval_auc": 0.9399604475135382, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7199359487590072, + "eval_f1_macro": 0.8335408491792982, + "eval_loss": 0.24888525903224945, + "eval_pr_auc": 0.7244599264333298, + "eval_precision": 0.7150127226463104, + "eval_precision_macro": 0.8316954196625403, + "eval_pred_class_0": 16524, + "eval_pred_class_1": 3144, + "eval_predicted_binding_ratio": 0.15985356924954242, + "eval_recall": 0.7249274427603999, + "eval_recall_macro": 0.8354220119578544, + "eval_runtime": 0.2549, + "eval_samples_per_second": 639.342, + "eval_steps_per_second": 3.922, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3330 + }, + { + "epoch": 186.0, + "eval_accuracy": 0.9109721374822046, + "eval_auc": 0.9401063571379032, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7194359878224643, + "eval_f1_macro": 0.8332638467590944, + "eval_loss": 0.2486649453639984, + "eval_pr_auc": 0.7249907062273525, + "eval_precision": 0.714968152866242, + "eval_precision_macro": 0.8315886262879129, + "eval_pred_class_0": 16528, + "eval_pred_class_1": 3140, + "eval_predicted_binding_ratio": 0.1596501932072402, + "eval_recall": 0.7239600128990649, + "eval_recall_macro": 0.8349684775064528, + "eval_runtime": 0.2536, + "eval_samples_per_second": 642.8, + "eval_steps_per_second": 3.944, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3348 + }, + { + "epoch": 187.0, + "eval_accuracy": 0.9112263575350824, + "eval_auc": 0.9402670406956853, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7203715566944267, + "eval_f1_macro": 0.8338047799185901, + "eval_loss": 0.24845102429389954, + "eval_pr_auc": 0.7255585294747666, + "eval_precision": 0.7155583837098314, + "eval_precision_macro": 0.832000069313312, + "eval_pred_class_0": 16525, + "eval_pred_class_1": 3143, + "eval_predicted_binding_ratio": 0.15980272523896685, + "eval_recall": 0.7252499193808449, + "eval_recall_macro": 0.8356436112266088, + "eval_runtime": 0.2397, + "eval_samples_per_second": 680.081, + "eval_steps_per_second": 4.172, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3366 + }, + { + "epoch": 188.0, + "eval_accuracy": 0.9114805775879601, + "eval_auc": 0.9404174175370716, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7215736446505677, + "eval_f1_macro": 0.8344742146415792, + "eval_loss": 0.24825866520404816, + "eval_pr_auc": 0.7262464197023197, + "eval_precision": 0.7157360406091371, + "eval_precision_macro": 0.8322867657635175, + "eval_pred_class_0": 16516, + "eval_pred_class_1": 3152, + "eval_predicted_binding_ratio": 0.16026032133414683, + "eval_recall": 0.72750725572396, + "eval_recall_macro": 0.8367119184396343, + "eval_runtime": 0.2593, + "eval_samples_per_second": 628.496, + "eval_steps_per_second": 3.856, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3384 + }, + { + "epoch": 189.0, + "eval_accuracy": 0.9114297335773845, + "eval_auc": 0.9404972726910148, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7223461906279885, + "eval_f1_macro": 0.8348286515416876, + "eval_loss": 0.24819281697273254, + "eval_pr_auc": 0.7264794032375063, + "eval_precision": 0.7141506460762685, + "eval_precision_macro": 0.8317646228259488, + "eval_pred_class_0": 16495, + "eval_pred_class_1": 3173, + "eval_predicted_binding_ratio": 0.16132804555623348, + "eval_recall": 0.7307320219284101, + "eval_recall_macro": 0.8379923162699333, + "eval_runtime": 0.2478, + "eval_samples_per_second": 657.696, + "eval_steps_per_second": 4.035, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3402 + }, + { + "epoch": 190.0, + "eval_accuracy": 0.9116839536302623, + "eval_auc": 0.940656301723974, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.722922316158877, + "eval_f1_macro": 0.8351963018783921, + "eval_loss": 0.24791164696216583, + "eval_pr_auc": 0.7271914277283732, + "eval_precision": 0.7152777777777778, + "eval_precision_macro": 0.8323358585858586, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.7307320219284101, + "eval_recall_macro": 0.8381432186662634, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.211, + "eval_steps_per_second": 3.866, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3420 + }, + { + "epoch": 191.0, + "eval_accuracy": 0.9118873296725646, + "eval_auc": 0.9407912914845091, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7233838786911413, + "eval_f1_macro": 0.8354907358742514, + "eval_loss": 0.24768619239330292, + "eval_pr_auc": 0.727892471269696, + "eval_precision": 0.7161820480404552, + "eval_precision_macro": 0.8327941262984632, + "eval_pred_class_0": 16504, + "eval_pred_class_1": 3164, + "eval_predicted_binding_ratio": 0.1608704494610535, + "eval_recall": 0.7307320219284101, + "eval_recall_macro": 0.8382639405833274, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.424, + "eval_steps_per_second": 3.935, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3438 + }, + { + "epoch": 192.0, + "eval_accuracy": 0.9119381736831401, + "eval_auc": 0.9409245683252279, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7240280433397068, + "eval_f1_macro": 0.8358192243316804, + "eval_loss": 0.2475385069847107, + "eval_pr_auc": 0.7283869598485145, + "eval_precision": 0.7155905511811024, + "eval_precision_macro": 0.832663401462133, + "eval_pred_class_0": 16493, + "eval_pred_class_1": 3175, + "eval_predicted_binding_ratio": 0.16142973357738458, + "eval_recall": 0.7326668816510803, + "eval_recall_macro": 0.8390804680483324, + "eval_runtime": 0.2536, + "eval_samples_per_second": 642.682, + "eval_steps_per_second": 3.943, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3456 + }, + { + "epoch": 193.0, + "eval_accuracy": 0.9120398617042912, + "eval_auc": 0.9410766288889338, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7232885476647473, + "eval_f1_macro": 0.8354987049773379, + "eval_loss": 0.24721089005470276, + "eval_pr_auc": 0.7290279509427341, + "eval_precision": 0.7175499841320215, + "eval_precision_macro": 0.8333466455139735, + "eval_pred_class_0": 16517, + "eval_pred_class_1": 3151, + "eval_predicted_binding_ratio": 0.16020947732357127, + "eval_recall": 0.7291196388261851, + "eval_recall_macro": 0.837699192866343, + "eval_runtime": 0.2594, + "eval_samples_per_second": 628.255, + "eval_steps_per_second": 3.854, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3474 + }, + { + "epoch": 194.0, + "eval_accuracy": 0.9122432377465934, + "eval_auc": 0.9411616325348253, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7239283429302623, + "eval_f1_macro": 0.8358790547924193, + "eval_loss": 0.2470986247062683, + "eval_pr_auc": 0.7293209780794321, + "eval_precision": 0.7181847032688036, + "eval_precision_macro": 0.8337245487646312, + "eval_pred_class_0": 16517, + "eval_pred_class_1": 3151, + "eval_predicted_binding_ratio": 0.16020947732357127, + "eval_recall": 0.7297645920670751, + "eval_recall_macro": 0.83808203044532, + "eval_runtime": 0.26, + "eval_samples_per_second": 626.922, + "eval_steps_per_second": 3.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3492 + }, + { + "epoch": 194.44444444444446, + "grad_norm": 15854.8017578125, + "learning_rate": 7.667662546617938e-07, + "loss": 0.2185, + "step": 3500 + }, + { + "epoch": 195.0, + "eval_accuracy": 0.9121923937360179, + "eval_auc": 0.9412192294636534, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7248685677871595, + "eval_f1_macro": 0.8363143165624445, + "eval_loss": 0.2470363825559616, + "eval_pr_auc": 0.7294473908430833, + "eval_precision": 0.7163098236775819, + "eval_precision_macro": 0.8331124670170592, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7336343115124153, + "eval_recall_macro": 0.839624543937532, + "eval_runtime": 0.2532, + "eval_samples_per_second": 643.796, + "eval_steps_per_second": 3.95, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3510 + }, + { + "epoch": 196.0, + "eval_accuracy": 0.9123449257677445, + "eval_auc": 0.9413068608842632, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7252151737328658, + "eval_f1_macro": 0.8365353589310388, + "eval_loss": 0.2468923032283783, + "eval_pr_auc": 0.7297206006779651, + "eval_precision": 0.7169870784746297, + "eval_precision_macro": 0.8334556489675362, + "eval_pred_class_0": 16495, + "eval_pred_class_1": 3173, + "eval_predicted_binding_ratio": 0.16132804555623348, + "eval_recall": 0.7336343115124153, + "eval_recall_macro": 0.83971508537533, + "eval_runtime": 0.2673, + "eval_samples_per_second": 609.86, + "eval_steps_per_second": 3.741, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3528 + }, + { + "epoch": 197.0, + "eval_accuracy": 0.9124466137888957, + "eval_auc": 0.9415050729580239, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7246562200191877, + "eval_f1_macro": 0.8363018721763311, + "eval_loss": 0.2465437948703766, + "eval_pr_auc": 0.7306168212570879, + "eval_precision": 0.7186806216301934, + "eval_precision_macro": 0.8340602623742853, + "eval_pred_class_0": 16515, + "eval_pred_class_1": 3153, + "eval_predicted_binding_ratio": 0.1603111653447224, + "eval_recall": 0.7307320219284101, + "eval_recall_macro": 0.8385959258552536, + "eval_runtime": 0.2512, + "eval_samples_per_second": 648.769, + "eval_steps_per_second": 3.98, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3546 + }, + { + "epoch": 198.0, + "eval_accuracy": 0.9127008338417735, + "eval_auc": 0.9416286951597772, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7254996003197443, + "eval_f1_macro": 0.836798347664482, + "eval_loss": 0.24635502696037292, + "eval_pr_auc": 0.7311574695224861, + "eval_precision": 0.7194039315155358, + "eval_precision_macro": 0.8345112185130059, + "eval_pred_class_0": 16514, + "eval_pred_class_1": 3154, + "eval_predicted_binding_ratio": 0.16036200935529796, + "eval_recall": 0.7316994517897453, + "eval_recall_macro": 0.8391400017444531, + "eval_runtime": 0.2567, + "eval_samples_per_second": 635.086, + "eval_steps_per_second": 3.896, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3564 + }, + { + "epoch": 199.0, + "eval_accuracy": 0.9126499898311979, + "eval_auc": 0.941704083096699, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7255591054313099, + "eval_f1_macro": 0.8368090605158727, + "eval_loss": 0.24623039364814758, + "eval_pr_auc": 0.7315041353273113, + "eval_precision": 0.7188983855650523, + "eval_precision_macro": 0.8343113891602595, + "eval_pred_class_0": 16509, + "eval_pred_class_1": 3159, + "eval_predicted_binding_ratio": 0.16061622940817571, + "eval_recall": 0.7323444050306352, + "eval_recall_macro": 0.8393719369271001, + "eval_runtime": 0.2608, + "eval_samples_per_second": 625.096, + "eval_steps_per_second": 3.835, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3582 + }, + { + "epoch": 200.0, + "eval_accuracy": 0.912751677852349, + "eval_auc": 0.9417882886776758, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7266645428480408, + "eval_f1_macro": 0.8373778882187448, + "eval_loss": 0.24614199995994568, + "eval_pr_auc": 0.7317905290059212, + "eval_precision": 0.7179729304375196, + "eval_precision_macro": 0.8341244192542944, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7355691712350855, + "eval_recall_macro": 0.8407428761951972, + "eval_runtime": 0.2558, + "eval_samples_per_second": 637.19, + "eval_steps_per_second": 3.909, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3600 + }, + { + "epoch": 201.0, + "eval_accuracy": 0.9129042098840756, + "eval_auc": 0.9419125142943645, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7274463007159905, + "eval_f1_macro": 0.837808654578745, + "eval_loss": 0.2459731251001358, + "eval_pr_auc": 0.7322365639924645, + "eval_precision": 0.717964824120603, + "eval_precision_macro": 0.8342614705412528, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7371815543373106, + "eval_recall_macro": 0.8414887067877777, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.607, + "eval_steps_per_second": 3.924, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3618 + }, + { + "epoch": 202.0, + "eval_accuracy": 0.9131075859263779, + "eval_auc": 0.9421228725268236, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7264286857691692, + "eval_f1_macro": 0.8373900508237788, + "eval_loss": 0.2455640733242035, + "eval_pr_auc": 0.7332689412482398, + "eval_precision": 0.721233312142403, + "eval_precision_macro": 0.8354381062588301, + "eval_pred_class_0": 16522, + "eval_pred_class_1": 3146, + "eval_predicted_binding_ratio": 0.1599552572706935, + "eval_recall": 0.7316994517897453, + "eval_recall_macro": 0.8393814455785812, + "eval_runtime": 0.2535, + "eval_samples_per_second": 643.062, + "eval_steps_per_second": 3.945, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3636 + }, + { + "epoch": 203.0, + "eval_accuracy": 0.9132092739475289, + "eval_auc": 0.9422253168108461, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7266613290632506, + "eval_f1_macro": 0.8375381529725912, + "eval_loss": 0.24535632133483887, + "eval_pr_auc": 0.7337329229212076, + "eval_precision": 0.7216921119592875, + "eval_precision_macro": 0.8356705536799585, + "eval_pred_class_0": 16524, + "eval_pred_class_1": 3144, + "eval_predicted_binding_ratio": 0.15985356924954242, + "eval_recall": 0.7316994517897453, + "eval_recall_macro": 0.8394418065371132, + "eval_runtime": 0.2645, + "eval_samples_per_second": 616.363, + "eval_steps_per_second": 3.781, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3654 + }, + { + "epoch": 204.0, + "eval_accuracy": 0.9133109619686801, + "eval_auc": 0.9423872461284946, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7268066015061689, + "eval_f1_macro": 0.8376441226295008, + "eval_loss": 0.2451435625553131, + "eval_pr_auc": 0.7344416461934514, + "eval_precision": 0.7222929936305732, + "eval_precision_macro": 0.8359468356342605, + "eval_pred_class_0": 16528, + "eval_pred_class_1": 3140, + "eval_predicted_binding_ratio": 0.1596501932072402, + "eval_recall": 0.7313769751693002, + "eval_recall_macro": 0.8393711096646888, + "eval_runtime": 0.24, + "eval_samples_per_second": 679.026, + "eval_steps_per_second": 4.166, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3672 + }, + { + "epoch": 205.0, + "eval_accuracy": 0.9138194020744357, + "eval_auc": 0.9424586242758461, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7293629251157592, + "eval_f1_macro": 0.8390563302747484, + "eval_loss": 0.24504327774047852, + "eval_pr_auc": 0.7346240551024029, + "eval_precision": 0.7223276407337128, + "eval_precision_macro": 0.8364152440915626, + "eval_pred_class_0": 16506, + "eval_pred_class_1": 3162, + "eval_predicted_binding_ratio": 0.16076876143990237, + "eval_recall": 0.7365366010964205, + "eval_recall_macro": 0.8417698397526527, + "eval_runtime": 0.2606, + "eval_samples_per_second": 625.36, + "eval_steps_per_second": 3.837, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3690 + }, + { + "epoch": 206.0, + "eval_accuracy": 0.91376855806386, + "eval_auc": 0.9426215171108914, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.729073482428115, + "eval_f1_macro": 0.8388988164347613, + "eval_loss": 0.2448122650384903, + "eval_pr_auc": 0.7352376772544322, + "eval_precision": 0.7223805001582779, + "eval_precision_macro": 0.8363855980711433, + "eval_pred_class_0": 16509, + "eval_pred_class_1": 3159, + "eval_predicted_binding_ratio": 0.16061622940817571, + "eval_recall": 0.7358916478555305, + "eval_recall_macro": 0.8414775436114739, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.41, + "eval_steps_per_second": 3.88, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3708 + }, + { + "epoch": 207.0, + "eval_accuracy": 0.9139210900955868, + "eval_auc": 0.9426929439207377, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7299409794225554, + "eval_f1_macro": 0.8393709493840633, + "eval_loss": 0.24476298689842224, + "eval_pr_auc": 0.7354485111055644, + "eval_precision": 0.7222222222222222, + "eval_precision_macro": 0.8364747474747475, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.7378265075782006, + "eval_recall_macro": 0.8423544320350108, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.323, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3726 + }, + { + "epoch": 208.0, + "eval_accuracy": 0.9138702460850112, + "eval_auc": 0.9428956913390121, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.72896, + "eval_f1_macro": 0.8388800483588226, + "eval_loss": 0.24439764022827148, + "eval_pr_auc": 0.736435270049487, + "eval_precision": 0.723404255319149, + "eval_precision_macro": 0.8367914187788916, + "eval_pred_class_0": 16519, + "eval_pred_class_1": 3149, + "eval_predicted_binding_ratio": 0.16010778930242017, + "eval_recall": 0.7346017413737504, + "eval_recall_macro": 0.8410136732461799, + "eval_runtime": 0.2835, + "eval_samples_per_second": 574.921, + "eval_steps_per_second": 3.527, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3744 + }, + { + "epoch": 209.0, + "eval_accuracy": 0.9140736221273134, + "eval_auc": 0.9429205676063466, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.730805989168525, + "eval_f1_macro": 0.83984185960937, + "eval_loss": 0.24438706040382385, + "eval_pr_auc": 0.7364306271247294, + "eval_precision": 0.722064841045011, + "eval_precision_macro": 0.8365645289452815, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7397613673008707, + "eval_recall_macro": 0.8432313204585479, + "eval_runtime": 0.2624, + "eval_samples_per_second": 621.222, + "eval_steps_per_second": 3.811, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3762 + }, + { + "epoch": 210.0, + "eval_accuracy": 0.9142769981696156, + "eval_auc": 0.9430619808161933, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7309288222151292, + "eval_f1_macro": 0.8399730291904192, + "eval_loss": 0.2441486269235611, + "eval_pr_auc": 0.7370811402900591, + "eval_precision": 0.7235387045813586, + "eval_precision_macro": 0.8371980622222068, + "eval_pred_class_0": 16503, + "eval_pred_class_1": 3165, + "eval_predicted_binding_ratio": 0.16092129347162903, + "eval_recall": 0.7384714608190907, + "eval_recall_macro": 0.842827811051786, + "eval_runtime": 0.2675, + "eval_samples_per_second": 609.291, + "eval_steps_per_second": 3.738, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3780 + }, + { + "epoch": 211.0, + "eval_accuracy": 0.9146329062436445, + "eval_auc": 0.9431815542983806, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7325155328978812, + "eval_f1_macro": 0.8408637738901821, + "eval_loss": 0.24397221207618713, + "eval_pr_auc": 0.7374915342644908, + "eval_precision": 0.7238664987405542, + "eval_precision_macro": 0.8376184300639468, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7413737504030957, + "eval_recall_macro": 0.8442185948852564, + "eval_runtime": 0.264, + "eval_samples_per_second": 617.425, + "eval_steps_per_second": 3.788, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3798 + }, + { + "epoch": 212.0, + "eval_accuracy": 0.9144295302013423, + "eval_auc": 0.9432977798009264, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7319636884854276, + "eval_f1_macro": 0.8405258137499286, + "eval_loss": 0.24377743899822235, + "eval_pr_auc": 0.7380125440447487, + "eval_precision": 0.7230962869729389, + "eval_precision_macro": 0.837200053735105, + "eval_pred_class_0": 16490, + "eval_pred_class_1": 3178, + "eval_predicted_binding_ratio": 0.16158226560911124, + "eval_recall": 0.7410512737826508, + "eval_recall_macro": 0.8439668151372359, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.044, + "eval_steps_per_second": 3.828, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3816 + }, + { + "epoch": 213.0, + "eval_accuracy": 0.9144295302013423, + "eval_auc": 0.9434619086633391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7311072056239016, + "eval_f1_macro": 0.8401129643018078, + "eval_loss": 0.24348998069763184, + "eval_pr_auc": 0.7388402605739814, + "eval_precision": 0.7245091830272324, + "eval_precision_macro": 0.8376331499630408, + "eval_pred_class_0": 16510, + "eval_pred_class_1": 3158, + "eval_predicted_binding_ratio": 0.16056538539760015, + "eval_recall": 0.7378265075782006, + "eval_recall_macro": 0.8426562368276709, + "eval_runtime": 0.2496, + "eval_samples_per_second": 652.954, + "eval_steps_per_second": 4.006, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3834 + }, + { + "epoch": 214.0, + "eval_accuracy": 0.9147854382753712, + "eval_auc": 0.943601764673353, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7310654685494223, + "eval_f1_macro": 0.8402185728440683, + "eval_loss": 0.24328412115573883, + "eval_pr_auc": 0.7395557790782865, + "eval_precision": 0.727563078888534, + "eval_precision_macro": 0.838897945080114, + "eval_pred_class_0": 16537, + "eval_pred_class_1": 3131, + "eval_predicted_binding_ratio": 0.15919259711206019, + "eval_recall": 0.7346017413737504, + "eval_recall_macro": 0.841556921872968, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.38, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3852 + }, + { + "epoch": 215.0, + "eval_accuracy": 0.9146837502542201, + "eval_auc": 0.9436924715636332, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7310897435897435, + "eval_f1_macro": 0.8401943762667112, + "eval_loss": 0.2431441992521286, + "eval_pr_auc": 0.7398761361047077, + "eval_precision": 0.7266645428480408, + "eval_precision_macro": 0.838527383046018, + "eval_pred_class_0": 16529, + "eval_pred_class_1": 3139, + "eval_predicted_binding_ratio": 0.15959934919666463, + "eval_recall": 0.7355691712350855, + "eval_recall_macro": 0.8418897344073055, + "eval_runtime": 0.2583, + "eval_samples_per_second": 631.075, + "eval_steps_per_second": 3.872, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3870 + }, + { + "epoch": 216.0, + "eval_accuracy": 0.9147345942647956, + "eval_auc": 0.9437266715649686, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7332591060919358, + "eval_f1_macro": 0.8412581348487456, + "eval_loss": 0.24318096041679382, + "eval_pr_auc": 0.739819563727558, + "eval_precision": 0.7234777150031387, + "eval_precision_macro": 0.8375913025931845, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7433086101257659, + "eval_recall_macro": 0.8450653028295274, + "eval_runtime": 0.2695, + "eval_samples_per_second": 604.716, + "eval_steps_per_second": 3.71, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3888 + }, + { + "epoch": 217.0, + "eval_accuracy": 0.915039658328249, + "eval_auc": 0.9438657295100676, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7336202773792444, + "eval_f1_macro": 0.8415401994826537, + "eval_loss": 0.24295340478420258, + "eval_pr_auc": 0.740473021691125, + "eval_precision": 0.7254098360655737, + "eval_precision_macro": 0.8384566154139702, + "eval_pred_class_0": 16496, + "eval_pred_class_1": 3172, + "eval_predicted_binding_ratio": 0.16127720154565792, + "eval_recall": 0.7420187036439858, + "eval_recall_macro": 0.8447221543812975, + "eval_runtime": 0.2596, + "eval_samples_per_second": 627.926, + "eval_steps_per_second": 3.852, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3906 + }, + { + "epoch": 218.0, + "eval_accuracy": 0.9148362822859467, + "eval_auc": 0.9439684365715622, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7333227193122114, + "eval_f1_macro": 0.8413247993777817, + "eval_loss": 0.24282881617546082, + "eval_pr_auc": 0.7409189185674594, + "eval_precision": 0.7242138364779874, + "eval_precision_macro": 0.837907500480624, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.7426636568848759, + "eval_recall_macro": 0.8448635481261465, + "eval_runtime": 0.2549, + "eval_samples_per_second": 639.434, + "eval_steps_per_second": 3.923, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3924 + }, + { + "epoch": 219.0, + "eval_accuracy": 0.9149888143176734, + "eval_auc": 0.9440560582596731, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7337579617834394, + "eval_f1_macro": 0.8415885646284089, + "eval_loss": 0.24268390238285065, + "eval_pr_auc": 0.7412204047828347, + "eval_precision": 0.724756212645486, + "eval_precision_macro": 0.8382104794199593, + "eval_pred_class_0": 16489, + "eval_pred_class_1": 3179, + "eval_predicted_binding_ratio": 0.1616331096196868, + "eval_recall": 0.7429861335053208, + "eval_recall_macro": 0.845085147394901, + "eval_runtime": 0.2558, + "eval_samples_per_second": 637.151, + "eval_steps_per_second": 3.909, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3942 + }, + { + "epoch": 220.0, + "eval_accuracy": 0.9152430343705511, + "eval_auc": 0.9441587945186644, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7346808849275823, + "eval_f1_macro": 0.842123366857946, + "eval_loss": 0.24253520369529724, + "eval_pr_auc": 0.7415422610927452, + "eval_precision": 0.7253299811439347, + "eval_precision_macro": 0.8386142808788943, + "eval_pred_class_0": 16486, + "eval_pred_class_1": 3182, + "eval_predicted_binding_ratio": 0.16178564165141346, + "eval_recall": 0.7442760399871009, + "eval_recall_macro": 0.8457602811150571, + "eval_runtime": 0.2572, + "eval_samples_per_second": 633.818, + "eval_steps_per_second": 3.888, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3960 + }, + { + "epoch": 221.0, + "eval_accuracy": 0.9153955664022778, + "eval_auc": 0.9442562168332251, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7354531001589825, + "eval_f1_macro": 0.8425495241156832, + "eval_loss": 0.2423904687166214, + "eval_pr_auc": 0.741902799087436, + "eval_precision": 0.7253057384760113, + "eval_precision_macro": 0.8387436514456639, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.745888423089326, + "eval_recall_macro": 0.8465061117076376, + "eval_runtime": 0.2433, + "eval_samples_per_second": 669.926, + "eval_steps_per_second": 4.11, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3978 + }, + { + "epoch": 222.0, + "eval_accuracy": 0.9153955664022778, + "eval_auc": 0.944399226172901, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.734609250398724, + "eval_f1_macro": 0.8421428275824746, + "eval_loss": 0.24210123717784882, + "eval_pr_auc": 0.7426309034006747, + "eval_precision": 0.7267276743452193, + "eval_precision_macro": 0.8391805533372256, + "eval_pred_class_0": 16499, + "eval_pred_class_1": 3169, + "eval_predicted_binding_ratio": 0.16112466951393126, + "eval_recall": 0.7426636568848759, + "eval_recall_macro": 0.8451955333980726, + "eval_runtime": 0.2674, + "eval_samples_per_second": 609.607, + "eval_steps_per_second": 3.74, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3996 + }, + { + "epoch": 222.22222222222223, + "grad_norm": 16301.5107421875, + "learning_rate": 6.802697587657594e-07, + "loss": 0.211, + "step": 4000 + }, + { + "epoch": 223.0, + "eval_accuracy": 0.9156497864551556, + "eval_auc": 0.9445356174132858, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.735026353617633, + "eval_f1_macro": 0.8424338117294514, + "eval_loss": 0.2418563961982727, + "eval_pr_auc": 0.7432491183346004, + "eval_precision": 0.7281645569620253, + "eval_precision_macro": 0.8398516024451512, + "eval_pred_class_0": 16508, + "eval_pred_class_1": 3160, + "eval_predicted_binding_ratio": 0.16066707341875128, + "eval_recall": 0.7420187036439858, + "eval_recall_macro": 0.8450843201324897, + "eval_runtime": 0.2638, + "eval_samples_per_second": 617.886, + "eval_steps_per_second": 3.791, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4014 + }, + { + "epoch": 224.0, + "eval_accuracy": 0.9159040065080334, + "eval_auc": 0.9445885524751136, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.736624203821656, + "eval_f1_macro": 0.8432939508943711, + "eval_loss": 0.24180874228477478, + "eval_pr_auc": 0.7434863322076849, + "eval_precision": 0.7275872916011324, + "eval_precision_macro": 0.8398989281099847, + "eval_pred_class_0": 16489, + "eval_pred_class_1": 3179, + "eval_predicted_binding_ratio": 0.1616331096196868, + "eval_recall": 0.745888423089326, + "eval_recall_macro": 0.8468079165002977, + "eval_runtime": 0.2501, + "eval_samples_per_second": 651.671, + "eval_steps_per_second": 3.998, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4032 + }, + { + "epoch": 225.0, + "eval_accuracy": 0.9159548505186089, + "eval_auc": 0.9446536531606378, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7374106433677522, + "eval_f1_macro": 0.8436909456056703, + "eval_loss": 0.24177636206150055, + "eval_pr_auc": 0.7437995583771988, + "eval_precision": 0.7266750156543519, + "eval_precision_macro": 0.8396638402297497, + "eval_pred_class_0": 16474, + "eval_pred_class_1": 3194, + "eval_predicted_binding_ratio": 0.16239576977832013, + "eval_recall": 0.7484682360528861, + "eval_recall_macro": 0.8478865596272157, + "eval_runtime": 0.2167, + "eval_samples_per_second": 752.113, + "eval_steps_per_second": 4.614, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4050 + }, + { + "epoch": 226.0, + "eval_accuracy": 0.9161582265609112, + "eval_auc": 0.9447719419529625, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.737629276054097, + "eval_f1_macro": 0.843868342907385, + "eval_loss": 0.24154822528362274, + "eval_pr_auc": 0.7443388353128296, + "eval_precision": 0.7280150753768844, + "eval_precision_macro": 0.8402572343640063, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7475008061915511, + "eval_recall_macro": 0.8476141080514102, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.202, + "eval_steps_per_second": 3.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4068 + }, + { + "epoch": 227.0, + "eval_accuracy": 0.9162599145820622, + "eval_auc": 0.9448749507219245, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7381974248927039, + "eval_f1_macro": 0.8441781495775367, + "eval_loss": 0.2414349913597107, + "eval_pr_auc": 0.7447448668341484, + "eval_precision": 0.7278996865203762, + "eval_precision_macro": 0.840312265884293, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7487907126733312, + "eval_recall_macro": 0.8481987003337683, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.465, + "eval_steps_per_second": 3.88, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4086 + }, + { + "epoch": 228.0, + "eval_accuracy": 0.9163107585926378, + "eval_auc": 0.9449481196490842, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.738314785373609, + "eval_f1_macro": 0.8442527143596241, + "eval_loss": 0.2413274347782135, + "eval_pr_auc": 0.7451543649133733, + "eval_precision": 0.7281279397930386, + "eval_precision_macro": 0.840427826926679, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7487907126733312, + "eval_recall_macro": 0.8482288808130343, + "eval_runtime": 0.2185, + "eval_samples_per_second": 746.149, + "eval_steps_per_second": 4.578, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4104 + }, + { + "epoch": 229.0, + "eval_accuracy": 0.9162599145820622, + "eval_auc": 0.9450982726429946, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7376135096383623, + "eval_f1_macro": 0.843896745442007, + "eval_loss": 0.2410273402929306, + "eval_pr_auc": 0.7459954701052622, + "eval_precision": 0.728904282115869, + "eval_precision_macro": 0.8406224054285385, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7465333763302161, + "eval_recall_macro": 0.8472812955170728, + "eval_runtime": 0.2597, + "eval_samples_per_second": 627.64, + "eval_steps_per_second": 3.851, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4122 + }, + { + "epoch": 230.0, + "eval_accuracy": 0.9167683546878178, + "eval_auc": 0.9451833444163787, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7392066273697626, + "eval_f1_macro": 0.8448445490519523, + "eval_loss": 0.2408701479434967, + "eval_pr_auc": 0.74641099889946, + "eval_precision": 0.7304785894206549, + "eval_precision_macro": 0.8415611477299734, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7481457594324411, + "eval_recall_macro": 0.8482383894645154, + "eval_runtime": 0.2678, + "eval_samples_per_second": 608.615, + "eval_steps_per_second": 3.734, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4140 + }, + { + "epoch": 231.0, + "eval_accuracy": 0.9167175106772423, + "eval_auc": 0.9452793360535927, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7393380012730745, + "eval_f1_macro": 0.8448898647294817, + "eval_loss": 0.24077175557613373, + "eval_pr_auc": 0.7467905435777061, + "eval_precision": 0.7298146402764687, + "eval_precision_macro": 0.8413101105537636, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7491131892937762, + "eval_recall_macro": 0.8486013824781189, + "eval_runtime": 0.2588, + "eval_samples_per_second": 629.884, + "eval_steps_per_second": 3.864, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4158 + }, + { + "epoch": 232.0, + "eval_accuracy": 0.9163616026032133, + "eval_auc": 0.9453335850027798, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7391786903440621, + "eval_f1_macro": 0.8446869866386211, + "eval_loss": 0.24075280129909515, + "eval_pr_auc": 0.7469096614042753, + "eval_precision": 0.7270742358078602, + "eval_precision_macro": 0.840149923152381, + "eval_pred_class_0": 16462, + "eval_pred_class_1": 3206, + "eval_predicted_binding_ratio": 0.16300589790522677, + "eval_recall": 0.7516930022573364, + "eval_recall_macro": 0.8494385817709088, + "eval_runtime": 0.2648, + "eval_samples_per_second": 615.647, + "eval_steps_per_second": 3.777, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4176 + }, + { + "epoch": 233.0, + "eval_accuracy": 0.9168191986983933, + "eval_auc": 0.9454244865430391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7405645417063115, + "eval_f1_macro": 0.845516906033295, + "eval_loss": 0.24061860144138336, + "eval_pr_auc": 0.7472467341999135, + "eval_precision": 0.7285491419656787, + "eval_precision_macro": 0.8410102813636934, + "eval_pred_class_0": 16463, + "eval_pred_class_1": 3205, + "eval_predicted_binding_ratio": 0.1629550538946512, + "eval_recall": 0.7529829087391164, + "eval_recall_macro": 0.8502344374081289, + "eval_runtime": 0.2576, + "eval_samples_per_second": 632.77, + "eval_steps_per_second": 3.882, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4194 + }, + { + "epoch": 234.0, + "eval_accuracy": 0.9171242627618467, + "eval_auc": 0.9455514956544295, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7409408773045136, + "eval_f1_macro": 0.8458063544009555, + "eval_loss": 0.24034352600574493, + "eval_pr_auc": 0.7478642707879094, + "eval_precision": 0.7304920087746788, + "eval_precision_macro": 0.841880100399963, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7516930022573364, + "eval_recall_macro": 0.849891288959899, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.626, + "eval_steps_per_second": 3.924, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4212 + }, + { + "epoch": 235.0, + "eval_accuracy": 0.9170734187512711, + "eval_auc": 0.945608099868364, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7413984461709212, + "eval_f1_macro": 0.8460087995182923, + "eval_loss": 0.24029456079006195, + "eval_pr_auc": 0.7479864003180418, + "eval_precision": 0.7292576419213974, + "eval_precision_macro": 0.8414542370705274, + "eval_pred_class_0": 16462, + "eval_pred_class_1": 3206, + "eval_predicted_binding_ratio": 0.16300589790522677, + "eval_recall": 0.7539503386004515, + "eval_recall_macro": 0.8507785132973285, + "eval_runtime": 0.2166, + "eval_samples_per_second": 752.582, + "eval_steps_per_second": 4.617, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4230 + }, + { + "epoch": 236.0, + "eval_accuracy": 0.9172767947935733, + "eval_auc": 0.9457173958316524, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7416229950770208, + "eval_f1_macro": 0.8461890816058248, + "eval_loss": 0.2400863915681839, + "eval_pr_auc": 0.7485735786505677, + "eval_precision": 0.7306007509386734, + "eval_precision_macro": 0.8420487970332027, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7529829087391164, + "eval_recall_macro": 0.850506061721523, + "eval_runtime": 0.2654, + "eval_samples_per_second": 614.11, + "eval_steps_per_second": 3.768, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4248 + }, + { + "epoch": 237.0, + "eval_accuracy": 0.9173784828147244, + "eval_auc": 0.9457852508143816, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7421044278685923, + "eval_f1_macro": 0.8464570875531852, + "eval_loss": 0.24001120030879974, + "eval_pr_auc": 0.7487297504117033, + "eval_precision": 0.730625, + "eval_precision_macro": 0.8421463596065095, + "eval_pred_class_0": 16468, + "eval_pred_class_1": 3200, + "eval_predicted_binding_ratio": 0.16270083384177345, + "eval_recall": 0.7539503386004515, + "eval_recall_macro": 0.8509595961729245, + "eval_runtime": 0.2614, + "eval_samples_per_second": 623.497, + "eval_steps_per_second": 3.825, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4266 + }, + { + "epoch": 238.0, + "eval_accuracy": 0.9176835468781778, + "eval_auc": 0.9458918216779619, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7428934413212641, + "eval_f1_macro": 0.8469453737675663, + "eval_loss": 0.23980914056301117, + "eval_pr_auc": 0.7491659673680734, + "eval_precision": 0.7318523153942428, + "eval_precision_macro": 0.8427959974251447, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7542728152208965, + "eval_recall_macro": 0.8512717368794771, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.674, + "eval_steps_per_second": 3.869, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4284 + }, + { + "epoch": 239.0, + "eval_accuracy": 0.9175818588570266, + "eval_auc": 0.9459814190633611, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7427392477384542, + "eval_f1_macro": 0.8468350393376697, + "eval_loss": 0.2397017627954483, + "eval_pr_auc": 0.7495363660441035, + "eval_precision": 0.73125, + "eval_precision_macro": 0.8425195834345397, + "eval_pred_class_0": 16468, + "eval_pred_class_1": 3200, + "eval_predicted_binding_ratio": 0.16270083384177345, + "eval_recall": 0.7545952918413416, + "eval_recall_macro": 0.8513424337519016, + "eval_runtime": 0.2693, + "eval_samples_per_second": 605.187, + "eval_steps_per_second": 3.713, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4302 + }, + { + "epoch": 240.0, + "eval_accuracy": 0.9177852348993288, + "eval_auc": 0.9460787245879343, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7425569176882661, + "eval_f1_macro": 0.846819224235148, + "eval_loss": 0.2394852489233017, + "eval_pr_auc": 0.7500368484248636, + "eval_precision": 0.7333333333333333, + "eval_precision_macro": 0.8433466763706938, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.7520154788777813, + "eval_recall_macro": 0.8504146930213136, + "eval_runtime": 0.2663, + "eval_samples_per_second": 612.01, + "eval_steps_per_second": 3.755, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4320 + }, + { + "epoch": 241.0, + "eval_accuracy": 0.91788692292048, + "eval_auc": 0.9461055278900622, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7437728066000318, + "eval_f1_macro": 0.8474411515820368, + "eval_loss": 0.23946216702461243, + "eval_pr_auc": 0.7500126507936957, + "eval_precision": 0.7320424734540912, + "eval_precision_macro": 0.8430344761294506, + "eval_pred_class_0": 16466, + "eval_pred_class_1": 3202, + "eval_predicted_binding_ratio": 0.16280252186292454, + "eval_recall": 0.7558851983231216, + "eval_recall_macro": 0.8520477479513235, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.523, + "eval_steps_per_second": 3.881, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4338 + }, + { + "epoch": 242.0, + "eval_accuracy": 0.9178360789099044, + "eval_auc": 0.9462651603379567, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7429207763283487, + "eval_f1_macro": 0.8470125818101653, + "eval_loss": 0.23918889462947845, + "eval_pr_auc": 0.7507506323790389, + "eval_precision": 0.7331240188383046, + "eval_precision_macro": 0.8433259480225619, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7529829087391164, + "eval_recall_macro": 0.8508380469934491, + "eval_runtime": 0.2685, + "eval_samples_per_second": 607.073, + "eval_steps_per_second": 3.724, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4356 + }, + { + "epoch": 243.0, + "eval_accuracy": 0.9181411429733577, + "eval_auc": 0.9463650352422546, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7440381558028617, + "eval_f1_macro": 0.847659094847506, + "eval_loss": 0.2390899360179901, + "eval_pr_auc": 0.7511682669955798, + "eval_precision": 0.7337723424270931, + "eval_precision_macro": 0.8437961778887089, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7545952918413416, + "eval_recall_macro": 0.8516744190238277, + "eval_runtime": 0.2667, + "eval_samples_per_second": 611.141, + "eval_steps_per_second": 3.749, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4374 + }, + { + "epoch": 244.0, + "eval_accuracy": 0.918446207036811, + "eval_auc": 0.9465344196541042, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7436061381074168, + "eval_f1_macro": 0.8475588127054617, + "eval_loss": 0.2388136237859726, + "eval_pr_auc": 0.7520628478642977, + "eval_precision": 0.7372424722662441, + "eval_precision_macro": 0.8451548762954184, + "eval_pred_class_0": 16513, + "eval_pred_class_1": 3155, + "eval_predicted_binding_ratio": 0.1604128533658735, + "eval_recall": 0.7500806191551113, + "eval_recall_macro": 0.8500206922660327, + "eval_runtime": 0.2635, + "eval_samples_per_second": 618.538, + "eval_steps_per_second": 3.795, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4392 + }, + { + "epoch": 245.0, + "eval_accuracy": 0.9182936750050844, + "eval_auc": 0.946561008841255, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7443931923015746, + "eval_f1_macro": 0.8478842115097998, + "eval_loss": 0.2388090342283249, + "eval_pr_auc": 0.7520269916576209, + "eval_precision": 0.7344632768361582, + "eval_precision_macro": 0.844145847858681, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7545952918413416, + "eval_recall_macro": 0.8517649604616258, + "eval_runtime": 0.2356, + "eval_samples_per_second": 691.824, + "eval_steps_per_second": 4.244, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4410 + }, + { + "epoch": 246.0, + "eval_accuracy": 0.9185478950579622, + "eval_auc": 0.9466103526109676, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7456335344553826, + "eval_f1_macro": 0.8485719582198821, + "eval_loss": 0.23877908289432526, + "eval_pr_auc": 0.7522268586665612, + "eval_precision": 0.7344385361276197, + "eval_precision_macro": 0.844360910951309, + "eval_pred_class_0": 16471, + "eval_pred_class_1": 3197, + "eval_predicted_binding_ratio": 0.16254830181004679, + "eval_recall": 0.7571751048049017, + "eval_recall_macro": 0.8529643255056077, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.409, + "eval_steps_per_second": 3.868, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4428 + }, + { + "epoch": 247.0, + "eval_accuracy": 0.9181919869839333, + "eval_auc": 0.9466863828928207, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.74464370734804, + "eval_f1_macro": 0.847968894691123, + "eval_loss": 0.23864901065826416, + "eval_pr_auc": 0.7525479720158522, + "eval_precision": 0.733125, + "eval_precision_macro": 0.84363925491863, + "eval_pred_class_0": 16468, + "eval_pred_class_1": 3200, + "eval_predicted_binding_ratio": 0.16270083384177345, + "eval_recall": 0.7565301515640116, + "eval_recall_macro": 0.8524909464888326, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.776, + "eval_steps_per_second": 3.87, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4446 + }, + { + "epoch": 248.0, + "eval_accuracy": 0.9184970510473867, + "eval_auc": 0.9467725739035849, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7454343338097507, + "eval_f1_macro": 0.8484579580910492, + "eval_loss": 0.23847386240959167, + "eval_pr_auc": 0.752960490937812, + "eval_precision": 0.7343554443053817, + "eval_precision_macro": 0.8442903982090288, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7568526281844566, + "eval_recall_macro": 0.8528030871953852, + "eval_runtime": 0.2626, + "eval_samples_per_second": 620.673, + "eval_steps_per_second": 3.808, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4464 + }, + { + "epoch": 249.0, + "eval_accuracy": 0.9183953630262355, + "eval_auc": 0.9468334214870647, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7452785272179019, + "eval_f1_macro": 0.8483468464756075, + "eval_loss": 0.238382488489151, + "eval_pr_auc": 0.7531576423642267, + "eval_precision": 0.73375, + "eval_precision_macro": 0.8440124787466602, + "eval_pred_class_0": 16468, + "eval_pred_class_1": 3200, + "eval_predicted_binding_ratio": 0.16270083384177345, + "eval_recall": 0.7571751048049017, + "eval_recall_macro": 0.8528737840678097, + "eval_runtime": 0.2634, + "eval_samples_per_second": 618.912, + "eval_steps_per_second": 3.797, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4482 + }, + { + "epoch": 250.0, + "grad_norm": 32703.09375, + "learning_rate": 5.870150616070439e-07, + "loss": 0.2045, + "step": 4500 + }, + { + "epoch": 250.0, + "eval_accuracy": 0.9189546471425666, + "eval_auc": 0.9469757203543168, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7464206172446707, + "eval_f1_macro": 0.849095331315225, + "eval_loss": 0.23810486495494843, + "eval_pr_auc": 0.7538503066163141, + "eval_precision": 0.7365777080062794, + "eval_precision_macro": 0.8453864697284325, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7565301515640116, + "eval_recall_macro": 0.8529436536778228, + "eval_runtime": 0.2683, + "eval_samples_per_second": 607.492, + "eval_steps_per_second": 3.727, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4500 + }, + { + "epoch": 251.0, + "eval_accuracy": 0.9192088671954444, + "eval_auc": 0.9470911964544428, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7465305471367044, + "eval_f1_macro": 0.8492382980338313, + "eval_loss": 0.23794293403625488, + "eval_pr_auc": 0.7543678751573928, + "eval_precision": 0.7386363636363636, + "eval_precision_macro": 0.8462575757575758, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.7545952918413416, + "eval_recall_macro": 0.8523082090884139, + "eval_runtime": 0.2575, + "eval_samples_per_second": 632.943, + "eval_steps_per_second": 3.883, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4518 + }, + { + "epoch": 252.0, + "eval_accuracy": 0.9186495830791133, + "eval_auc": 0.9471146615094285, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7459510955859003, + "eval_f1_macro": 0.8487610069611806, + "eval_loss": 0.2380078136920929, + "eval_pr_auc": 0.7543445385135205, + "eval_precision": 0.7347513293712856, + "eval_precision_macro": 0.8445476639570896, + "eval_pred_class_0": 16471, + "eval_pred_class_1": 3197, + "eval_predicted_binding_ratio": 0.16254830181004679, + "eval_recall": 0.7574975814253466, + "eval_recall_macro": 0.8531557442950962, + "eval_runtime": 0.262, + "eval_samples_per_second": 622.051, + "eval_steps_per_second": 3.816, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4536 + }, + { + "epoch": 253.0, + "eval_accuracy": 0.9187004270896888, + "eval_auc": 0.9472153636761378, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7461501825686617, + "eval_f1_macro": 0.8488749520465066, + "eval_loss": 0.23785638809204102, + "eval_pr_auc": 0.7549126832149435, + "eval_precision": 0.7348342714196373, + "eval_precision_macro": 0.8446181071730852, + "eval_pred_class_0": 16470, + "eval_pred_class_1": 3198, + "eval_predicted_binding_ratio": 0.16259914582062232, + "eval_recall": 0.7578200580457917, + "eval_recall_macro": 0.8533169826053187, + "eval_runtime": 0.2614, + "eval_samples_per_second": 623.46, + "eval_steps_per_second": 3.825, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4554 + }, + { + "epoch": 254.0, + "eval_accuracy": 0.918903803131991, + "eval_auc": 0.9473291852514412, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7465437788018433, + "eval_f1_macro": 0.849136671654349, + "eval_loss": 0.23767386376857758, + "eval_pr_auc": 0.7554825230801616, + "eval_precision": 0.7359022556390977, + "eval_precision_macro": 0.8451300547435596, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7574975814253466, + "eval_recall_macro": 0.8533066466914263, + "eval_runtime": 0.2597, + "eval_samples_per_second": 627.53, + "eval_steps_per_second": 3.85, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4572 + }, + { + "epoch": 255.0, + "eval_accuracy": 0.9196156192800488, + "eval_auc": 0.9474168847995438, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7481280866656046, + "eval_f1_macro": 0.8501522492676461, + "eval_loss": 0.23749451339244843, + "eval_pr_auc": 0.7559063738482461, + "eval_precision": 0.739294710327456, + "eval_precision_macro": 0.8468181046180088, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7571751048049017, + "eval_recall_macro": 0.8535981155701939, + "eval_runtime": 0.267, + "eval_samples_per_second": 610.581, + "eval_steps_per_second": 3.746, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4590 + }, + { + "epoch": 256.0, + "eval_accuracy": 0.9195647752694732, + "eval_auc": 0.9474761751831905, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7480089200382287, + "eval_f1_macro": 0.8500768176935048, + "eval_loss": 0.23738548159599304, + "eval_pr_auc": 0.7561572732622138, + "eval_precision": 0.7390620081838212, + "eval_precision_macro": 0.8467003692001515, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7571751048049017, + "eval_recall_macro": 0.853567935090928, + "eval_runtime": 0.2595, + "eval_samples_per_second": 628.111, + "eval_steps_per_second": 3.853, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4608 + }, + { + "epoch": 257.0, + "eval_accuracy": 0.919818995322351, + "eval_auc": 0.9475989409250355, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7486051331101546, + "eval_f1_macro": 0.8504541559450298, + "eval_loss": 0.23718814551830292, + "eval_pr_auc": 0.7567216824275462, + "eval_precision": 0.7402269861286255, + "eval_precision_macro": 0.8472897782243516, + "eval_pred_class_0": 16496, + "eval_pred_class_1": 3172, + "eval_predicted_binding_ratio": 0.16127720154565792, + "eval_recall": 0.7571751048049017, + "eval_recall_macro": 0.853718837487258, + "eval_runtime": 0.2144, + "eval_samples_per_second": 760.368, + "eval_steps_per_second": 4.665, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4626 + }, + { + "epoch": 258.0, + "eval_accuracy": 0.9193613992271711, + "eval_auc": 0.9475927315907009, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.748013981569749, + "eval_f1_macro": 0.8500072329009691, + "eval_loss": 0.23723167181015015, + "eval_pr_auc": 0.7565508642499409, + "eval_precision": 0.7372377074851237, + "eval_precision_macro": 0.845948140540741, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7591099645275717, + "eval_recall_macro": 0.8542335601596028, + "eval_runtime": 0.2406, + "eval_samples_per_second": 677.35, + "eval_steps_per_second": 4.156, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4644 + }, + { + "epoch": 259.0, + "eval_accuracy": 0.9197681513117755, + "eval_auc": 0.9476849762158163, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7486460656259956, + "eval_f1_macro": 0.8504558902151395, + "eval_loss": 0.23700466752052307, + "eval_pr_auc": 0.7570817989267699, + "eval_precision": 0.7396915328926661, + "eval_precision_macro": 0.8470757706910725, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7578200580457917, + "eval_recall_macro": 0.8539507726699049, + "eval_runtime": 0.2559, + "eval_samples_per_second": 636.906, + "eval_steps_per_second": 3.907, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4662 + }, + { + "epoch": 260.0, + "eval_accuracy": 0.9199715273540777, + "eval_auc": 0.9477206555569931, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7495225970719287, + "eval_f1_macro": 0.8509503339952407, + "eval_loss": 0.23693729937076569, + "eval_pr_auc": 0.7572519889982183, + "eval_precision": 0.7398680490103676, + "eval_precision_macro": 0.8473073942352414, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7594324411480168, + "eval_recall_macro": 0.8547267837417515, + "eval_runtime": 0.2431, + "eval_samples_per_second": 670.398, + "eval_steps_per_second": 4.113, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4680 + }, + { + "epoch": 261.0, + "eval_accuracy": 0.919818995322351, + "eval_auc": 0.9477750407611654, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7492447129909365, + "eval_f1_macro": 0.8507623994645729, + "eval_loss": 0.23685960471630096, + "eval_pr_auc": 0.7574807332819814, + "eval_precision": 0.739021329987453, + "eval_precision_macro": 0.8469075096539207, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7597549177684618, + "eval_recall_macro": 0.85476730013491, + "eval_runtime": 0.2608, + "eval_samples_per_second": 625.06, + "eval_steps_per_second": 3.835, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4698 + }, + { + "epoch": 262.0, + "eval_accuracy": 0.9200223713646533, + "eval_auc": 0.947870010485989, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7495621716287215, + "eval_f1_macro": 0.8509874086097018, + "eval_loss": 0.23665639758110046, + "eval_pr_auc": 0.7579557575566394, + "eval_precision": 0.740251572327044, + "eval_precision_macro": 0.8474729477355745, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.7591099645275717, + "eval_recall_macro": 0.854625906390061, + "eval_runtime": 0.26, + "eval_samples_per_second": 626.828, + "eval_steps_per_second": 3.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4716 + }, + { + "epoch": 263.0, + "eval_accuracy": 0.9201749033963799, + "eval_auc": 0.9479619047411422, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7501591343093571, + "eval_f1_macro": 0.8513291133243506, + "eval_loss": 0.23653987050056458, + "eval_pr_auc": 0.7584013256784117, + "eval_precision": 0.7404963870562362, + "eval_precision_macro": 0.8476822244653337, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7600773943889068, + "eval_recall_macro": 0.8551096213207285, + "eval_runtime": 0.2539, + "eval_samples_per_second": 641.941, + "eval_steps_per_second": 3.938, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4734 + }, + { + "epoch": 264.0, + "eval_accuracy": 0.9203274354281066, + "eval_auc": 0.9480515799865329, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7501195981502152, + "eval_f1_macro": 0.8513640482812168, + "eval_loss": 0.2363332211971283, + "eval_pr_auc": 0.7588834286830018, + "eval_precision": 0.7419558359621451, + "eval_precision_macro": 0.848278196802748, + "eval_pred_class_0": 16498, + "eval_pred_class_1": 3170, + "eval_predicted_binding_ratio": 0.16117551352450682, + "eval_recall": 0.7584650112866818, + "eval_recall_macro": 0.8545448736037441, + "eval_runtime": 0.253, + "eval_samples_per_second": 644.351, + "eval_steps_per_second": 3.953, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4752 + }, + { + "epoch": 265.0, + "eval_accuracy": 0.9203782794386821, + "eval_auc": 0.9481172938194913, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.750557502389296, + "eval_f1_macro": 0.8515931077800434, + "eval_loss": 0.23625436425209045, + "eval_pr_auc": 0.7591087523185005, + "eval_precision": 0.7415801070192005, + "eval_precision_macro": 0.8482019751638359, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7597549177684618, + "eval_recall_macro": 0.8550992854068361, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.903, + "eval_steps_per_second": 3.895, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4770 + }, + { + "epoch": 266.0, + "eval_accuracy": 0.9200223713646533, + "eval_auc": 0.9482093632596256, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7495621716287215, + "eval_f1_macro": 0.8509874086097018, + "eval_loss": 0.23614051938056946, + "eval_pr_auc": 0.7594934170637511, + "eval_precision": 0.740251572327044, + "eval_precision_macro": 0.8474729477355745, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.7591099645275717, + "eval_recall_macro": 0.854625906390061, + "eval_runtime": 0.2555, + "eval_samples_per_second": 638.024, + "eval_steps_per_second": 3.914, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4788 + }, + { + "epoch": 267.0, + "eval_accuracy": 0.9204291234492576, + "eval_auc": 0.9483264257570818, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7504385265507894, + "eval_f1_macro": 0.8515537559413557, + "eval_loss": 0.23595084249973297, + "eval_pr_auc": 0.7600448623712702, + "eval_precision": 0.7422712933753943, + "eval_precision_macro": 0.8484662322132155, + "eval_pred_class_0": 16498, + "eval_pred_class_1": 3170, + "eval_predicted_binding_ratio": 0.16117551352450682, + "eval_recall": 0.7587874879071267, + "eval_recall_macro": 0.8547362923932326, + "eval_runtime": 0.2589, + "eval_samples_per_second": 629.531, + "eval_steps_per_second": 3.862, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4806 + }, + { + "epoch": 268.0, + "eval_accuracy": 0.9203782794386821, + "eval_auc": 0.9483670102777331, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7509541984732825, + "eval_f1_macro": 0.8517842887791249, + "eval_loss": 0.23594258725643158, + "eval_pr_auc": 0.7601171664174119, + "eval_precision": 0.740822089739567, + "eval_precision_macro": 0.8479609508220922, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7613673008706868, + "eval_recall_macro": 0.8557545745616185, + "eval_runtime": 0.2463, + "eval_samples_per_second": 661.883, + "eval_steps_per_second": 4.061, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4824 + }, + { + "epoch": 269.0, + "eval_accuracy": 0.920276591417531, + "eval_auc": 0.9484603741402287, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7505567928730512, + "eval_f1_macro": 0.8515567625484772, + "eval_loss": 0.23575998842716217, + "eval_pr_auc": 0.7605631058573062, + "eval_precision": 0.7406593406593407, + "eval_precision_macro": 0.8478216317444613, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7607223476297968, + "eval_recall_macro": 0.8554320979411736, + "eval_runtime": 0.26, + "eval_samples_per_second": 626.928, + "eval_steps_per_second": 3.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4842 + }, + { + "epoch": 270.0, + "eval_accuracy": 0.9206833435021354, + "eval_auc": 0.9485513243429828, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.751434034416826, + "eval_f1_macro": 0.8521235507837306, + "eval_loss": 0.235606849193573, + "eval_pr_auc": 0.7610077759721279, + "eval_precision": 0.7426771653543307, + "eval_precision_macro": 0.8488138752255192, + "eval_pred_class_0": 16493, + "eval_pred_class_1": 3175, + "eval_predicted_binding_ratio": 0.16142973357738458, + "eval_recall": 0.7603998710093518, + "eval_recall_macro": 0.8555424839443451, + "eval_runtime": 0.2554, + "eval_samples_per_second": 638.303, + "eval_steps_per_second": 3.916, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4860 + }, + { + "epoch": 271.0, + "eval_accuracy": 0.9209884075655888, + "eval_auc": 0.948647082400222, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7519948930737312, + "eval_f1_macro": 0.8525018311755109, + "eval_loss": 0.23542079329490662, + "eval_pr_auc": 0.7614859215167992, + "eval_precision": 0.744391785150079, + "eval_precision_macro": 0.8496242389363071, + "eval_pred_class_0": 16503, + "eval_pred_class_1": 3165, + "eval_predicted_binding_ratio": 0.16092129347162903, + "eval_recall": 0.7597549177684618, + "eval_recall_macro": 0.8554614511580283, + "eval_runtime": 0.2456, + "eval_samples_per_second": 663.61, + "eval_steps_per_second": 4.071, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4878 + }, + { + "epoch": 272.0, + "eval_accuracy": 0.920734187512711, + "eval_auc": 0.9487102755159504, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7517120560598821, + "eval_f1_macro": 0.8522755458325244, + "eval_loss": 0.2353215366601944, + "eval_pr_auc": 0.761752604262035, + "eval_precision": 0.7426054122089364, + "eval_precision_macro": 0.8488345435817272, + "eval_pred_class_0": 16490, + "eval_pred_class_1": 3178, + "eval_predicted_binding_ratio": 0.16158226560911124, + "eval_recall": 0.7610448242502419, + "eval_recall_macro": 0.8558347800855242, + "eval_runtime": 0.2522, + "eval_samples_per_second": 646.279, + "eval_steps_per_second": 3.965, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4896 + }, + { + "epoch": 273.0, + "eval_accuracy": 0.920734187512711, + "eval_auc": 0.9487825490532058, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7517911160643209, + "eval_f1_macro": 0.8523136490925144, + "eval_loss": 0.23522616922855377, + "eval_pr_auc": 0.7620046741511783, + "eval_precision": 0.7424528301886792, + "eval_precision_macro": 0.8487858522607636, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.7613673008706868, + "eval_recall_macro": 0.8559658379164806, + "eval_runtime": 0.2581, + "eval_samples_per_second": 631.657, + "eval_steps_per_second": 3.875, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4914 + }, + { + "epoch": 274.0, + "eval_accuracy": 0.9208358755338621, + "eval_auc": 0.9488776355680169, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7518725099601593, + "eval_f1_macro": 0.8523888728682258, + "eval_loss": 0.2350645512342453, + "eval_pr_auc": 0.7623918274747735, + "eval_precision": 0.7432262129804663, + "eval_precision_macro": 0.8491200787225601, + "eval_pred_class_0": 16494, + "eval_pred_class_1": 3174, + "eval_predicted_binding_ratio": 0.16137888956680904, + "eval_recall": 0.7607223476297968, + "eval_recall_macro": 0.8557640832130997, + "eval_runtime": 0.2427, + "eval_samples_per_second": 671.591, + "eval_steps_per_second": 4.12, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4932 + }, + { + "epoch": 275.0, + "eval_accuracy": 0.9210900955867399, + "eval_auc": 0.9489531597599242, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7525510204081632, + "eval_f1_macro": 0.8528058755561261, + "eval_loss": 0.2349635362625122, + "eval_pr_auc": 0.7627186958629152, + "eval_precision": 0.7442447177546515, + "eval_precision_macro": 0.8496637300357182, + "eval_pred_class_0": 16497, + "eval_pred_class_1": 3171, + "eval_predicted_binding_ratio": 0.16122635753508235, + "eval_recall": 0.7610448242502419, + "eval_recall_macro": 0.8560460434403863, + "eval_runtime": 0.2606, + "eval_samples_per_second": 625.549, + "eval_steps_per_second": 3.838, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4950 + }, + { + "epoch": 276.0, + "eval_accuracy": 0.9207850315232866, + "eval_auc": 0.9489723619803666, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7524626628535113, + "eval_f1_macro": 0.8526552767085183, + "eval_loss": 0.2350020557641983, + "eval_pr_auc": 0.7627735453579831, + "eval_precision": 0.7416222987785781, + "eval_precision_macro": 0.8485653223786669, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.763624637213802, + "eval_recall_macro": 0.8569134232124421, + "eval_runtime": 0.2544, + "eval_samples_per_second": 640.627, + "eval_steps_per_second": 3.93, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4968 + }, + { + "epoch": 277.0, + "eval_accuracy": 0.9209884075655888, + "eval_auc": 0.9490740568619694, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7524689391525964, + "eval_f1_macro": 0.8527303253449472, + "eval_loss": 0.2348015159368515, + "eval_pr_auc": 0.7632371897507115, + "eval_precision": 0.7434686811457349, + "eval_precision_macro": 0.8493281796365992, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7616897774911319, + "eval_recall_macro": 0.8562477981437672, + "eval_runtime": 0.2476, + "eval_samples_per_second": 658.287, + "eval_steps_per_second": 4.039, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4986 + }, + { + "epoch": 277.77777777777777, + "grad_norm": 14799.8212890625, + "learning_rate": 4.904982238472025e-07, + "loss": 0.199, + "step": 5000 + }, + { + "epoch": 278.0, + "eval_accuracy": 0.9207850315232866, + "eval_auc": 0.9491519363186243, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7514358647096363, + "eval_f1_macro": 0.8521604145127957, + "eval_loss": 0.23470228910446167, + "eval_pr_auc": 0.7636185597213633, + "eval_precision": 0.7436059362172402, + "eval_precision_macro": 0.8491982774838095, + "eval_pred_class_0": 16501, + "eval_pred_class_1": 3167, + "eval_predicted_binding_ratio": 0.16102298149278016, + "eval_recall": 0.7594324411480168, + "eval_recall_macro": 0.8552096714100077, + "eval_runtime": 0.2561, + "eval_samples_per_second": 636.482, + "eval_steps_per_second": 3.905, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5004 + }, + { + "epoch": 279.0, + "eval_accuracy": 0.9209375635550132, + "eval_auc": 0.9492209202712323, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7521912350597609, + "eval_f1_macro": 0.8525784825369885, + "eval_loss": 0.2345963418483734, + "eval_pr_auc": 0.7638862490185815, + "eval_precision": 0.7435412728418399, + "eval_precision_macro": 0.8493079227068421, + "eval_pred_class_0": 16494, + "eval_pred_class_1": 3174, + "eval_predicted_binding_ratio": 0.16137888956680904, + "eval_recall": 0.7610448242502419, + "eval_recall_macro": 0.8559555020025882, + "eval_runtime": 0.273, + "eval_samples_per_second": 597.168, + "eval_steps_per_second": 3.664, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5022 + }, + { + "epoch": 280.0, + "eval_accuracy": 0.9210900955867399, + "eval_auc": 0.9492453877736104, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7532591414944356, + "eval_f1_macro": 0.8531471523002045, + "eval_loss": 0.23453067243099213, + "eval_pr_auc": 0.7639137465976396, + "eval_precision": 0.7428661022264033, + "eval_precision_macro": 0.8492229655497572, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7639471138342471, + "eval_recall_macro": 0.8572255639189947, + "eval_runtime": 0.2247, + "eval_samples_per_second": 725.266, + "eval_steps_per_second": 4.449, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5040 + }, + { + "epoch": 281.0, + "eval_accuracy": 0.9210900955867399, + "eval_auc": 0.9493002596027307, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7531806615776081, + "eval_f1_macro": 0.8531093334515976, + "eval_loss": 0.2344331294298172, + "eval_pr_auc": 0.7641447446055818, + "eval_precision": 0.7430185127078758, + "eval_precision_macro": 0.8492715280607518, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.763624637213802, + "eval_recall_macro": 0.8570945060880382, + "eval_runtime": 0.263, + "eval_samples_per_second": 619.693, + "eval_steps_per_second": 3.802, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5058 + }, + { + "epoch": 282.0, + "eval_accuracy": 0.9212426276184665, + "eval_auc": 0.9493485327975579, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7538534880025425, + "eval_f1_macro": 0.8534875889608693, + "eval_loss": 0.2344052791595459, + "eval_pr_auc": 0.764296468123137, + "eval_precision": 0.7431077694235589, + "eval_precision_macro": 0.849430796583593, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8577092788496623, + "eval_runtime": 0.2484, + "eval_samples_per_second": 656.097, + "eval_steps_per_second": 4.025, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5076 + }, + { + "epoch": 283.0, + "eval_accuracy": 0.9212426276184665, + "eval_auc": 0.9494190544850012, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7536969311496263, + "eval_f1_macro": 0.8534121476034391, + "eval_loss": 0.23425185680389404, + "eval_pr_auc": 0.7646652388104445, + "eval_precision": 0.7434127979924717, + "eval_precision_macro": 0.8495280009379834, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.764269590454692, + "eval_recall_macro": 0.8574471631877492, + "eval_runtime": 0.2547, + "eval_samples_per_second": 639.881, + "eval_steps_per_second": 3.926, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5094 + }, + { + "epoch": 284.0, + "eval_accuracy": 0.9213951596501933, + "eval_auc": 0.9495173137944721, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7539000318369946, + "eval_f1_macro": 0.853564041452472, + "eval_loss": 0.2340681403875351, + "eval_pr_auc": 0.7651886756989377, + "eval_precision": 0.744419993712669, + "eval_precision_macro": 0.8499803613859639, + "eval_pred_class_0": 16487, + "eval_pred_class_1": 3181, + "eval_predicted_binding_ratio": 0.1617347976408379, + "eval_recall": 0.763624637213802, + "eval_recall_macro": 0.8572755889636343, + "eval_runtime": 0.2477, + "eval_samples_per_second": 658.168, + "eval_steps_per_second": 4.038, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5112 + }, + { + "epoch": 285.0, + "eval_accuracy": 0.9213951596501933, + "eval_auc": 0.9495497619459952, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7540566337893733, + "eval_f1_macro": 0.8536395120535369, + "eval_loss": 0.234034925699234, + "eval_pr_auc": 0.7653794962608654, + "eval_precision": 0.7441130298273155, + "eval_precision_macro": 0.8498821534503319, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.764269590454692, + "eval_recall_macro": 0.8575377046255472, + "eval_runtime": 0.2024, + "eval_samples_per_second": 805.487, + "eval_steps_per_second": 4.942, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5130 + }, + { + "epoch": 286.0, + "eval_accuracy": 0.9215476916819199, + "eval_auc": 0.9496228140831674, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.754181934044926, + "eval_f1_macro": 0.8537539029854382, + "eval_loss": 0.23388919234275818, + "eval_pr_auc": 0.7657504791267543, + "eval_precision": 0.7452770780856424, + "eval_precision_macro": 0.8503853253634615, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.763302160593357, + "eval_recall_macro": 0.8572350725704758, + "eval_runtime": 0.1796, + "eval_samples_per_second": 907.381, + "eval_steps_per_second": 5.567, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5148 + }, + { + "epoch": 287.0, + "eval_accuracy": 0.9219544437665244, + "eval_auc": 0.9497348546111616, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7550662198819211, + "eval_f1_macro": 0.8543240621923138, + "eval_loss": 0.2337103933095932, + "eval_pr_auc": 0.7663000808208629, + "eval_precision": 0.7473152242577384, + "eval_precision_macro": 0.8513875842534602, + "eval_pred_class_0": 16502, + "eval_pred_class_1": 3166, + "eval_predicted_binding_ratio": 0.1609721374822046, + "eval_recall": 0.7629796839729119, + "eval_recall_macro": 0.8573454585736473, + "eval_runtime": 0.2659, + "eval_samples_per_second": 613.107, + "eval_steps_per_second": 3.761, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5166 + }, + { + "epoch": 288.0, + "eval_accuracy": 0.9218527557453732, + "eval_auc": 0.9498016974139991, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7548253309937789, + "eval_f1_macro": 0.8541719723587154, + "eval_loss": 0.23359474539756775, + "eval_pr_auc": 0.7666273574525592, + "eval_precision": 0.7468434343434344, + "eval_precision_macro": 0.8511489898989899, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.7629796839729119, + "eval_recall_macro": 0.8572850976151154, + "eval_runtime": 0.2686, + "eval_samples_per_second": 606.819, + "eval_steps_per_second": 3.723, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5184 + }, + { + "epoch": 289.0, + "eval_accuracy": 0.9216493797030709, + "eval_auc": 0.9498063690134986, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7546569017672345, + "eval_f1_macro": 0.8540188154275592, + "eval_loss": 0.23359988629817963, + "eval_pr_auc": 0.7666725529217715, + "eval_precision": 0.7452830188679245, + "eval_precision_macro": 0.8504738723645784, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.764269590454692, + "eval_recall_macro": 0.8576886070218773, + "eval_runtime": 0.2712, + "eval_samples_per_second": 600.959, + "eval_steps_per_second": 3.687, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5202 + }, + { + "epoch": 290.0, + "eval_accuracy": 0.9213951596501933, + "eval_auc": 0.9498248412965191, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7543692405465523, + "eval_f1_macro": 0.8537901526260393, + "eval_loss": 0.23362942039966583, + "eval_pr_auc": 0.7666566917414745, + "eval_precision": 0.7435014093329158, + "eval_precision_macro": 0.8496869717377781, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7655594969364721, + "eval_recall_macro": 0.8580619359493733, + "eval_runtime": 0.2158, + "eval_samples_per_second": 755.492, + "eval_steps_per_second": 4.635, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5220 + }, + { + "epoch": 291.0, + "eval_accuracy": 0.9219544437665244, + "eval_auc": 0.9499087646350263, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7554564282300462, + "eval_f1_macro": 0.8545121458733945, + "eval_loss": 0.23345860838890076, + "eval_pr_auc": 0.767032300253484, + "eval_precision": 0.746536523929471, + "eval_precision_macro": 0.8511363192046093, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.764592067075137, + "eval_recall_macro": 0.8580007477284299, + "eval_runtime": 0.2582, + "eval_samples_per_second": 631.391, + "eval_steps_per_second": 3.874, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5238 + }, + { + "epoch": 292.0, + "eval_accuracy": 0.9226154159040065, + "eval_auc": 0.9500412726083274, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7567135549872123, + "eval_f1_macro": 0.8553519407342349, + "eval_loss": 0.23320625722408295, + "eval_pr_auc": 0.7678050059622479, + "eval_precision": 0.7502377179080824, + "eval_precision_macro": 0.8528939452496871, + "eval_pred_class_0": 16513, + "eval_pred_class_1": 3155, + "eval_predicted_binding_ratio": 0.1604128533658735, + "eval_recall": 0.763302160593357, + "eval_recall_macro": 0.857868862635062, + "eval_runtime": 0.2022, + "eval_samples_per_second": 806.124, + "eval_steps_per_second": 4.946, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5256 + }, + { + "epoch": 293.0, + "eval_accuracy": 0.9219035997559487, + "eval_auc": 0.9500337688516315, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7551801083838062, + "eval_f1_macro": 0.8543609694420392, + "eval_loss": 0.23323103785514832, + "eval_pr_auc": 0.7676369094541509, + "eval_precision": 0.7466120390797353, + "eval_precision_macro": 0.8511174775574487, + "eval_pred_class_0": 16495, + "eval_pred_class_1": 3173, + "eval_predicted_binding_ratio": 0.16132804555623348, + "eval_recall": 0.7639471138342471, + "eval_recall_macro": 0.8577084515872508, + "eval_runtime": 0.2457, + "eval_samples_per_second": 663.373, + "eval_steps_per_second": 4.07, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5274 + }, + { + "epoch": 294.0, + "eval_accuracy": 0.9219035997559487, + "eval_auc": 0.9500722803500048, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7554140127388536, + "eval_f1_macro": 0.8544737053045671, + "eval_loss": 0.23318050801753998, + "eval_pr_auc": 0.7677669872107012, + "eval_precision": 0.7461465869770368, + "eval_precision_macro": 0.8509676473001504, + "eval_pred_class_0": 16489, + "eval_pred_class_1": 3179, + "eval_predicted_binding_ratio": 0.1616331096196868, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8581016250801203, + "eval_runtime": 0.2622, + "eval_samples_per_second": 621.765, + "eval_steps_per_second": 3.815, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5292 + }, + { + "epoch": 295.0, + "eval_accuracy": 0.9223103518405532, + "eval_auc": 0.9501381693679445, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7562998405103668, + "eval_f1_macro": 0.8550446157127531, + "eval_loss": 0.23305083811283112, + "eval_pr_auc": 0.7681080957655504, + "eval_precision": 0.7481855474913222, + "eval_precision_macro": 0.8519702208636682, + "eval_pred_class_0": 16499, + "eval_pred_class_1": 3169, + "eval_predicted_binding_ratio": 0.16112466951393126, + "eval_recall": 0.764592067075137, + "eval_recall_macro": 0.8582120110832919, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.885, + "eval_steps_per_second": 3.87, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5310 + }, + { + "epoch": 296.0, + "eval_accuracy": 0.9220052877770999, + "eval_auc": 0.9501577511558462, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7556546670914304, + "eval_f1_macro": 0.8546256879531203, + "eval_loss": 0.23304298520088196, + "eval_pr_auc": 0.7681639014007426, + "eval_precision": 0.7466163046899591, + "eval_precision_macro": 0.8512051870912047, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8581619860386525, + "eval_runtime": 0.2233, + "eval_samples_per_second": 730.092, + "eval_steps_per_second": 4.479, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5328 + }, + { + "epoch": 297.0, + "eval_accuracy": 0.9223611958511287, + "eval_auc": 0.9502301512155882, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7564981661616967, + "eval_f1_macro": 0.8551582014839937, + "eval_loss": 0.23290005326271057, + "eval_pr_auc": 0.7685493782671796, + "eval_precision": 0.7482649842271294, + "eval_precision_macro": 0.8520389050120978, + "eval_pred_class_0": 16498, + "eval_pred_class_1": 3170, + "eval_predicted_binding_ratio": 0.16117551352450682, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8583732493935144, + "eval_runtime": 0.259, + "eval_samples_per_second": 629.312, + "eval_steps_per_second": 3.861, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5346 + }, + { + "epoch": 298.0, + "eval_accuracy": 0.9224628838722798, + "eval_auc": 0.9503260649928105, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.756739511883873, + "eval_f1_macro": 0.8553105125875349, + "eval_loss": 0.23275841772556305, + "eval_pr_auc": 0.768979970846171, + "eval_precision": 0.7487373737373737, + "eval_precision_macro": 0.8522777777777777, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8584336103520465, + "eval_runtime": 0.265, + "eval_samples_per_second": 615.056, + "eval_steps_per_second": 3.773, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5364 + }, + { + "epoch": 299.0, + "eval_accuracy": 0.9224120398617043, + "eval_auc": 0.9503799246420391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7565411614550096, + "eval_f1_macro": 0.8551969143430849, + "eval_loss": 0.23264609277248383, + "eval_pr_auc": 0.7692428666566218, + "eval_precision": 0.748658035996211, + "eval_precision_macro": 0.8522091464751675, + "eval_pred_class_0": 16501, + "eval_pred_class_1": 3167, + "eval_predicted_binding_ratio": 0.16102298149278016, + "eval_recall": 0.764592067075137, + "eval_recall_macro": 0.8582723720418239, + "eval_runtime": 0.2384, + "eval_samples_per_second": 683.765, + "eval_steps_per_second": 4.195, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5382 + }, + { + "epoch": 300.0, + "eval_accuracy": 0.9223611958511287, + "eval_auc": 0.9503983969250598, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7565758010521282, + "eval_f1_macro": 0.8551956221484214, + "eval_loss": 0.23263320326805115, + "eval_pr_auc": 0.7693119480202146, + "eval_precision": 0.748108448928121, + "eval_precision_macro": 0.8519882690809373, + "eval_pred_class_0": 16496, + "eval_pred_class_1": 3172, + "eval_predicted_binding_ratio": 0.16127720154565792, + "eval_recall": 0.7652370203160271, + "eval_recall_macro": 0.8585043072244709, + "eval_runtime": 0.264, + "eval_samples_per_second": 617.523, + "eval_steps_per_second": 3.788, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5400 + }, + { + "epoch": 301.0, + "eval_accuracy": 0.9225137278828553, + "eval_auc": 0.9504205091626904, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7572475310608474, + "eval_f1_macro": 0.855573369257207, + "eval_loss": 0.2325783669948578, + "eval_pr_auc": 0.7694562109808358, + "eval_precision": 0.7481901164620711, + "eval_precision_macro": 0.8521436908185075, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7665269267978072, + "eval_recall_macro": 0.859119079986095, + "eval_runtime": 0.2413, + "eval_samples_per_second": 675.443, + "eval_steps_per_second": 4.144, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5418 + }, + { + "epoch": 302.0, + "eval_accuracy": 0.9223611958511287, + "eval_auc": 0.9504706120673215, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.756808408982322, + "eval_f1_macro": 0.8553077347570655, + "eval_loss": 0.23251411318778992, + "eval_pr_auc": 0.7697272239104135, + "eval_precision": 0.7476400251730648, + "eval_precision_macro": 0.8518369925744038, + "eval_pred_class_0": 16490, + "eval_pred_class_1": 3178, + "eval_predicted_binding_ratio": 0.16158226560911124, + "eval_recall": 0.7662044501773622, + "eval_recall_macro": 0.8588974807173404, + "eval_runtime": 0.2708, + "eval_samples_per_second": 602.022, + "eval_steps_per_second": 3.693, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5436 + }, + { + "epoch": 303.0, + "eval_accuracy": 0.9223611958511287, + "eval_auc": 0.9504959749596038, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7571178622554477, + "eval_f1_macro": 0.8554568705510044, + "eval_loss": 0.23248492181301117, + "eval_pr_auc": 0.7698215821647963, + "eval_precision": 0.7470182046453233, + "eval_precision_macro": 0.8516367567335341, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8594217120411665, + "eval_runtime": 0.2271, + "eval_samples_per_second": 717.802, + "eval_steps_per_second": 4.404, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5454 + }, + { + "epoch": 304.0, + "eval_accuracy": 0.9228187919463087, + "eval_auc": 0.9505587106478812, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7579719387755102, + "eval_f1_macro": 0.8560304891070873, + "eval_loss": 0.23235370218753815, + "eval_pr_auc": 0.7701546218803492, + "eval_precision": 0.749605802585935, + "eval_precision_macro": 0.8528595176474563, + "eval_pred_class_0": 16497, + "eval_pred_class_1": 3171, + "eval_predicted_binding_ratio": 0.16122635753508235, + "eval_recall": 0.7665269267978072, + "eval_recall_macro": 0.8593001628616911, + "eval_runtime": 0.1849, + "eval_samples_per_second": 881.772, + "eval_steps_per_second": 5.41, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5472 + }, + { + "epoch": 305.0, + "eval_accuracy": 0.9231238560097621, + "eval_auc": 0.9506673837312365, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7583120204603581, + "eval_f1_macro": 0.8563023222011585, + "eval_loss": 0.23212042450904846, + "eval_pr_auc": 0.7706813722507476, + "eval_precision": 0.7518225039619651, + "eval_precision_macro": 0.8538377341465491, + "eval_pred_class_0": 16513, + "eval_pred_class_1": 3155, + "eval_predicted_binding_ratio": 0.1604128533658735, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8588259565825047, + "eval_runtime": 0.1758, + "eval_samples_per_second": 927.397, + "eval_steps_per_second": 5.69, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5490 + }, + { + "epoch": 305.55555555555554, + "grad_norm": 15827.6396484375, + "learning_rate": 3.943376017723057e-07, + "loss": 0.1954, + "step": 5500 + }, + { + "epoch": 306.0, + "eval_accuracy": 0.9233272320520642, + "eval_auc": 0.9506862453142154, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7591823698498882, + "eval_f1_macro": 0.8567938214370079, + "eval_loss": 0.23211389780044556, + "eval_pr_auc": 0.7707434518060764, + "eval_precision": 0.7519772223979754, + "eval_precision_macro": 0.8540585209342515, + "eval_pred_class_0": 16507, + "eval_pred_class_1": 3161, + "eval_predicted_binding_ratio": 0.1607179174293268, + "eval_recall": 0.7665269267978072, + "eval_recall_macro": 0.8596019676543512, + "eval_runtime": 0.2349, + "eval_samples_per_second": 693.884, + "eval_steps_per_second": 4.257, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5508 + }, + { + "epoch": 307.0, + "eval_accuracy": 0.9230730119991865, + "eval_auc": 0.9506972041080411, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7588075880758808, + "eval_f1_macro": 0.8565232326853711, + "eval_loss": 0.23213696479797363, + "eval_pr_auc": 0.7706724583082463, + "eval_precision": 0.7503152585119798, + "eval_precision_macro": 0.8533038465207814, + "eval_pred_class_0": 16496, + "eval_pred_class_1": 3172, + "eval_predicted_binding_ratio": 0.16127720154565792, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8598442387508907, + "eval_runtime": 0.2681, + "eval_samples_per_second": 608.072, + "eval_steps_per_second": 3.731, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5526 + }, + { + "epoch": 308.0, + "eval_accuracy": 0.9229204799674599, + "eval_auc": 0.9507381389986547, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.758521822236381, + "eval_f1_macro": 0.8563315143004762, + "eval_loss": 0.23207640647888184, + "eval_pr_auc": 0.7708824410889222, + "eval_precision": 0.7494491658797607, + "eval_precision_macro": 0.8528944938003498, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7678168332795873, + "eval_recall_macro": 0.8598847551440492, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.193, + "eval_steps_per_second": 3.946, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5544 + }, + { + "epoch": 309.0, + "eval_accuracy": 0.9233272320520642, + "eval_auc": 0.9508327778185136, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7593360995850622, + "eval_f1_macro": 0.8568679288369823, + "eval_loss": 0.2318853884935379, + "eval_pr_auc": 0.7713979747932131, + "eval_precision": 0.7516587677725118, + "eval_precision_macro": 0.8539545732457663, + "eval_pred_class_0": 16503, + "eval_pred_class_1": 3165, + "eval_predicted_binding_ratio": 0.16092129347162903, + "eval_recall": 0.7671718800386972, + "eval_recall_macro": 0.8598640833162641, + "eval_runtime": 0.3164, + "eval_samples_per_second": 515.146, + "eval_steps_per_second": 3.16, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5562 + }, + { + "epoch": 310.0, + "eval_accuracy": 0.9231238560097621, + "eval_auc": 0.9508632794702453, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7587747287811104, + "eval_f1_macro": 0.8565253830188363, + "eval_loss": 0.23183651268482208, + "eval_pr_auc": 0.7715329415149417, + "eval_precision": 0.7508683296495106, + "eval_precision_macro": 0.8535264016588866, + "eval_pred_class_0": 16501, + "eval_pred_class_1": 3167, + "eval_predicted_binding_ratio": 0.16102298149278016, + "eval_recall": 0.7668494034182521, + "eval_recall_macro": 0.8596123035682436, + "eval_runtime": 0.2643, + "eval_samples_per_second": 616.635, + "eval_steps_per_second": 3.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5580 + }, + { + "epoch": 311.0, + "eval_accuracy": 0.9230730119991865, + "eval_auc": 0.9509030269959862, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7588844621513944, + "eval_f1_macro": 0.8565602855810055, + "eval_loss": 0.23182560503482819, + "eval_pr_auc": 0.7716106695707178, + "eval_precision": 0.7501575299306869, + "eval_precision_macro": 0.8532526463767658, + "eval_pred_class_0": 16494, + "eval_pred_class_1": 3174, + "eval_predicted_binding_ratio": 0.16137888956680904, + "eval_recall": 0.7678168332795873, + "eval_recall_macro": 0.8599752965818471, + "eval_runtime": 0.2638, + "eval_samples_per_second": 617.932, + "eval_steps_per_second": 3.791, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5598 + }, + { + "epoch": 312.0, + "eval_accuracy": 0.9231238560097621, + "eval_auc": 0.9509431638216853, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7591589678241478, + "eval_f1_macro": 0.8567105868221108, + "eval_loss": 0.23177149891853333, + "eval_pr_auc": 0.7718084005522707, + "eval_precision": 0.7500786905886057, + "eval_precision_macro": 0.853269895291271, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7684617865204773, + "eval_recall_macro": 0.8602675927230261, + "eval_runtime": 0.2364, + "eval_samples_per_second": 689.439, + "eval_steps_per_second": 4.23, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5616 + }, + { + "epoch": 313.0, + "eval_accuracy": 0.9230730119991865, + "eval_auc": 0.9509867264870173, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7590380633858895, + "eval_f1_macro": 0.856634317411552, + "eval_loss": 0.23171813786029816, + "eval_pr_auc": 0.771954368377823, + "eval_precision": 0.749842668344871, + "eval_precision_macro": 0.8531505640086999, + "eval_pred_class_0": 16490, + "eval_pred_class_1": 3178, + "eval_predicted_binding_ratio": 0.16158226560911124, + "eval_recall": 0.7684617865204773, + "eval_recall_macro": 0.8602374122437602, + "eval_runtime": 0.2592, + "eval_samples_per_second": 628.85, + "eval_steps_per_second": 3.858, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5634 + }, + { + "epoch": 314.0, + "eval_accuracy": 0.9228696359568843, + "eval_auc": 0.9509922740114228, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7584010192705845, + "eval_f1_macro": 0.8562552937959844, + "eval_loss": 0.23173367977142334, + "eval_pr_auc": 0.7720376545527768, + "eval_precision": 0.7492133417243549, + "eval_precision_macro": 0.8527752578846153, + "eval_pred_class_0": 16490, + "eval_pred_class_1": 3178, + "eval_predicted_binding_ratio": 0.16158226560911124, + "eval_recall": 0.7678168332795873, + "eval_recall_macro": 0.8598545746647831, + "eval_runtime": 0.2664, + "eval_samples_per_second": 611.795, + "eval_steps_per_second": 3.753, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5652 + }, + { + "epoch": 315.0, + "eval_accuracy": 0.9233780760626398, + "eval_auc": 0.9510762654774227, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7595340673368438, + "eval_f1_macro": 0.8569813431425517, + "eval_loss": 0.23158977925777435, + "eval_pr_auc": 0.7725350282702966, + "eval_precision": 0.7517372078332281, + "eval_precision_macro": 0.8540227670483556, + "eval_pred_class_0": 16502, + "eval_pred_class_1": 3166, + "eval_predicted_binding_ratio": 0.1609721374822046, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8600253216264866, + "eval_runtime": 0.2525, + "eval_samples_per_second": 645.435, + "eval_steps_per_second": 3.96, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5670 + }, + { + "epoch": 316.0, + "eval_accuracy": 0.9236322961155176, + "eval_auc": 0.9511527434542277, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7601405301820504, + "eval_f1_macro": 0.8573636072933594, + "eval_loss": 0.23149563372135162, + "eval_pr_auc": 0.7728791764414321, + "eval_precision": 0.7529262891490035, + "eval_precision_macro": 0.8546239248495366, + "eval_pred_class_0": 16507, + "eval_pred_class_1": 3161, + "eval_predicted_binding_ratio": 0.1607179174293268, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8601762240228168, + "eval_runtime": 0.2078, + "eval_samples_per_second": 784.391, + "eval_steps_per_second": 4.812, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5688 + }, + { + "epoch": 317.0, + "eval_accuracy": 0.923581452104942, + "eval_auc": 0.9511918583675363, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7598657932577089, + "eval_f1_macro": 0.8572131820235396, + "eval_loss": 0.23143813014030457, + "eval_pr_auc": 0.7730966214358813, + "eval_precision": 0.7530082330588981, + "eval_precision_macro": 0.8546082958147307, + "eval_pred_class_0": 16510, + "eval_pred_class_1": 3158, + "eval_predicted_binding_ratio": 0.16056538539760015, + "eval_recall": 0.7668494034182521, + "eval_recall_macro": 0.8598839278816377, + "eval_runtime": 0.2098, + "eval_samples_per_second": 777.022, + "eval_steps_per_second": 4.767, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5706 + }, + { + "epoch": 318.0, + "eval_accuracy": 0.923479764083791, + "eval_auc": 0.9512240340090886, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7597765363128491, + "eval_f1_macro": 0.8571341935895835, + "eval_loss": 0.23142649233341217, + "eval_pr_auc": 0.7731516186784236, + "eval_precision": 0.7522123893805309, + "eval_precision_macro": 0.8542630051604545, + "eval_pred_class_0": 16504, + "eval_pred_class_1": 3164, + "eval_predicted_binding_ratio": 0.1608704494610535, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8600856825850187, + "eval_runtime": 0.2091, + "eval_samples_per_second": 779.633, + "eval_steps_per_second": 4.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5724 + }, + { + "epoch": 319.0, + "eval_accuracy": 0.9236322961155176, + "eval_auc": 0.9512825457928188, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7600638977635783, + "eval_f1_macro": 0.8573266640831436, + "eval_loss": 0.2313271462917328, + "eval_pr_auc": 0.7734563478045352, + "eval_precision": 0.7530864197530864, + "eval_precision_macro": 0.8546763493762101, + "eval_pred_class_0": 16509, + "eval_pred_class_1": 3159, + "eval_predicted_binding_ratio": 0.16061622940817571, + "eval_recall": 0.7671718800386972, + "eval_recall_macro": 0.8600451661918602, + "eval_runtime": 0.2154, + "eval_samples_per_second": 756.779, + "eval_steps_per_second": 4.643, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5742 + }, + { + "epoch": 320.0, + "eval_accuracy": 0.9237848281472443, + "eval_auc": 0.9513326194999532, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7605048729829046, + "eval_f1_macro": 0.8575931868618003, + "eval_loss": 0.23122872412204742, + "eval_pr_auc": 0.7736938128704516, + "eval_precision": 0.7536415452818239, + "eval_precision_macro": 0.8549855212781016, + "eval_pred_class_0": 16510, + "eval_pred_class_1": 3158, + "eval_predicted_binding_ratio": 0.16056538539760015, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8602667654606148, + "eval_runtime": 0.235, + "eval_samples_per_second": 693.713, + "eval_steps_per_second": 4.256, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5760 + }, + { + "epoch": 321.0, + "eval_accuracy": 0.9236322961155176, + "eval_auc": 0.9513477438033324, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7602171136653896, + "eval_f1_macro": 0.8574005258699469, + "eval_loss": 0.2312333732843399, + "eval_pr_auc": 0.773709338696213, + "eval_precision": 0.7527663610496365, + "eval_precision_macro": 0.8545716082739852, + "eval_pred_class_0": 16505, + "eval_pred_class_1": 3163, + "eval_predicted_binding_ratio": 0.16081960545047794, + "eval_recall": 0.7678168332795873, + "eval_recall_macro": 0.8603072818537733, + "eval_runtime": 0.2276, + "eval_samples_per_second": 716.105, + "eval_steps_per_second": 4.393, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5778 + }, + { + "epoch": 322.0, + "eval_accuracy": 0.9235306080943665, + "eval_auc": 0.9513562889374167, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7602040816326531, + "eval_f1_macro": 0.8573582711574831, + "eval_loss": 0.23121465742588043, + "eval_pr_auc": 0.7737540605936648, + "eval_precision": 0.7518133081046988, + "eval_precision_macro": 0.854175430193466, + "eval_pred_class_0": 16497, + "eval_pred_class_1": 3171, + "eval_predicted_binding_ratio": 0.16122635753508235, + "eval_recall": 0.7687842631409223, + "eval_recall_macro": 0.8606400943881107, + "eval_runtime": 0.2488, + "eval_samples_per_second": 655.047, + "eval_steps_per_second": 4.019, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5796 + }, + { + "epoch": 323.0, + "eval_accuracy": 0.9236831401260931, + "eval_auc": 0.9514191998106756, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7604914632200415, + "eval_f1_macro": 0.8575507604890313, + "eval_loss": 0.23111233115196228, + "eval_pr_auc": 0.7739781495758574, + "eval_precision": 0.7526847757422616, + "eval_precision_macro": 0.8545874490758332, + "eval_pred_class_0": 16502, + "eval_pred_class_1": 3166, + "eval_predicted_binding_ratio": 0.1609721374822046, + "eval_recall": 0.7684617865204773, + "eval_recall_macro": 0.8605995779949522, + "eval_runtime": 0.2355, + "eval_samples_per_second": 692.067, + "eval_steps_per_second": 4.246, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5814 + }, + { + "epoch": 324.0, + "eval_accuracy": 0.9236831401260931, + "eval_auc": 0.9514517842171841, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7604914632200415, + "eval_f1_macro": 0.8575507604890313, + "eval_loss": 0.23104801774024963, + "eval_pr_auc": 0.7741130008089699, + "eval_precision": 0.7526847757422616, + "eval_precision_macro": 0.8545874490758332, + "eval_pred_class_0": 16502, + "eval_pred_class_1": 3166, + "eval_predicted_binding_ratio": 0.1609721374822046, + "eval_recall": 0.7684617865204773, + "eval_recall_macro": 0.8605995779949522, + "eval_runtime": 0.2425, + "eval_samples_per_second": 672.295, + "eval_steps_per_second": 4.125, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5832 + }, + { + "epoch": 325.0, + "eval_accuracy": 0.9237339841366687, + "eval_auc": 0.951504651151519, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7606128311522502, + "eval_f1_macro": 0.857627250169412, + "eval_loss": 0.23095941543579102, + "eval_pr_auc": 0.7744096919390852, + "eval_precision": 0.7529225908372827, + "eval_precision_macro": 0.8547076748648027, + "eval_pred_class_0": 16503, + "eval_pred_class_1": 3165, + "eval_predicted_binding_ratio": 0.16092129347162903, + "eval_recall": 0.7684617865204773, + "eval_recall_macro": 0.8606297584742182, + "eval_runtime": 0.2175, + "eval_samples_per_second": 749.374, + "eval_steps_per_second": 4.597, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5850 + }, + { + "epoch": 326.0, + "eval_accuracy": 0.9237848281472443, + "eval_auc": 0.951546208922066, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7606578317100431, + "eval_f1_macro": 0.8576669257120046, + "eval_loss": 0.23088191449642181, + "eval_pr_auc": 0.774641356281408, + "eval_precision": 0.7533206831119544, + "eval_precision_macro": 0.8548803827531177, + "eval_pred_class_0": 16506, + "eval_pred_class_1": 3162, + "eval_predicted_binding_ratio": 0.16076876143990237, + "eval_recall": 0.7681393099000322, + "eval_recall_macro": 0.8605288811225278, + "eval_runtime": 0.2627, + "eval_samples_per_second": 620.56, + "eval_steps_per_second": 3.807, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5868 + }, + { + "epoch": 327.0, + "eval_accuracy": 0.9237848281472443, + "eval_auc": 0.9515728175742149, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7608105951811074, + "eval_f1_macro": 0.8577405662711912, + "eval_loss": 0.23083868622779846, + "eval_pr_auc": 0.7747608129205691, + "eval_precision": 0.7530006317119393, + "eval_precision_macro": 0.8547756764183257, + "eval_pred_class_0": 16502, + "eval_pred_class_1": 3166, + "eval_predicted_binding_ratio": 0.1609721374822046, + "eval_recall": 0.7687842631409223, + "eval_recall_macro": 0.8607909967844407, + "eval_runtime": 0.2492, + "eval_samples_per_second": 654.065, + "eval_steps_per_second": 4.013, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5886 + }, + { + "epoch": 328.0, + "eval_accuracy": 0.9238356721578198, + "eval_auc": 0.9516116113150578, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7611607142857143, + "eval_f1_macro": 0.8579273206076528, + "eval_loss": 0.23078228533267975, + "eval_pr_auc": 0.7749744562806883, + "eval_precision": 0.7527593818984547, + "eval_precision_macro": 0.8547393927131844, + "eval_pred_class_0": 16497, + "eval_pred_class_1": 3171, + "eval_predicted_binding_ratio": 0.16122635753508235, + "eval_recall": 0.7697516930022573, + "eval_recall_macro": 0.8612143507565763, + "eval_runtime": 0.2121, + "eval_samples_per_second": 768.541, + "eval_steps_per_second": 4.715, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5904 + }, + { + "epoch": 329.0, + "eval_accuracy": 0.9238865161683953, + "eval_auc": 0.9516614027797223, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7614342629482072, + "eval_f1_macro": 0.8580771629311073, + "eval_loss": 0.2307167798280716, + "eval_pr_auc": 0.7752067172424865, + "eval_precision": 0.7526780088216761, + "eval_precision_macro": 0.8547553982510223, + "eval_pred_class_0": 16494, + "eval_pred_class_1": 3174, + "eval_predicted_binding_ratio": 0.16137888956680904, + "eval_recall": 0.7703966462431474, + "eval_recall_macro": 0.8615066468977552, + "eval_runtime": 0.2538, + "eval_samples_per_second": 642.134, + "eval_steps_per_second": 3.939, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5922 + }, + { + "epoch": 330.0, + "eval_accuracy": 0.9236322961155176, + "eval_auc": 0.9516455582714203, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7612841703750794, + "eval_f1_macro": 0.857914812460267, + "eval_loss": 0.23076769709587097, + "eval_pr_auc": 0.775112377066796, + "eval_precision": 0.750548417424005, + "eval_precision_macro": 0.8538504058352652, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7723315059658175, + "eval_recall_macro": 0.8621420914871643, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.508, + "eval_steps_per_second": 3.868, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5940 + }, + { + "epoch": 331.0, + "eval_accuracy": 0.9240390482001221, + "eval_auc": 0.9516963035209824, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7621776504297995, + "eval_f1_macro": 0.8584894423868002, + "eval_loss": 0.23067235946655273, + "eval_pr_auc": 0.7753820734835624, + "eval_precision": 0.7525935240490412, + "eval_precision_macro": 0.8548556265844769, + "eval_pred_class_0": 16487, + "eval_pred_class_1": 3181, + "eval_predicted_binding_ratio": 0.1617347976408379, + "eval_recall": 0.7720090293453724, + "eval_recall_macro": 0.8622524774903357, + "eval_runtime": 0.1824, + "eval_samples_per_second": 893.499, + "eval_steps_per_second": 5.482, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5958 + }, + { + "epoch": 332.0, + "eval_accuracy": 0.9240390482001221, + "eval_auc": 0.9517492385828104, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7619502868068834, + "eval_f1_macro": 0.8583798620967267, + "eval_loss": 0.23056790232658386, + "eval_pr_auc": 0.775607049366595, + "eval_precision": 0.7530708661417322, + "eval_precision_macro": 0.8550111500417023, + "eval_pred_class_0": 16493, + "eval_pred_class_1": 3175, + "eval_predicted_binding_ratio": 0.16142973357738458, + "eval_recall": 0.7710415994840374, + "eval_recall_macro": 0.8618593039974662, + "eval_runtime": 0.2665, + "eval_samples_per_second": 611.679, + "eval_steps_per_second": 3.753, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5976 + }, + { + "epoch": 333.0, + "eval_accuracy": 0.9240898922106976, + "eval_auc": 0.9517777256072577, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7621475227019276, + "eval_f1_macro": 0.8584929210351648, + "eval_loss": 0.23053352534770966, + "eval_pr_auc": 0.7757600766000483, + "eval_precision": 0.7531486146095718, + "eval_precision_macro": 0.855079036870636, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7713640761044824, + "eval_recall_macro": 0.8620205423076888, + "eval_runtime": 0.2613, + "eval_samples_per_second": 623.883, + "eval_steps_per_second": 3.828, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5994 + }, + { + "epoch": 333.3333333333333, + "grad_norm": 16736.6328125, + "learning_rate": 3.021381973636964e-07, + "loss": 0.1913, + "step": 6000 + }, + { + "epoch": 334.0, + "eval_accuracy": 0.9237848281472443, + "eval_auc": 0.9517933365355851, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7616473207187152, + "eval_f1_macro": 0.8581438407085573, + "eval_loss": 0.23052088916301727, + "eval_pr_auc": 0.7758187649274527, + "eval_precision": 0.751254705144291, + "eval_precision_macro": 0.8542074496595242, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7723315059658175, + "eval_recall_macro": 0.8622326329249622, + "eval_runtime": 0.2573, + "eval_samples_per_second": 633.473, + "eval_steps_per_second": 3.886, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6012 + }, + { + "epoch": 335.0, + "eval_accuracy": 0.9238356721578198, + "eval_auc": 0.9518365877609504, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7619198982835347, + "eval_f1_macro": 0.8582932017746205, + "eval_loss": 0.23045583069324493, + "eval_pr_auc": 0.7760158372270667, + "eval_precision": 0.7511751801942964, + "eval_precision_macro": 0.8542244778801185, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8625249290661412, + "eval_runtime": 0.219, + "eval_samples_per_second": 744.186, + "eval_steps_per_second": 4.566, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6030 + }, + { + "epoch": 336.0, + "eval_accuracy": 0.9241915802318487, + "eval_auc": 0.9518931822423861, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7624661462482077, + "eval_f1_macro": 0.8586824817571539, + "eval_loss": 0.23035065829753876, + "eval_pr_auc": 0.7762652864261658, + "eval_precision": 0.753463476070529, + "eval_precision_macro": 0.855266785330923, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7716865527249275, + "eval_recall_macro": 0.8622119610971773, + "eval_runtime": 0.2363, + "eval_samples_per_second": 689.881, + "eval_steps_per_second": 4.232, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6048 + }, + { + "epoch": 337.0, + "eval_accuracy": 0.9240390482001221, + "eval_auc": 0.9519021264089276, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7623289850461342, + "eval_f1_macro": 0.8585623745200415, + "eval_loss": 0.2303379327058792, + "eval_pr_auc": 0.7763353590359, + "eval_precision": 0.752276295133438, + "eval_precision_macro": 0.8547524774823897, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7726539825862625, + "eval_recall_macro": 0.8625145931522488, + "eval_runtime": 0.2467, + "eval_samples_per_second": 660.828, + "eval_steps_per_second": 4.054, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6066 + }, + { + "epoch": 338.0, + "eval_accuracy": 0.9236831401260931, + "eval_auc": 0.9519062432559864, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7616325234238527, + "eval_f1_macro": 0.8581006831532533, + "eval_loss": 0.23036180436611176, + "eval_pr_auc": 0.7763420780879606, + "eval_precision": 0.7503128911138923, + "eval_precision_macro": 0.8538172032062905, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7732989358271525, + "eval_recall_macro": 0.8625654454592997, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.319, + "eval_steps_per_second": 3.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6084 + }, + { + "epoch": 339.0, + "eval_accuracy": 0.9237339841366687, + "eval_auc": 0.9519363653402588, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7618291521117815, + "eval_f1_macro": 0.8582134440261069, + "eval_loss": 0.23031854629516602, + "eval_pr_auc": 0.776475073481046, + "eval_precision": 0.7503909915545824, + "eval_precision_macro": 0.8538853142461151, + "eval_pred_class_0": 16471, + "eval_pred_class_1": 3197, + "eval_predicted_binding_ratio": 0.16254830181004679, + "eval_recall": 0.7736214124475975, + "eval_recall_macro": 0.8627266837695222, + "eval_runtime": 0.2533, + "eval_samples_per_second": 643.603, + "eval_steps_per_second": 3.948, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6102 + }, + { + "epoch": 340.0, + "eval_accuracy": 0.9236831401260931, + "eval_auc": 0.9519665750170216, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7616325234238527, + "eval_f1_macro": 0.8581006831532533, + "eval_loss": 0.23026354610919952, + "eval_pr_auc": 0.7766276763039114, + "eval_precision": 0.7503128911138923, + "eval_precision_macro": 0.8538172032062905, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7732989358271525, + "eval_recall_macro": 0.8625654454592997, + "eval_runtime": 0.2621, + "eval_samples_per_second": 621.893, + "eval_steps_per_second": 3.815, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6120 + }, + { + "epoch": 341.0, + "eval_accuracy": 0.9240898922106976, + "eval_auc": 0.9520129601070512, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7625258469858438, + "eval_f1_macro": 0.8586752506435165, + "eval_loss": 0.230192169547081, + "eval_pr_auc": 0.7768138361852162, + "eval_precision": 0.7523540489642184, + "eval_precision_macro": 0.8548203930053466, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8626758314624713, + "eval_runtime": 0.2426, + "eval_samples_per_second": 671.971, + "eval_steps_per_second": 4.123, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6138 + }, + { + "epoch": 342.0, + "eval_accuracy": 0.9239882041895465, + "eval_auc": 0.9520488340982072, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7622833518842423, + "eval_f1_macro": 0.8585223761569667, + "eval_loss": 0.2301386296749115, + "eval_pr_auc": 0.7769598633905366, + "eval_precision": 0.7518820577164367, + "eval_precision_macro": 0.8545818055572474, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8626154705039393, + "eval_runtime": 0.2473, + "eval_samples_per_second": 659.185, + "eval_steps_per_second": 4.044, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6156 + }, + { + "epoch": 343.0, + "eval_accuracy": 0.9241407362212731, + "eval_auc": 0.952100825107636, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7626471524021635, + "eval_f1_macro": 0.8587517153841377, + "eval_loss": 0.23005619645118713, + "eval_pr_auc": 0.7772456933395511, + "eval_precision": 0.7525902668759812, + "eval_precision_macro": 0.8549397976374689, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8627060119417373, + "eval_runtime": 0.2129, + "eval_samples_per_second": 765.55, + "eval_steps_per_second": 4.697, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6174 + }, + { + "epoch": 344.0, + "eval_accuracy": 0.9240898922106976, + "eval_auc": 0.9521250006350455, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7625258469858438, + "eval_f1_macro": 0.8586752506435165, + "eval_loss": 0.23000310361385345, + "eval_pr_auc": 0.7773917675410515, + "eval_precision": 0.7523540489642184, + "eval_precision_macro": 0.8548203930053466, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8626758314624713, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.276, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6192 + }, + { + "epoch": 345.0, + "eval_accuracy": 0.9241915802318487, + "eval_auc": 0.9521700231752211, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7627684964200477, + "eval_f1_macro": 0.8588281984687149, + "eval_loss": 0.22992061078548431, + "eval_pr_auc": 0.777567865132197, + "eval_precision": 0.7528266331658291, + "eval_precision_macro": 0.8550592763014295, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8627361924210033, + "eval_runtime": 0.2684, + "eval_samples_per_second": 607.371, + "eval_steps_per_second": 3.726, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6210 + }, + { + "epoch": 346.0, + "eval_accuracy": 0.9240898922106976, + "eval_auc": 0.9521951914175242, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7624502784407319, + "eval_f1_macro": 0.8586388332084449, + "eval_loss": 0.22986458241939545, + "eval_pr_auc": 0.7777181268262345, + "eval_precision": 0.7525125628140703, + "eval_precision_macro": 0.8548719086819685, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7726539825862625, + "eval_recall_macro": 0.8625447736315148, + "eval_runtime": 0.2315, + "eval_samples_per_second": 704.221, + "eval_steps_per_second": 4.32, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6228 + }, + { + "epoch": 347.0, + "eval_accuracy": 0.9240898922106976, + "eval_auc": 0.9522022182817713, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7625258469858438, + "eval_f1_macro": 0.8586752506435165, + "eval_loss": 0.22987791895866394, + "eval_pr_auc": 0.7777283636078421, + "eval_precision": 0.7523540489642184, + "eval_precision_macro": 0.8548203930053466, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8626758314624713, + "eval_runtime": 0.2021, + "eval_samples_per_second": 806.414, + "eval_steps_per_second": 4.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6246 + }, + { + "epoch": 348.0, + "eval_accuracy": 0.9243441122635754, + "eval_auc": 0.9522300142987927, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7634340222575516, + "eval_f1_macro": 0.8592029398342167, + "eval_loss": 0.229818195104599, + "eval_pr_auc": 0.7778254023800715, + "eval_precision": 0.7529005957980558, + "eval_precision_macro": 0.8552111450378106, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7742663656884876, + "eval_recall_macro": 0.8633509651826273, + "eval_runtime": 0.2422, + "eval_samples_per_second": 672.887, + "eval_steps_per_second": 4.128, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6264 + }, + { + "epoch": 349.0, + "eval_accuracy": 0.9243949562741509, + "eval_auc": 0.9522554939810626, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763780778395552, + "eval_f1_macro": 0.8593880436271214, + "eval_loss": 0.22978660464286804, + "eval_pr_auc": 0.7779049389173774, + "eval_precision": 0.7526612398246713, + "eval_precision_macro": 0.8551760733541227, + "eval_pred_class_0": 16474, + "eval_pred_class_1": 3194, + "eval_predicted_binding_ratio": 0.16239576977832013, + "eval_recall": 0.7752337955498226, + "eval_recall_macro": 0.8637743191547629, + "eval_runtime": 0.237, + "eval_samples_per_second": 687.696, + "eval_steps_per_second": 4.219, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6282 + }, + { + "epoch": 350.0, + "eval_accuracy": 0.9244458002847264, + "eval_auc": 0.9522950273918264, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7638270820089001, + "eval_f1_macro": 0.859428369717681, + "eval_loss": 0.2297380119562149, + "eval_pr_auc": 0.7780796039517833, + "eval_precision": 0.7530554685051708, + "eval_precision_macro": 0.855346694014678, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7749113189293776, + "eval_recall_macro": 0.8636734418030723, + "eval_runtime": 0.251, + "eval_samples_per_second": 649.348, + "eval_steps_per_second": 3.984, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6300 + }, + { + "epoch": 351.0, + "eval_accuracy": 0.9242932682529998, + "eval_auc": 0.9523093341652933, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7636883034438978, + "eval_f1_macro": 0.8593074482256571, + "eval_loss": 0.22972844541072845, + "eval_pr_auc": 0.7781517759374512, + "eval_precision": 0.751875, + "eval_precision_macro": 0.8548359697595336, + "eval_pred_class_0": 16468, + "eval_pred_class_1": 3200, + "eval_predicted_binding_ratio": 0.16270083384177345, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8639760738581439, + "eval_runtime": 0.2607, + "eval_samples_per_second": 625.342, + "eval_steps_per_second": 3.836, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6318 + }, + { + "epoch": 352.0, + "eval_accuracy": 0.9242424242424242, + "eval_auc": 0.952317528929415, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7635671215487146, + "eval_f1_macro": 0.8592310391299909, + "eval_loss": 0.22972537577152252, + "eval_pr_auc": 0.7781868067856106, + "eval_precision": 0.7516401124648547, + "eval_precision_macro": 0.8547172445484534, + "eval_pred_class_0": 16467, + "eval_pred_class_1": 3201, + "eval_predicted_binding_ratio": 0.16275167785234898, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8639458933788778, + "eval_runtime": 0.234, + "eval_samples_per_second": 696.7, + "eval_steps_per_second": 4.274, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6336 + }, + { + "epoch": 353.0, + "eval_accuracy": 0.9243441122635754, + "eval_auc": 0.9523571596651685, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7638095238095238, + "eval_f1_macro": 0.8593838755989138, + "eval_loss": 0.2296588122844696, + "eval_pr_auc": 0.7784014772150735, + "eval_precision": 0.7521100343857455, + "eval_precision_macro": 0.8549547682402951, + "eval_pred_class_0": 16469, + "eval_pred_class_1": 3199, + "eval_predicted_binding_ratio": 0.16264998983119788, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8640062543374099, + "eval_runtime": 0.2433, + "eval_samples_per_second": 669.953, + "eval_steps_per_second": 4.11, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6354 + }, + { + "epoch": 354.0, + "eval_accuracy": 0.9243441122635754, + "eval_auc": 0.952381568772553, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7638095238095238, + "eval_f1_macro": 0.8593838755989138, + "eval_loss": 0.22961482405662537, + "eval_pr_auc": 0.7785102930543456, + "eval_precision": 0.7521100343857455, + "eval_precision_macro": 0.8549547682402951, + "eval_pred_class_0": 16469, + "eval_pred_class_1": 3199, + "eval_predicted_binding_ratio": 0.16264998983119788, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8640062543374099, + "eval_runtime": 0.2671, + "eval_samples_per_second": 610.236, + "eval_steps_per_second": 3.744, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6372 + }, + { + "epoch": 355.0, + "eval_accuracy": 0.9242932682529998, + "eval_auc": 0.9523906199965831, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763838223632038, + "eval_f1_macro": 0.8593796791618457, + "eval_loss": 0.2295987904071808, + "eval_pr_auc": 0.7785825605072106, + "eval_precision": 0.7515605493133583, + "eval_precision_macro": 0.8547343562893321, + "eval_pred_class_0": 16464, + "eval_pred_class_1": 3204, + "eval_predicted_binding_ratio": 0.16290420988407567, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.8642381895200568, + "eval_runtime": 0.2598, + "eval_samples_per_second": 627.418, + "eval_steps_per_second": 3.849, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6390 + }, + { + "epoch": 356.0, + "eval_accuracy": 0.9243949562741509, + "eval_auc": 0.9524596428791869, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7636305833730727, + "eval_f1_macro": 0.8593156699585895, + "eval_loss": 0.229468435049057, + "eval_pr_auc": 0.7789122838326235, + "eval_precision": 0.7529780564263323, + "eval_precision_macro": 0.8552789299002641, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7745888423089327, + "eval_recall_macro": 0.8635122034928499, + "eval_runtime": 0.2641, + "eval_samples_per_second": 617.191, + "eval_steps_per_second": 3.786, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6408 + }, + { + "epoch": 357.0, + "eval_accuracy": 0.9244458002847264, + "eval_auc": 0.9524861542063461, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763751987281399, + "eval_f1_macro": 0.8593921831946546, + "eval_loss": 0.2294115126132965, + "eval_pr_auc": 0.7790142584142796, + "eval_precision": 0.7532141737221699, + "eval_precision_macro": 0.8553982756468123, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7745888423089327, + "eval_recall_macro": 0.8635423839721159, + "eval_runtime": 0.2557, + "eval_samples_per_second": 637.426, + "eval_steps_per_second": 3.911, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6426 + }, + { + "epoch": 358.0, + "eval_accuracy": 0.9244458002847264, + "eval_auc": 0.9524829619466881, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7639771283354511, + "eval_f1_macro": 0.8595006707052558, + "eval_loss": 0.2294154018163681, + "eval_pr_auc": 0.7789936192429844, + "eval_precision": 0.7527386541471048, + "eval_precision_macro": 0.8552438490185533, + "eval_pred_class_0": 16473, + "eval_pred_class_1": 3195, + "eval_predicted_binding_ratio": 0.16244661378889566, + "eval_recall": 0.7755562721702677, + "eval_recall_macro": 0.8639355574649854, + "eval_runtime": 0.2445, + "eval_samples_per_second": 666.738, + "eval_steps_per_second": 4.09, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6444 + }, + { + "epoch": 359.0, + "eval_accuracy": 0.9243949562741509, + "eval_auc": 0.9525149624032593, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763780778395552, + "eval_f1_macro": 0.8593880436271214, + "eval_loss": 0.2293538749217987, + "eval_pr_auc": 0.7791661873069065, + "eval_precision": 0.7526612398246713, + "eval_precision_macro": 0.8551760733541227, + "eval_pred_class_0": 16474, + "eval_pred_class_1": 3194, + "eval_predicted_binding_ratio": 0.16239576977832013, + "eval_recall": 0.7752337955498226, + "eval_recall_macro": 0.8637743191547629, + "eval_runtime": 0.2637, + "eval_samples_per_second": 618.132, + "eval_steps_per_second": 3.792, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6462 + }, + { + "epoch": 360.0, + "eval_accuracy": 0.9243949562741509, + "eval_auc": 0.9525288798767679, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763780778395552, + "eval_f1_macro": 0.8593880436271214, + "eval_loss": 0.22932648658752441, + "eval_pr_auc": 0.7792072068588576, + "eval_precision": 0.7526612398246713, + "eval_precision_macro": 0.8551760733541227, + "eval_pred_class_0": 16474, + "eval_pred_class_1": 3194, + "eval_predicted_binding_ratio": 0.16239576977832013, + "eval_recall": 0.7752337955498226, + "eval_recall_macro": 0.8637743191547629, + "eval_runtime": 0.2714, + "eval_samples_per_second": 600.631, + "eval_steps_per_second": 3.685, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6480 + }, + { + "epoch": 361.0, + "eval_accuracy": 0.9244458002847264, + "eval_auc": 0.9525697758373857, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763751987281399, + "eval_f1_macro": 0.8593921831946546, + "eval_loss": 0.22925521433353424, + "eval_pr_auc": 0.77936321808712, + "eval_precision": 0.7532141737221699, + "eval_precision_macro": 0.8553982756468123, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7745888423089327, + "eval_recall_macro": 0.8635423839721159, + "eval_runtime": 0.249, + "eval_samples_per_second": 654.66, + "eval_steps_per_second": 4.016, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6498 + }, + { + "epoch": 361.1111111111111, + "grad_norm": 21180.3203125, + "learning_rate": 2.1735650901333336e-07, + "loss": 0.1893, + "step": 6500 + }, + { + "epoch": 362.0, + "eval_accuracy": 0.9244458002847264, + "eval_auc": 0.9526172898972942, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7636016544702513, + "eval_f1_macro": 0.8593197379764267, + "eval_loss": 0.22917793691158295, + "eval_pr_auc": 0.7795955328857882, + "eval_precision": 0.7535321821036107, + "eval_precision_macro": 0.8555017581027062, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7739438890680426, + "eval_recall_macro": 0.8632802683102028, + "eval_runtime": 0.208, + "eval_samples_per_second": 783.548, + "eval_steps_per_second": 4.807, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6516 + }, + { + "epoch": 363.0, + "eval_accuracy": 0.924496644295302, + "eval_auc": 0.9526382926300437, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7636479388826993, + "eval_f1_macro": 0.8593600478608577, + "eval_loss": 0.22912409901618958, + "eval_pr_auc": 0.7796869947527124, + "eval_precision": 0.7539283469516027, + "eval_precision_macro": 0.8556733812884909, + "eval_pred_class_0": 16486, + "eval_pred_class_1": 3182, + "eval_predicted_binding_ratio": 0.16178564165141346, + "eval_recall": 0.7736214124475975, + "eval_recall_macro": 0.8631793909585124, + "eval_runtime": 0.2657, + "eval_samples_per_second": 613.513, + "eval_steps_per_second": 3.764, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6534 + }, + { + "epoch": 364.0, + "eval_accuracy": 0.924496644295302, + "eval_auc": 0.952662049659998, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7637231503579952, + "eval_f1_macro": 0.859396294249525, + "eval_loss": 0.22910362482070923, + "eval_pr_auc": 0.779788701256993, + "eval_precision": 0.7537688442211056, + "eval_precision_macro": 0.8556213791598126, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7739438890680426, + "eval_recall_macro": 0.8633104487894689, + "eval_runtime": 0.2607, + "eval_samples_per_second": 625.175, + "eval_steps_per_second": 3.835, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6552 + }, + { + "epoch": 365.0, + "eval_accuracy": 0.9245474883058775, + "eval_auc": 0.9526903420344663, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7638446849140674, + "eval_f1_macro": 0.8594728689002142, + "eval_loss": 0.2290574461221695, + "eval_pr_auc": 0.7799354479263911, + "eval_precision": 0.7540056550424128, + "eval_precision_macro": 0.8557410744123195, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7739438890680426, + "eval_recall_macro": 0.8633406292687349, + "eval_runtime": 0.2049, + "eval_samples_per_second": 795.329, + "eval_steps_per_second": 4.879, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6570 + }, + { + "epoch": 366.0, + "eval_accuracy": 0.9245474883058775, + "eval_auc": 0.9527021767531981, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7640699523052464, + "eval_f1_macro": 0.8595814265550925, + "eval_loss": 0.22903695702552795, + "eval_pr_auc": 0.7799769457420497, + "eval_precision": 0.753527751646284, + "eval_precision_macro": 0.8555854062558139, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7749113189293776, + "eval_recall_macro": 0.8637338027616044, + "eval_runtime": 0.2118, + "eval_samples_per_second": 769.509, + "eval_steps_per_second": 4.721, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6588 + }, + { + "epoch": 367.0, + "eval_accuracy": 0.9247000203376042, + "eval_auc": 0.9527121622971282, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.764659145081837, + "eval_f1_macro": 0.8599193797618125, + "eval_loss": 0.22902432084083557, + "eval_pr_auc": 0.7800307850119022, + "eval_precision": 0.7537593984962406, + "eval_precision_macro": 0.8557884149558164, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.864217517692272, + "eval_runtime": 0.2519, + "eval_samples_per_second": 647.072, + "eval_steps_per_second": 3.97, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6606 + }, + { + "epoch": 368.0, + "eval_accuracy": 0.9248017083587553, + "eval_auc": 0.9527291941703031, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7649769585253456, + "eval_f1_macro": 0.8601085500794873, + "eval_loss": 0.22899393737316132, + "eval_pr_auc": 0.7800749822284204, + "eval_precision": 0.7540726817042607, + "eval_precision_macro": 0.85597540373147, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8644089364817604, + "eval_runtime": 0.2331, + "eval_samples_per_second": 699.217, + "eval_steps_per_second": 4.29, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6624 + }, + { + "epoch": 369.0, + "eval_accuracy": 0.9248525523693308, + "eval_auc": 0.9527588782921224, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7650238473767885, + "eval_f1_macro": 0.8601491566364061, + "eval_loss": 0.22894835472106934, + "eval_pr_auc": 0.780237083741907, + "eval_precision": 0.7544684854186265, + "eval_precision_macro": 0.856146798082819, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.86430805913007, + "eval_runtime": 0.2655, + "eval_samples_per_second": 613.902, + "eval_steps_per_second": 3.766, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6642 + }, + { + "epoch": 370.0, + "eval_accuracy": 0.9248525523693308, + "eval_auc": 0.9527729125556185, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7650238473767885, + "eval_f1_macro": 0.8601491566364061, + "eval_loss": 0.22892294824123383, + "eval_pr_auc": 0.7803195480022762, + "eval_precision": 0.7544684854186265, + "eval_precision_macro": 0.856146798082819, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.86430805913007, + "eval_runtime": 0.2596, + "eval_samples_per_second": 627.873, + "eval_steps_per_second": 3.852, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6660 + }, + { + "epoch": 371.0, + "eval_accuracy": 0.9248525523693308, + "eval_auc": 0.9527883872289603, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7650985378258105, + "eval_f1_macro": 0.8601851483463878, + "eval_loss": 0.22889479994773865, + "eval_pr_auc": 0.7804143746656889, + "eval_precision": 0.7543089940457537, + "eval_precision_macro": 0.8560948381043845, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8644391169610264, + "eval_runtime": 0.2368, + "eval_samples_per_second": 688.489, + "eval_steps_per_second": 4.224, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6678 + }, + { + "epoch": 372.0, + "eval_accuracy": 0.924954240390482, + "eval_auc": 0.9528159885960027, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7653418124006359, + "eval_f1_macro": 0.860338399996844, + "eval_loss": 0.22885586321353912, + "eval_pr_auc": 0.7805625189044384, + "eval_precision": 0.7547820633427407, + "eval_precision_macro": 0.8563339286918206, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8644994779195585, + "eval_runtime": 0.1849, + "eval_samples_per_second": 881.352, + "eval_steps_per_second": 5.407, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6696 + }, + { + "epoch": 373.0, + "eval_accuracy": 0.9250559284116331, + "eval_auc": 0.9528519015171544, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7655852417302799, + "eval_f1_macro": 0.8604917251982311, + "eval_loss": 0.22878196835517883, + "eval_pr_auc": 0.7807742707975688, + "eval_precision": 0.7552557263884531, + "eval_precision_macro": 0.8565733155332836, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8645598388780904, + "eval_runtime": 0.2727, + "eval_samples_per_second": 597.724, + "eval_steps_per_second": 3.667, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6714 + }, + { + "epoch": 374.0, + "eval_accuracy": 0.9251067724222086, + "eval_auc": 0.9528673372605004, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7657070144743121, + "eval_f1_macro": 0.8605684154038177, + "eval_loss": 0.22877708077430725, + "eval_pr_auc": 0.7808321396342274, + "eval_precision": 0.7554927809165097, + "eval_precision_macro": 0.85669312022406, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8645900193573565, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.842, + "eval_steps_per_second": 3.87, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6732 + }, + { + "epoch": 375.0, + "eval_accuracy": 0.9251067724222086, + "eval_auc": 0.9528682131854067, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7659303988558716, + "eval_f1_macro": 0.8606760610325117, + "eval_loss": 0.2287902534008026, + "eval_pr_auc": 0.7808487230478494, + "eval_precision": 0.7550125313283208, + "eval_precision_macro": 0.8565363700584309, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8649831928502261, + "eval_runtime": 0.1856, + "eval_samples_per_second": 878.251, + "eval_steps_per_second": 5.388, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6750 + }, + { + "epoch": 376.0, + "eval_accuracy": 0.9250050844010576, + "eval_auc": 0.9528953279275011, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7655380702591003, + "eval_f1_macro": 0.8604509839871686, + "eval_loss": 0.228745236992836, + "eval_pr_auc": 0.7809833435589818, + "eval_precision": 0.754858934169279, + "eval_precision_macro": 0.8564014297014619, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.864660716229781, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.614, + "eval_steps_per_second": 3.881, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6768 + }, + { + "epoch": 377.0, + "eval_accuracy": 0.9248017083587553, + "eval_auc": 0.9529040482465667, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7651262505955216, + "eval_f1_macro": 0.8601804865980422, + "eval_loss": 0.2287396788597107, + "eval_pr_auc": 0.781048839864553, + "eval_precision": 0.7537546933667084, + "eval_precision_macro": 0.8558720042841312, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7768461786520477, + "eval_recall_macro": 0.8646710521436733, + "eval_runtime": 0.2563, + "eval_samples_per_second": 635.891, + "eval_steps_per_second": 3.901, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6786 + }, + { + "epoch": 378.0, + "eval_accuracy": 0.9248525523693308, + "eval_auc": 0.9529144620204507, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7653968253968254, + "eval_f1_macro": 0.8603288764349426, + "eval_loss": 0.22873102128505707, + "eval_pr_auc": 0.7810643100928254, + "eval_precision": 0.7536730228196311, + "eval_precision_macro": 0.8558880628094148, + "eval_pred_class_0": 16469, + "eval_pred_class_1": 3199, + "eval_predicted_binding_ratio": 0.16264998983119788, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8649633482848524, + "eval_runtime": 0.1843, + "eval_samples_per_second": 884.276, + "eval_steps_per_second": 5.425, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6804 + }, + { + "epoch": 379.0, + "eval_accuracy": 0.924954240390482, + "eval_auc": 0.9529275424990492, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7657142857142857, + "eval_f1_macro": 0.8605178766021483, + "eval_loss": 0.22870197892189026, + "eval_pr_auc": 0.7811376412515475, + "eval_precision": 0.7539856205064083, + "eval_precision_macro": 0.8560747217232387, + "eval_pred_class_0": 16469, + "eval_pred_class_1": 3199, + "eval_predicted_binding_ratio": 0.16264998983119788, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8651547670743409, + "eval_runtime": 0.2724, + "eval_samples_per_second": 598.424, + "eval_steps_per_second": 3.671, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6822 + }, + { + "epoch": 380.0, + "eval_accuracy": 0.9251067724222086, + "eval_auc": 0.9529545793811519, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7660047656870532, + "eval_f1_macro": 0.8607118952674847, + "eval_loss": 0.22864677011966705, + "eval_pr_auc": 0.7812048860219004, + "eval_precision": 0.7548528490920476, + "eval_precision_macro": 0.8564843339790698, + "eval_pred_class_0": 16474, + "eval_pred_class_1": 3194, + "eval_predicted_binding_ratio": 0.16239576977832013, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8651142506811824, + "eval_runtime": 0.1856, + "eval_samples_per_second": 878.062, + "eval_steps_per_second": 5.387, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6840 + }, + { + "epoch": 381.0, + "eval_accuracy": 0.9250559284116331, + "eval_auc": 0.9529760687388493, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7658087067047982, + "eval_f1_macro": 0.8605994081311654, + "eval_loss": 0.2286224663257599, + "eval_pr_auc": 0.7813066136655398, + "eval_precision": 0.7547760726589414, + "eval_precision_macro": 0.8564168678924449, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.86495301237096, + "eval_runtime": 0.2522, + "eval_samples_per_second": 646.403, + "eval_steps_per_second": 3.966, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6858 + }, + { + "epoch": 382.0, + "eval_accuracy": 0.9251067724222086, + "eval_auc": 0.9530162639595422, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7656324582338903, + "eval_f1_macro": 0.8605324858111449, + "eval_loss": 0.22855480015277863, + "eval_pr_auc": 0.7815011919374028, + "eval_precision": 0.7556532663316583, + "eval_precision_macro": 0.856745584876579, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8644589615264, + "eval_runtime": 0.267, + "eval_samples_per_second": 610.397, + "eval_steps_per_second": 3.745, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6876 + }, + { + "epoch": 383.0, + "eval_accuracy": 0.9251576164327843, + "eval_auc": 0.9530368384623376, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7658288259624563, + "eval_f1_macro": 0.8606451240251011, + "eval_loss": 0.22851014137268066, + "eval_pr_auc": 0.7815937686262128, + "eval_precision": 0.7557299843014129, + "eval_precision_macro": 0.8568129991882603, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8646201998366225, + "eval_runtime": 0.2682, + "eval_samples_per_second": 607.766, + "eval_steps_per_second": 3.729, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6894 + }, + { + "epoch": 384.0, + "eval_accuracy": 0.9251576164327843, + "eval_auc": 0.9530639240069352, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7658288259624563, + "eval_f1_macro": 0.8606451240251011, + "eval_loss": 0.22846660017967224, + "eval_pr_auc": 0.7817366980630457, + "eval_precision": 0.7557299843014129, + "eval_precision_macro": 0.8568129991882603, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8646201998366225, + "eval_runtime": 0.2509, + "eval_samples_per_second": 649.734, + "eval_steps_per_second": 3.986, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6912 + }, + { + "epoch": 385.0, + "eval_accuracy": 0.9251067724222086, + "eval_auc": 0.9530865131370146, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7656324582338903, + "eval_f1_macro": 0.8605324858111449, + "eval_loss": 0.22843268513679504, + "eval_pr_auc": 0.7818405401720232, + "eval_precision": 0.7556532663316583, + "eval_precision_macro": 0.856745584876579, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8644589615264, + "eval_runtime": 0.2627, + "eval_samples_per_second": 620.559, + "eval_steps_per_second": 3.807, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6930 + }, + { + "epoch": 386.0, + "eval_accuracy": 0.9251576164327843, + "eval_auc": 0.9530828926474026, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7659033078880407, + "eval_f1_macro": 0.8606810172942987, + "eval_loss": 0.22846029698848724, + "eval_pr_auc": 0.781765942321457, + "eval_precision": 0.7555695010982115, + "eval_precision_macro": 0.8567605408530922, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.864751257667579, + "eval_runtime": 0.2616, + "eval_samples_per_second": 623.151, + "eval_steps_per_second": 3.823, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6948 + }, + { + "epoch": 387.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.9530829315773984, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7665184243964421, + "eval_f1_macro": 0.8610134494863566, + "eval_loss": 0.22847168147563934, + "eval_pr_auc": 0.7818060776753192, + "eval_precision": 0.7552425665101722, + "eval_precision_macro": 0.8567386267869261, + "eval_pred_class_0": 16473, + "eval_pred_class_1": 3195, + "eval_predicted_binding_ratio": 0.16244661378889566, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8654669077808935, + "eval_runtime": 0.2089, + "eval_samples_per_second": 780.429, + "eval_steps_per_second": 4.788, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6966 + }, + { + "epoch": 388.0, + "eval_accuracy": 0.9254118364856619, + "eval_auc": 0.953101403860419, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7668838391863976, + "eval_f1_macro": 0.861243571985536, + "eval_loss": 0.2284410148859024, + "eval_pr_auc": 0.7818375459402719, + "eval_precision": 0.7559523809523809, + "eval_precision_macro": 0.8570973363853918, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8655574492186915, + "eval_runtime": 0.2357, + "eval_samples_per_second": 691.702, + "eval_steps_per_second": 4.244, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6984 + }, + { + "epoch": 388.8888888888889, + "grad_norm": 17393.9921875, + "learning_rate": 1.4317094954644378e-07, + "loss": 0.1876, + "step": 7000 + }, + { + "epoch": 389.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9531112726143616, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671537484116899, + "eval_f1_macro": 0.8613916441816318, + "eval_loss": 0.22843530774116516, + "eval_pr_auc": 0.7818710932290109, + "eval_precision": 0.755868544600939, + "eval_precision_macro": 0.8571123212290193, + "eval_pred_class_0": 16473, + "eval_pred_class_1": 3195, + "eval_predicted_binding_ratio": 0.16244661378889566, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8658497453598706, + "eval_runtime": 0.2628, + "eval_samples_per_second": 620.309, + "eval_steps_per_second": 3.806, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7002 + }, + { + "epoch": 390.0, + "eval_accuracy": 0.9253101484645109, + "eval_auc": 0.9531541734697645, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7664175544601686, + "eval_f1_macro": 0.8609828565716282, + "eval_loss": 0.2283545583486557, + "eval_pr_auc": 0.7820972392670548, + "eval_precision": 0.7559598494353826, + "eval_precision_macro": 0.8570151188924486, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8651039147672901, + "eval_runtime": 0.2594, + "eval_samples_per_second": 628.253, + "eval_steps_per_second": 3.854, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7020 + }, + { + "epoch": 391.0, + "eval_accuracy": 0.9253609924750864, + "eval_auc": 0.9531671274258764, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7665394402035624, + "eval_f1_macro": 0.8610596014864338, + "eval_loss": 0.22832486033439636, + "eval_pr_auc": 0.7821584295330316, + "eval_precision": 0.7561970505177282, + "eval_precision_macro": 0.8571349914927091, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.865134095246556, + "eval_runtime": 0.2425, + "eval_samples_per_second": 672.118, + "eval_steps_per_second": 4.123, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7038 + }, + { + "epoch": 392.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.953186076601346, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7661469933184856, + "eval_f1_macro": 0.8608344648891975, + "eval_loss": 0.22829268872737885, + "eval_pr_auc": 0.7822636305739935, + "eval_precision": 0.756043956043956, + "eval_precision_macro": 0.8570003193433394, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.864811618626111, + "eval_runtime": 0.241, + "eval_samples_per_second": 676.357, + "eval_steps_per_second": 4.149, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7056 + }, + { + "epoch": 393.0, + "eval_accuracy": 0.9252084604433598, + "eval_auc": 0.9532213958400613, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.765950676213206, + "eval_f1_macro": 0.860721851071415, + "eval_loss": 0.2282164841890335, + "eval_pr_auc": 0.782458419213003, + "eval_precision": 0.7559673366834171, + "eval_precision_macro": 0.8569329524960401, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8646503803158885, + "eval_runtime": 0.21, + "eval_samples_per_second": 776.053, + "eval_steps_per_second": 4.761, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7074 + }, + { + "epoch": 394.0, + "eval_accuracy": 0.9251576164327843, + "eval_auc": 0.9532409095004704, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7657542966263526, + "eval_f1_macro": 0.8606092068875439, + "eval_loss": 0.22819304466247559, + "eval_pr_auc": 0.7825471573946872, + "eval_precision": 0.7558906691800189, + "eval_precision_macro": 0.8568655651025967, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.864489142005666, + "eval_runtime": 0.1952, + "eval_samples_per_second": 835.24, + "eval_steps_per_second": 5.124, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7092 + }, + { + "epoch": 395.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.9532573574237078, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7659980897803247, + "eval_f1_macro": 0.8607627043564902, + "eval_loss": 0.2281719297170639, + "eval_pr_auc": 0.7826366824043385, + "eval_precision": 0.7563659226658284, + "eval_precision_macro": 0.8571057489837905, + "eval_pred_class_0": 16487, + "eval_pred_class_1": 3181, + "eval_predicted_binding_ratio": 0.1617347976408379, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8645495029641981, + "eval_runtime": 0.2003, + "eval_samples_per_second": 813.649, + "eval_steps_per_second": 4.992, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7110 + }, + { + "epoch": 396.0, + "eval_accuracy": 0.9252084604433598, + "eval_auc": 0.9532628173556228, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.765950676213206, + "eval_f1_macro": 0.860721851071415, + "eval_loss": 0.22817298769950867, + "eval_pr_auc": 0.7826246984855115, + "eval_precision": 0.7559673366834171, + "eval_precision_macro": 0.8569329524960401, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8646503803158885, + "eval_runtime": 0.2265, + "eval_samples_per_second": 719.545, + "eval_steps_per_second": 4.414, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7128 + }, + { + "epoch": 397.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.9532649585053934, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7662213740458015, + "eval_f1_macro": 0.8608703093903662, + "eval_loss": 0.22816696763038635, + "eval_pr_auc": 0.7826996340764835, + "eval_precision": 0.7558832758079699, + "eval_precision_macro": 0.8569477661729006, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7768461786520477, + "eval_recall_macro": 0.8649426764570676, + "eval_runtime": 0.2363, + "eval_samples_per_second": 689.822, + "eval_steps_per_second": 4.232, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7146 + }, + { + "epoch": 398.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.9532878980054353, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7661469933184856, + "eval_f1_macro": 0.8608344648891975, + "eval_loss": 0.22812943160533905, + "eval_pr_auc": 0.7828570635819191, + "eval_precision": 0.756043956043956, + "eval_precision_macro": 0.8570003193433394, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.864811618626111, + "eval_runtime": 0.2572, + "eval_samples_per_second": 633.707, + "eval_steps_per_second": 3.888, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7164 + }, + { + "epoch": 399.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.9533086671582098, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7660725652450668, + "eval_f1_macro": 0.860798596552099, + "eval_loss": 0.2281065434217453, + "eval_pr_auc": 0.7829274286728394, + "eval_precision": 0.7562048382029531, + "eval_precision_macro": 0.8570529802176428, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8646805607951544, + "eval_runtime": 0.2483, + "eval_samples_per_second": 656.497, + "eval_steps_per_second": 4.028, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7182 + }, + { + "epoch": 400.0, + "eval_accuracy": 0.9253101484645109, + "eval_auc": 0.953327003186245, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7661944930765557, + "eval_f1_macro": 0.8608753604764983, + "eval_loss": 0.22807644307613373, + "eval_pr_auc": 0.7830440116061308, + "eval_precision": 0.7564424890006285, + "eval_precision_macro": 0.8571730824234005, + "eval_pred_class_0": 16486, + "eval_pred_class_1": 3182, + "eval_predicted_binding_ratio": 0.16178564165141346, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8647107412744205, + "eval_runtime": 0.221, + "eval_samples_per_second": 737.532, + "eval_steps_per_second": 4.525, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7200 + }, + { + "epoch": 401.0, + "eval_accuracy": 0.9253101484645109, + "eval_auc": 0.9533311297658027, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7664175544601686, + "eval_f1_macro": 0.8609828565716282, + "eval_loss": 0.22808308899402618, + "eval_pr_auc": 0.7830491617637381, + "eval_precision": 0.7559598494353826, + "eval_precision_macro": 0.8570151188924486, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8651039147672901, + "eval_runtime": 0.2636, + "eval_samples_per_second": 618.406, + "eval_steps_per_second": 3.794, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7218 + }, + { + "epoch": 402.0, + "eval_accuracy": 0.9253609924750864, + "eval_auc": 0.9533512371086481, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.766390833863781, + "eval_f1_macro": 0.8609879862166538, + "eval_loss": 0.2280474752187729, + "eval_pr_auc": 0.7831121530747555, + "eval_precision": 0.7565190072258875, + "eval_precision_macro": 0.857240395332689, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.864871979584643, + "eval_runtime": 0.2425, + "eval_samples_per_second": 672.294, + "eval_steps_per_second": 4.125, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7236 + }, + { + "epoch": 403.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9533569987480307, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.766783328030544, + "eval_f1_macro": 0.86121314661739, + "eval_loss": 0.22804181277751923, + "eval_pr_auc": 0.7831234273165448, + "eval_precision": 0.7566718995290423, + "eval_precision_macro": 0.8573749596534976, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8651944562050881, + "eval_runtime": 0.256, + "eval_samples_per_second": 636.798, + "eval_steps_per_second": 3.907, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7254 + }, + { + "epoch": 404.0, + "eval_accuracy": 0.9255135245068131, + "eval_auc": 0.9533826536152816, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7669053301511536, + "eval_f1_macro": 0.861289946852225, + "eval_loss": 0.2279965728521347, + "eval_pr_auc": 0.78327266335354, + "eval_precision": 0.7569095477386935, + "eval_precision_macro": 0.8574950553544232, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8652246366843541, + "eval_runtime": 0.2614, + "eval_samples_per_second": 623.619, + "eval_steps_per_second": 3.826, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7272 + }, + { + "epoch": 405.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9533846682425657, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.766783328030544, + "eval_f1_macro": 0.86121314661739, + "eval_loss": 0.22799374163150787, + "eval_pr_auc": 0.7832776890714148, + "eval_precision": 0.7566718995290423, + "eval_precision_macro": 0.8573749596534976, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8651944562050881, + "eval_runtime": 0.2631, + "eval_samples_per_second": 619.577, + "eval_steps_per_second": 3.801, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7290 + }, + { + "epoch": 406.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9533931647141554, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.766783328030544, + "eval_f1_macro": 0.86121314661739, + "eval_loss": 0.2279902696609497, + "eval_pr_auc": 0.7833151045367318, + "eval_precision": 0.7566718995290423, + "eval_precision_macro": 0.8573749596534976, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8651944562050881, + "eval_runtime": 0.2498, + "eval_samples_per_second": 652.534, + "eval_steps_per_second": 4.003, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7308 + }, + { + "epoch": 407.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9533809990904591, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7673235855054037, + "eval_f1_macro": 0.8615095109466251, + "eval_loss": 0.22801372408866882, + "eval_pr_auc": 0.7832695956978404, + "eval_precision": 0.7565026637417738, + "eval_precision_macro": 0.8574040902613707, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8657790484874461, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.354, + "eval_steps_per_second": 3.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7326 + }, + { + "epoch": 408.0, + "eval_accuracy": 0.9254118364856619, + "eval_auc": 0.9533805708605049, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671058898237816, + "eval_f1_macro": 0.8613505657612415, + "eval_loss": 0.2280135303735733, + "eval_pr_auc": 0.7832983031509244, + "eval_precision": 0.7554721701063164, + "eval_precision_macro": 0.8569406995036744, + "eval_pred_class_0": 16470, + "eval_pred_class_1": 3198, + "eval_predicted_binding_ratio": 0.16259914582062232, + "eval_recall": 0.7791035149951628, + "eval_recall_macro": 0.865950622711561, + "eval_runtime": 0.2668, + "eval_samples_per_second": 610.9, + "eval_steps_per_second": 3.748, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7344 + }, + { + "epoch": 409.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9533953253289238, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7672276913305811, + "eval_f1_macro": 0.8614272726281818, + "eval_loss": 0.22799338400363922, + "eval_pr_auc": 0.7833637706556547, + "eval_precision": 0.7557084766969033, + "eval_precision_macro": 0.857060115344384, + "eval_pred_class_0": 16471, + "eval_pred_class_1": 3197, + "eval_predicted_binding_ratio": 0.16254830181004679, + "eval_recall": 0.7791035149951628, + "eval_recall_macro": 0.865980803190827, + "eval_runtime": 0.2998, + "eval_samples_per_second": 543.699, + "eval_steps_per_second": 3.336, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7362 + }, + { + "epoch": 410.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9534269948805303, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7672496025437202, + "eval_f1_macro": 0.8614738601594714, + "eval_loss": 0.22793784737586975, + "eval_pr_auc": 0.783492627588611, + "eval_precision": 0.7566635308874256, + "eval_precision_macro": 0.8574567123458305, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8656479906564896, + "eval_runtime": 0.2396, + "eval_samples_per_second": 680.309, + "eval_steps_per_second": 4.174, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7380 + }, + { + "epoch": 411.0, + "eval_accuracy": 0.9255135245068131, + "eval_auc": 0.9534421775789035, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7670535856256957, + "eval_f1_macro": 0.8613613920200376, + "eval_loss": 0.22790838778018951, + "eval_pr_auc": 0.7835790311478947, + "eval_precision": 0.7565872020075283, + "eval_precision_macro": 0.8573894747901719, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.865486752346267, + "eval_runtime": 0.2625, + "eval_samples_per_second": 620.852, + "eval_steps_per_second": 3.809, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7398 + }, + { + "epoch": 412.0, + "eval_accuracy": 0.9255135245068131, + "eval_auc": 0.9534503918080233, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7670535856256957, + "eval_f1_macro": 0.8613613920200376, + "eval_loss": 0.22789432108402252, + "eval_pr_auc": 0.7836065049249683, + "eval_precision": 0.7565872020075283, + "eval_precision_macro": 0.8573894747901719, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.865486752346267, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.102, + "eval_steps_per_second": 3.829, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7416 + }, + { + "epoch": 413.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9534621486667634, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7669316375198728, + "eval_f1_macro": 0.8612846167990333, + "eval_loss": 0.2278737723827362, + "eval_pr_auc": 0.7836530325514856, + "eval_precision": 0.7563499529633114, + "eval_precision_macro": 0.857269581736829, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8654565718670011, + "eval_runtime": 0.2015, + "eval_samples_per_second": 809.132, + "eval_steps_per_second": 4.964, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7434 + }, + { + "epoch": 414.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9534808739947569, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7668575063613231, + "eval_f1_macro": 0.8612488935825013, + "eval_loss": 0.22784681618213654, + "eval_pr_auc": 0.7837271248134856, + "eval_precision": 0.7565108252274867, + "eval_precision_macro": 0.8573222168125176, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653255140360445, + "eval_runtime": 0.2677, + "eval_samples_per_second": 608.938, + "eval_steps_per_second": 3.736, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7452 + }, + { + "epoch": 415.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535030056973854, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.2278076857328415, + "eval_pr_auc": 0.783829043372685, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2388, + "eval_samples_per_second": 682.596, + "eval_steps_per_second": 4.188, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7470 + }, + { + "epoch": 416.0, + "eval_accuracy": 0.9255135245068131, + "eval_auc": 0.9535103634665969, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7669053301511536, + "eval_f1_macro": 0.861289946852225, + "eval_loss": 0.22778868675231934, + "eval_pr_auc": 0.7838714010488458, + "eval_precision": 0.7569095477386935, + "eval_precision_macro": 0.8574950553544232, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8652246366843541, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.742, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7488 + }, + { + "epoch": 416.6666666666667, + "grad_norm": 16683.39453125, + "learning_rate": 8.236268949930852e-08, + "loss": 0.186, + "step": 7500 + }, + { + "epoch": 417.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535206409854957, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.22777557373046875, + "eval_pr_auc": 0.7839160806088992, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.595, + "eval_steps_per_second": 3.936, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7506 + }, + { + "epoch": 418.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.953516261360965, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.22778432071208954, + "eval_pr_auc": 0.7838825807566469, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2552, + "eval_samples_per_second": 638.836, + "eval_steps_per_second": 3.919, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7524 + }, + { + "epoch": 419.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.953526694599847, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.22777114808559418, + "eval_pr_auc": 0.783922204343384, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2628, + "eval_samples_per_second": 620.269, + "eval_steps_per_second": 3.805, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7542 + }, + { + "epoch": 420.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535399113334307, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.22775039076805115, + "eval_pr_auc": 0.7839806097795337, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2677, + "eval_samples_per_second": 608.883, + "eval_steps_per_second": 3.735, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7560 + }, + { + "epoch": 421.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535487289774859, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.22773513197898865, + "eval_pr_auc": 0.7840321361391863, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2732, + "eval_samples_per_second": 596.635, + "eval_steps_per_second": 3.66, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7578 + }, + { + "epoch": 422.0, + "eval_accuracy": 0.9254118364856619, + "eval_auc": 0.9535455951128217, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7668097281831188, + "eval_f1_macro": 0.8612078600062212, + "eval_loss": 0.22774243354797363, + "eval_pr_auc": 0.7840612126983214, + "eval_precision": 0.7561128526645768, + "eval_precision_macro": 0.8571497629022605, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.865426391387735, + "eval_runtime": 0.1892, + "eval_samples_per_second": 861.347, + "eval_steps_per_second": 5.284, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7596 + }, + { + "epoch": 423.0, + "eval_accuracy": 0.9254118364856619, + "eval_auc": 0.9535476389376027, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7668097281831188, + "eval_f1_macro": 0.8612078600062212, + "eval_loss": 0.22774267196655273, + "eval_pr_auc": 0.7840628987467491, + "eval_precision": 0.7561128526645768, + "eval_precision_macro": 0.8571497629022605, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.865426391387735, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.36, + "eval_steps_per_second": 3.935, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7614 + }, + { + "epoch": 424.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9535528458245448, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7670057215511761, + "eval_f1_macro": 0.8613203162894483, + "eval_loss": 0.22773417830467224, + "eval_pr_auc": 0.7840834648119666, + "eval_precision": 0.756189282356628, + "eval_precision_macro": 0.8572170542389439, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8655876296979576, + "eval_runtime": 0.2571, + "eval_samples_per_second": 633.927, + "eval_steps_per_second": 3.889, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7632 + }, + { + "epoch": 425.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9535637559558758, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7669316375198728, + "eval_f1_macro": 0.8612846167990333, + "eval_loss": 0.22771182656288147, + "eval_pr_auc": 0.7841366739811415, + "eval_precision": 0.7563499529633114, + "eval_precision_macro": 0.857269581736829, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8654565718670011, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.859, + "eval_steps_per_second": 3.895, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7650 + }, + { + "epoch": 426.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535682523703939, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7673235855054037, + "eval_f1_macro": 0.8615095109466251, + "eval_loss": 0.22770953178405762, + "eval_pr_auc": 0.7841596739317596, + "eval_precision": 0.7565026637417738, + "eval_precision_macro": 0.8574040902613707, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8657790484874461, + "eval_runtime": 0.2634, + "eval_samples_per_second": 618.83, + "eval_steps_per_second": 3.797, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7668 + }, + { + "epoch": 427.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535677852104438, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7674714104193139, + "eval_f1_macro": 0.8615807415292696, + "eval_loss": 0.22770845890045166, + "eval_pr_auc": 0.7841620948509175, + "eval_precision": 0.7561815336463223, + "eval_precision_macro": 0.8572991684500658, + "eval_pred_class_0": 16473, + "eval_pred_class_1": 3195, + "eval_predicted_binding_ratio": 0.16244661378889566, + "eval_recall": 0.7791035149951628, + "eval_recall_macro": 0.866041164149359, + "eval_runtime": 0.2571, + "eval_samples_per_second": 633.948, + "eval_steps_per_second": 3.889, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7686 + }, + { + "epoch": 428.0, + "eval_accuracy": 0.9256152125279642, + "eval_auc": 0.9535825980738566, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.767445557145128, + "eval_f1_macro": 0.8615862980157476, + "eval_loss": 0.22767424583435059, + "eval_pr_auc": 0.7842346625186999, + "eval_precision": 0.7567398119122257, + "eval_precision_macro": 0.8575239295026598, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8658092289667121, + "eval_runtime": 0.254, + "eval_samples_per_second": 641.669, + "eval_steps_per_second": 3.937, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7704 + }, + { + "epoch": 429.0, + "eval_accuracy": 0.9256152125279642, + "eval_auc": 0.9535961651774029, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.767445557145128, + "eval_f1_macro": 0.8615862980157476, + "eval_loss": 0.22765418887138367, + "eval_pr_auc": 0.7843006481928805, + "eval_precision": 0.7567398119122257, + "eval_precision_macro": 0.8575239295026598, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8658092289667121, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.28, + "eval_steps_per_second": 3.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7722 + }, + { + "epoch": 430.0, + "eval_accuracy": 0.9256660565385397, + "eval_auc": 0.9536132165155758, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7674936386768448, + "eval_f1_macro": 0.8616274777746364, + "eval_loss": 0.22762420773506165, + "eval_pr_auc": 0.7844000578756268, + "eval_precision": 0.7571383746470034, + "eval_precision_macro": 0.8576966674521347, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657083516150216, + "eval_runtime": 0.2691, + "eval_samples_per_second": 605.732, + "eval_steps_per_second": 3.716, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7740 + }, + { + "epoch": 431.0, + "eval_accuracy": 0.9256660565385397, + "eval_auc": 0.9536164866352254, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7674936386768448, + "eval_f1_macro": 0.8616274777746364, + "eval_loss": 0.22761479020118713, + "eval_pr_auc": 0.7844231309752159, + "eval_precision": 0.7571383746470034, + "eval_precision_macro": 0.8576966674521347, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657083516150216, + "eval_runtime": 0.2572, + "eval_samples_per_second": 633.783, + "eval_steps_per_second": 3.888, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7758 + }, + { + "epoch": 432.0, + "eval_accuracy": 0.9256660565385397, + "eval_auc": 0.9536227348995558, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7674936386768448, + "eval_f1_macro": 0.8616274777746364, + "eval_loss": 0.22760987281799316, + "eval_pr_auc": 0.7844561314285999, + "eval_precision": 0.7571383746470034, + "eval_precision_macro": 0.8576966674521347, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657083516150216, + "eval_runtime": 0.2144, + "eval_samples_per_second": 760.142, + "eval_steps_per_second": 4.663, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7776 + }, + { + "epoch": 433.0, + "eval_accuracy": 0.9256660565385397, + "eval_auc": 0.9536278736590051, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7674936386768448, + "eval_f1_macro": 0.8616274777746364, + "eval_loss": 0.22760248184204102, + "eval_pr_auc": 0.7844848515258783, + "eval_precision": 0.7571383746470034, + "eval_precision_macro": 0.8576966674521347, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657083516150216, + "eval_runtime": 0.2745, + "eval_samples_per_second": 593.759, + "eval_steps_per_second": 3.643, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7794 + }, + { + "epoch": 434.0, + "eval_accuracy": 0.9257169005491153, + "eval_auc": 0.9536346669432771, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676896167912227, + "eval_f1_macro": 0.861739927468447, + "eval_loss": 0.22759221494197845, + "eval_pr_auc": 0.7845111553142384, + "eval_precision": 0.7572145545796738, + "eval_precision_macro": 0.8577638306878952, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8658695899252441, + "eval_runtime": 0.216, + "eval_samples_per_second": 754.683, + "eval_steps_per_second": 4.63, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7812 + }, + { + "epoch": 435.0, + "eval_accuracy": 0.9257169005491153, + "eval_auc": 0.9536369248830353, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676896167912227, + "eval_f1_macro": 0.861739927468447, + "eval_loss": 0.2275882065296173, + "eval_pr_auc": 0.7845220230482824, + "eval_precision": 0.7572145545796738, + "eval_precision_macro": 0.8577638306878952, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8658695899252441, + "eval_runtime": 0.262, + "eval_samples_per_second": 622.083, + "eval_steps_per_second": 3.816, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7830 + }, + { + "epoch": 436.0, + "eval_accuracy": 0.9257677445596909, + "eval_auc": 0.9536390660328059, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7678855325914149, + "eval_f1_macro": 0.8618523468803471, + "eval_loss": 0.2275806963443756, + "eval_pr_auc": 0.7845372505398349, + "eval_precision": 0.7572906867356538, + "eval_precision_macro": 0.8578309735638339, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660308282354665, + "eval_runtime": 0.2506, + "eval_samples_per_second": 650.493, + "eval_steps_per_second": 3.991, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7848 + }, + { + "epoch": 437.0, + "eval_accuracy": 0.9257169005491153, + "eval_auc": 0.9536480004668485, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676896167912227, + "eval_f1_macro": 0.861739927468447, + "eval_loss": 0.2275666743516922, + "eval_pr_auc": 0.7845807540266186, + "eval_precision": 0.7572145545796738, + "eval_precision_macro": 0.8577638306878952, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8658695899252441, + "eval_runtime": 0.1853, + "eval_samples_per_second": 879.496, + "eval_steps_per_second": 5.396, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7866 + }, + { + "epoch": 438.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9536552025160767, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7679338317162399, + "eval_f1_macro": 0.8618936307360285, + "eval_loss": 0.22755169868469238, + "eval_pr_auc": 0.7846223392518402, + "eval_precision": 0.7576898932831136, + "eval_precision_macro": 0.8580040292771594, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8659299508837761, + "eval_runtime": 0.2108, + "eval_samples_per_second": 773.198, + "eval_steps_per_second": 4.744, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7884 + }, + { + "epoch": 439.0, + "eval_accuracy": 0.9257677445596909, + "eval_auc": 0.95366627809989, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676639083386378, + "eval_f1_macro": 0.8617455448748739, + "eval_loss": 0.2275334894657135, + "eval_pr_auc": 0.784667391259121, + "eval_precision": 0.7577756833176249, + "eval_precision_macro": 0.8579900557928737, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8656376547425971, + "eval_runtime": 0.259, + "eval_samples_per_second": 629.311, + "eval_steps_per_second": 3.861, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7902 + }, + { + "epoch": 440.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9536672708147835, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7679338317162399, + "eval_f1_macro": 0.8618936307360285, + "eval_loss": 0.22753211855888367, + "eval_pr_auc": 0.7846795397266301, + "eval_precision": 0.7576898932831136, + "eval_precision_macro": 0.8580040292771594, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8659299508837761, + "eval_runtime": 0.2424, + "eval_samples_per_second": 672.466, + "eval_steps_per_second": 4.126, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7920 + }, + { + "epoch": 441.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9536694119645541, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7679338317162399, + "eval_f1_macro": 0.8618936307360285, + "eval_loss": 0.22753164172172546, + "eval_pr_auc": 0.7846730527925044, + "eval_precision": 0.7576898932831136, + "eval_precision_macro": 0.8580040292771594, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8659299508837761, + "eval_runtime": 0.2647, + "eval_samples_per_second": 615.862, + "eval_steps_per_second": 3.778, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7938 + }, + { + "epoch": 442.0, + "eval_accuracy": 0.9257677445596909, + "eval_auc": 0.9536803123633861, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7677378300986318, + "eval_f1_macro": 0.8617811692096791, + "eval_loss": 0.22751472890377045, + "eval_pr_auc": 0.7847227034562287, + "eval_precision": 0.7576138147566719, + "eval_precision_macro": 0.8579369201187351, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657687125735536, + "eval_runtime": 0.2667, + "eval_samples_per_second": 611.172, + "eval_steps_per_second": 3.75, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7956 + }, + { + "epoch": 443.0, + "eval_accuracy": 0.9257677445596909, + "eval_auc": 0.9536880691650549, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676639083386378, + "eval_f1_macro": 0.8617455448748739, + "eval_loss": 0.22749866545200348, + "eval_pr_auc": 0.7847641543874231, + "eval_precision": 0.7577756833176249, + "eval_precision_macro": 0.8579900557928737, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8656376547425971, + "eval_runtime": 0.2405, + "eval_samples_per_second": 677.868, + "eval_steps_per_second": 4.159, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7974 + }, + { + "epoch": 444.0, + "eval_accuracy": 0.9257677445596909, + "eval_auc": 0.9536937626769448, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676639083386378, + "eval_f1_macro": 0.8617455448748739, + "eval_loss": 0.2274913638830185, + "eval_pr_auc": 0.7847876605630844, + "eval_precision": 0.7577756833176249, + "eval_precision_macro": 0.8579900557928737, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8656376547425971, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.896, + "eval_steps_per_second": 3.895, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7992 + }, + { + "epoch": 444.44444444444446, + "grad_norm": 19008.333984375, + "learning_rate": 3.72113927636733e-08, + "loss": 0.1854, + "step": 8000 + }, + { + "epoch": 445.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.953696935471605, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.767859984089101, + "eval_f1_macro": 0.861858042633035, + "eval_loss": 0.22748790681362152, + "eval_pr_auc": 0.7848011703648987, + "eval_precision": 0.7578517587939698, + "eval_precision_macro": 0.8580571582128063, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657988930528197, + "eval_runtime": 0.2651, + "eval_samples_per_second": 614.861, + "eval_steps_per_second": 3.772, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8010 + }, + { + "epoch": 446.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9536995243163275, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.767859984089101, + "eval_f1_macro": 0.861858042633035, + "eval_loss": 0.22748683393001556, + "eval_pr_auc": 0.7848132010793348, + "eval_precision": 0.7578517587939698, + "eval_precision_macro": 0.8580571582128063, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657988930528197, + "eval_runtime": 0.2471, + "eval_samples_per_second": 659.648, + "eval_steps_per_second": 4.047, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8028 + }, + { + "epoch": 447.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9537016265361022, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.767859984089101, + "eval_f1_macro": 0.861858042633035, + "eval_loss": 0.22748340666294098, + "eval_pr_auc": 0.7848243277439235, + "eval_precision": 0.7578517587939698, + "eval_precision_macro": 0.8580571582128063, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657988930528197, + "eval_runtime": 0.2047, + "eval_samples_per_second": 796.254, + "eval_steps_per_second": 4.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8046 + }, + { + "epoch": 448.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9537030474809498, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7679338317162399, + "eval_f1_macro": 0.8618936307360285, + "eval_loss": 0.2274865061044693, + "eval_pr_auc": 0.7848313481583303, + "eval_precision": 0.7576898932831136, + "eval_precision_macro": 0.8580040292771594, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8659299508837761, + "eval_runtime": 0.2613, + "eval_samples_per_second": 623.69, + "eval_steps_per_second": 3.826, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8064 + }, + { + "epoch": 449.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9537053443507039, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7679338317162399, + "eval_f1_macro": 0.8618936307360285, + "eval_loss": 0.22748111188411713, + "eval_pr_auc": 0.7848426067264029, + "eval_precision": 0.7576898932831136, + "eval_precision_macro": 0.8580040292771594, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8659299508837761, + "eval_runtime": 0.2031, + "eval_samples_per_second": 802.733, + "eval_steps_per_second": 4.925, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8082 + }, + { + "epoch": 450.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537081765079003, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7681297709923665, + "eval_f1_macro": 0.8620060619667715, + "eval_loss": 0.22747540473937988, + "eval_pr_auc": 0.7848573104950377, + "eval_precision": 0.7577659240665202, + "eval_precision_macro": 0.8580711180917517, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660911891939986, + "eval_runtime": 0.2266, + "eval_samples_per_second": 719.273, + "eval_steps_per_second": 4.413, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8100 + }, + { + "epoch": 451.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537093638727732, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7681297709923665, + "eval_f1_macro": 0.8620060619667715, + "eval_loss": 0.22747638821601868, + "eval_pr_auc": 0.7848599055730325, + "eval_precision": 0.7577659240665202, + "eval_precision_macro": 0.8580711180917517, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660911891939986, + "eval_runtime": 0.2522, + "eval_samples_per_second": 646.395, + "eval_steps_per_second": 3.966, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8118 + }, + { + "epoch": 452.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.953713694834809, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7681297709923665, + "eval_f1_macro": 0.8620060619667715, + "eval_loss": 0.22746768593788147, + "eval_pr_auc": 0.7848753398216984, + "eval_precision": 0.7577659240665202, + "eval_precision_macro": 0.8580711180917517, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660911891939986, + "eval_runtime": 0.2493, + "eval_samples_per_second": 653.726, + "eval_steps_per_second": 4.011, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8136 + }, + { + "epoch": 453.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537144345047297, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7681297709923665, + "eval_f1_macro": 0.8620060619667715, + "eval_loss": 0.22746726870536804, + "eval_pr_auc": 0.7848749928109487, + "eval_precision": 0.7577659240665202, + "eval_precision_macro": 0.8580711180917517, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660911891939986, + "eval_runtime": 0.2607, + "eval_samples_per_second": 625.248, + "eval_steps_per_second": 3.836, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8154 + }, + { + "epoch": 454.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537162252845378, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7681297709923665, + "eval_f1_macro": 0.8620060619667715, + "eval_loss": 0.22746579349040985, + "eval_pr_auc": 0.7848822771158814, + "eval_precision": 0.7577659240665202, + "eval_precision_macro": 0.8580711180917517, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660911891939986, + "eval_runtime": 0.2463, + "eval_samples_per_second": 661.74, + "eval_steps_per_second": 4.06, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8172 + }, + { + "epoch": 455.0, + "eval_accuracy": 0.9260219646125687, + "eval_auc": 0.9537124685399404, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7687172150691464, + "eval_f1_macro": 0.8623431740348002, + "eval_loss": 0.22747564315795898, + "eval_pr_auc": 0.7848568499525364, + "eval_precision": 0.7579937304075235, + "eval_precision_macro": 0.8582722627034582, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665749041246662, + "eval_runtime": 0.2569, + "eval_samples_per_second": 634.446, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8190 + }, + { + "epoch": 456.0, + "eval_accuracy": 0.9260219646125687, + "eval_auc": 0.9537148724671828, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7687172150691464, + "eval_f1_macro": 0.8623431740348002, + "eval_loss": 0.22747227549552917, + "eval_pr_auc": 0.7848699531009984, + "eval_precision": 0.7579937304075235, + "eval_precision_macro": 0.8582722627034582, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665749041246662, + "eval_runtime": 0.2171, + "eval_samples_per_second": 750.821, + "eval_steps_per_second": 4.606, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8208 + }, + { + "epoch": 457.0, + "eval_accuracy": 0.925971120601993, + "eval_auc": 0.9537148043396901, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.768595041322314, + "eval_f1_macro": 0.8622662895753321, + "eval_loss": 0.22747254371643066, + "eval_pr_auc": 0.784871702941944, + "eval_precision": 0.7577561892823567, + "eval_precision_macro": 0.858152234351077, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665447236454001, + "eval_runtime": 0.2395, + "eval_samples_per_second": 680.611, + "eval_steps_per_second": 4.176, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8226 + }, + { + "epoch": 458.0, + "eval_accuracy": 0.925971120601993, + "eval_auc": 0.9537153006971368, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.768595041322314, + "eval_f1_macro": 0.8622662895753321, + "eval_loss": 0.22747036814689636, + "eval_pr_auc": 0.7848700532008381, + "eval_precision": 0.7577561892823567, + "eval_precision_macro": 0.858152234351077, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665447236454001, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.347, + "eval_steps_per_second": 3.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8244 + }, + { + "epoch": 459.0, + "eval_accuracy": 0.925971120601993, + "eval_auc": 0.9537154856146172, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.768595041322314, + "eval_f1_macro": 0.8622662895753321, + "eval_loss": 0.22747208178043365, + "eval_pr_auc": 0.7848674676035483, + "eval_precision": 0.7577561892823567, + "eval_precision_macro": 0.858152234351077, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665447236454001, + "eval_runtime": 0.2162, + "eval_samples_per_second": 753.78, + "eval_steps_per_second": 4.624, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8262 + }, + { + "epoch": 460.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537164783295108, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22747138142585754, + "eval_pr_auc": 0.7848698987106607, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2554, + "eval_samples_per_second": 638.29, + "eval_steps_per_second": 3.916, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8280 + }, + { + "epoch": 461.0, + "eval_accuracy": 0.925971120601993, + "eval_auc": 0.9537188335942584, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.768595041322314, + "eval_f1_macro": 0.8622662895753321, + "eval_loss": 0.2274673730134964, + "eval_pr_auc": 0.7848777447449568, + "eval_precision": 0.7577561892823567, + "eval_precision_macro": 0.858152234351077, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665447236454001, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.971, + "eval_steps_per_second": 3.896, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8298 + }, + { + "epoch": 462.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537204881190811, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22746412456035614, + "eval_pr_auc": 0.7848871874390749, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2183, + "eval_samples_per_second": 746.701, + "eval_steps_per_second": 4.581, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8316 + }, + { + "epoch": 463.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537223956888766, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274623066186905, + "eval_pr_auc": 0.784896188141376, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2673, + "eval_samples_per_second": 609.736, + "eval_steps_per_second": 3.741, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8334 + }, + { + "epoch": 464.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537243227236701, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274596393108368, + "eval_pr_auc": 0.7849067717237345, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2647, + "eval_samples_per_second": 615.711, + "eval_steps_per_second": 3.777, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8352 + }, + { + "epoch": 465.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537252959735659, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22745810449123383, + "eval_pr_auc": 0.7849091375774293, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2588, + "eval_samples_per_second": 629.717, + "eval_steps_per_second": 3.863, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8370 + }, + { + "epoch": 466.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537274565883342, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274550497531891, + "eval_pr_auc": 0.7849133909574995, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2576, + "eval_samples_per_second": 632.839, + "eval_steps_per_second": 3.882, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8388 + }, + { + "epoch": 467.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537294225531237, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274521142244339, + "eval_pr_auc": 0.7849233363238722, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2621, + "eval_samples_per_second": 621.858, + "eval_steps_per_second": 3.815, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8406 + }, + { + "epoch": 468.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537300065030612, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.227451354265213, + "eval_pr_auc": 0.7849296985834374, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.266, + "eval_samples_per_second": 612.691, + "eval_steps_per_second": 3.759, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8424 + }, + { + "epoch": 469.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.953731164670437, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22744858264923096, + "eval_pr_auc": 0.7849332304496031, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.1831, + "eval_samples_per_second": 890.399, + "eval_steps_per_second": 5.463, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8442 + }, + { + "epoch": 470.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537301622230444, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22745059430599213, + "eval_pr_auc": 0.7849302221742441, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.282, + "eval_samples_per_second": 577.963, + "eval_steps_per_second": 3.546, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8460 + }, + { + "epoch": 471.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.95373253695279, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274470180273056, + "eval_pr_auc": 0.7849404295185345, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2547, + "eval_samples_per_second": 639.882, + "eval_steps_per_second": 3.926, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8478 + }, + { + "epoch": 472.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537337243176629, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22744475305080414, + "eval_pr_auc": 0.7849440080112278, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2516, + "eval_samples_per_second": 647.737, + "eval_steps_per_second": 3.974, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8496 + }, + { + "epoch": 472.22222222222223, + "grad_norm": 16415.080078125, + "learning_rate": 9.409753403698373e-09, + "loss": 0.185, + "step": 8500 + }, + { + "epoch": 473.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537344250575877, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22744259238243103, + "eval_pr_auc": 0.7849494179086262, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2539, + "eval_samples_per_second": 642.078, + "eval_steps_per_second": 3.939, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8514 + }, + { + "epoch": 474.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537351647275085, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22744259238243103, + "eval_pr_auc": 0.7849503915002546, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.1794, + "eval_samples_per_second": 908.686, + "eval_steps_per_second": 5.575, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8532 + }, + { + "epoch": 475.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537357876074417, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22744180262088776, + "eval_pr_auc": 0.7849530625634429, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.1819, + "eval_samples_per_second": 895.935, + "eval_steps_per_second": 5.497, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8550 + }, + { + "epoch": 476.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537367997873333, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274399697780609, + "eval_pr_auc": 0.7849568926584508, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2575, + "eval_samples_per_second": 633.091, + "eval_steps_per_second": 3.884, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8568 + }, + { + "epoch": 477.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537370139023102, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274392694234848, + "eval_pr_auc": 0.784958754067368, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2669, + "eval_samples_per_second": 610.802, + "eval_steps_per_second": 3.747, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8586 + }, + { + "epoch": 478.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537376562472413, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22743819653987885, + "eval_pr_auc": 0.7849582401594454, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2266, + "eval_samples_per_second": 719.444, + "eval_steps_per_second": 4.414, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8604 + }, + { + "epoch": 479.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537377925022268, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743773460388184, + "eval_pr_auc": 0.7849574280187238, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2575, + "eval_samples_per_second": 632.921, + "eval_steps_per_second": 3.883, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8622 + }, + { + "epoch": 480.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537384932421518, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743669152259827, + "eval_pr_auc": 0.7849621773766102, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2648, + "eval_samples_per_second": 615.492, + "eval_steps_per_second": 3.776, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8640 + }, + { + "epoch": 481.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537382012671831, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743773460388184, + "eval_pr_auc": 0.7849637957606732, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2656, + "eval_samples_per_second": 613.647, + "eval_steps_per_second": 3.765, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8658 + }, + { + "epoch": 482.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537380066172038, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.227437824010849, + "eval_pr_auc": 0.7849623241007161, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.1901, + "eval_samples_per_second": 857.378, + "eval_steps_per_second": 5.26, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8676 + }, + { + "epoch": 483.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537382791271747, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743763029575348, + "eval_pr_auc": 0.7849636397572854, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2674, + "eval_samples_per_second": 609.618, + "eval_steps_per_second": 3.74, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8694 + }, + { + "epoch": 484.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537387365546257, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743697464466095, + "eval_pr_auc": 0.7849651242159179, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2636, + "eval_samples_per_second": 618.398, + "eval_steps_per_second": 3.794, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8712 + }, + { + "epoch": 485.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537391745170787, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.2274360954761505, + "eval_pr_auc": 0.7849672499360805, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.1892, + "eval_samples_per_second": 861.673, + "eval_steps_per_second": 5.286, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8730 + }, + { + "epoch": 486.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.953739330237062, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743603587150574, + "eval_pr_auc": 0.784968007514094, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2358, + "eval_samples_per_second": 691.342, + "eval_steps_per_second": 4.241, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8748 + }, + { + "epoch": 487.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.95373934970206, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743603587150574, + "eval_pr_auc": 0.7849684807581643, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2101, + "eval_samples_per_second": 775.889, + "eval_steps_per_second": 4.76, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8766 + }, + { + "epoch": 488.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537393886320558, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.2274361550807953, + "eval_pr_auc": 0.7849697577570401, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.25, + "eval_samples_per_second": 652.0, + "eval_steps_per_second": 4.0, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8784 + }, + { + "epoch": 489.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537394859570454, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743597626686096, + "eval_pr_auc": 0.784969480914725, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.1914, + "eval_samples_per_second": 851.633, + "eval_steps_per_second": 5.225, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8802 + }, + { + "epoch": 490.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537395540845381, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.2274358570575714, + "eval_pr_auc": 0.784970228255774, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2687, + "eval_samples_per_second": 606.685, + "eval_steps_per_second": 3.722, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8820 + }, + { + "epoch": 491.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537395443520391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743581235408783, + "eval_pr_auc": 0.7849691356929127, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.272, + "eval_samples_per_second": 599.26, + "eval_steps_per_second": 3.676, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8838 + }, + { + "epoch": 492.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537396222120308, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743573784828186, + "eval_pr_auc": 0.7849709563979171, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2531, + "eval_samples_per_second": 644.069, + "eval_steps_per_second": 3.951, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8856 + }, + { + "epoch": 493.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.953739583282035, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743569314479828, + "eval_pr_auc": 0.7849695249246844, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.242, + "eval_samples_per_second": 673.593, + "eval_steps_per_second": 4.132, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8874 + }, + { + "epoch": 494.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537395443520391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743569314479828, + "eval_pr_auc": 0.7849692821960631, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2668, + "eval_samples_per_second": 610.934, + "eval_steps_per_second": 3.748, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8892 + }, + { + "epoch": 495.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537395248870412, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743569314479828, + "eval_pr_auc": 0.7849694151476083, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2644, + "eval_samples_per_second": 616.534, + "eval_steps_per_second": 3.782, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8910 + }, + { + "epoch": 496.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537396027470327, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743569314479828, + "eval_pr_auc": 0.7849695246002066, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2501, + "eval_samples_per_second": 651.692, + "eval_steps_per_second": 3.998, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8928 + }, + { + "epoch": 497.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.953739563817037, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743569314479828, + "eval_pr_auc": 0.7849694727996652, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2737, + "eval_samples_per_second": 595.465, + "eval_steps_per_second": 3.653, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8946 + }, + { + "epoch": 498.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537395832820348, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743569314479828, + "eval_pr_auc": 0.7849694859174267, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2635, + "eval_samples_per_second": 618.524, + "eval_steps_per_second": 3.795, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8964 + }, + { + "epoch": 499.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537396027470328, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.2274356633424759, + "eval_pr_auc": 0.784969582259249, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2533, + "eval_samples_per_second": 643.57, + "eval_steps_per_second": 3.948, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8982 + } + ], + "logging_steps": 500, + "max_steps": 9000, + "num_input_tokens_seen": 0, + "num_train_epochs": 500, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6751958238244128.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/training_args.bin b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8903358100d3be09ad49078090c6e572b3ddef68 --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:499ba8a39afec206dd7194e2d216bf0be2633330bfcda3d90a12ddcbc04cdaca +size 5368 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/config.json b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a424582b16776120730808f810ad9081375d580e --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/config.json @@ -0,0 +1,52 @@ +{ + "architectures": [ + "GloMeModelForTokenClassification" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "bos_token_id": 28, + "cdr_weight": 0.0, + "class_weights": [ + 0.1, + 0.9 + ], + "classifier_activation": "gelu", + "classifier_bias": false, + "classifier_dropout": 0.1, + "classifier_pooling": "cls", + "cls_token_id": 28, + "compress_block_size": 16, + "compress_block_sliding_stride": 16, + "decoder_bias": true, + "dice_weight": 0.1, + "embedding_dropout": 0.1, + "eos_token_id": 29, + "hidden_activation": "gelu", + "hidden_size": 320, + "inner_rank": 32, + "intermediate_size": 1280, + "kv_heads": 10, + "mask_token_id": 31, + "mlp_bias": false, + "mlp_dropout": 0.1, + "model_size": "tiny", + "model_type": "glome", + "norm_bias": false, + "norm_eps": 1e-05, + "num_attention_heads": 20, + "num_hidden_layers": 6, + "num_selected_blocks": 8, + "num_slots": 64, + "pad_token_id": 30, + "reference_compile": null, + "selection_block_size": 16, + "sep_token_id": 29, + "sliding_window_size": 0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.52.3", + "unk_token_id": 27, + "use_glome": true, + "use_nsa": true, + "vocab_size": 36 +} diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/model.safetensors b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..426642c8e78b3fc8a910a2aa287de469af270abf --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:423cf42c264d2567fd289ade1e6c1f0e506d54000000d68db8e861743d892f63 +size 61385376 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/optimizer.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad0d550d08ca48892fb9f60694caceeeee59c1f1 --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84e048a6921928cf224d4195130617e539f1adeb7a4f1611cbdb14ccefd71846 +size 122881658 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/rng_state.pth b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c11cf2d89486615ca4d9ed2951aa5910e62fcb32 --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09c0fb83ad832f6cdeeca05a528dfc9d2b3afad5151c7ac2d59232a6247360ef +size 14244 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/scaler.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363 +size 988 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/scheduler.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c99a097a5743d551787f21bf50846f9e07dd1b2f --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d23023944b37031b4b7a09c8ee9683bdb1128842a447d5968d6407368b043a9 +size 1064 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/trainer_state.json b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..39a41914fca7c4ef3cb7ab594a14b89f74fb1db6 --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/trainer_state.json @@ -0,0 +1,12160 @@ +{ + "best_global_step": 8856, + "best_metric": 0.7849709563979171, + "best_model_checkpoint": "./results/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856", + "epoch": 500.0, + "eval_steps": 500, + "global_step": 9000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy": 0.3401972747610332, + "eval_auc": 0.39064302367564674, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25900759435847653, + "eval_f1_macro": 0.3321798728791878, + "eval_loss": 1.0617570877075195, + "eval_pr_auc": 0.1212308124824295, + "eval_precision": 0.15736885928393005, + "eval_precision_macro": 0.49944165947453734, + "eval_pred_class_0": 5256, + "eval_pred_class_1": 14412, + "eval_predicted_binding_ratio": 0.7327638804148872, + "eval_recall": 0.7313769751693002, + "eval_recall_macro": 0.4991767473782156, + "eval_runtime": 0.304, + "eval_samples_per_second": 536.239, + "eval_steps_per_second": 3.29, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 18 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.341010778930242, + "eval_auc": 0.39081343973238586, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2590750585948665, + "eval_f1_macro": 0.33285209231533375, + "eval_loss": 1.0604556798934937, + "eval_pr_auc": 0.12126612292918731, + "eval_precision": 0.1574485825458588, + "eval_precision_macro": 0.4995923731531417, + "eval_pred_class_0": 5276, + "eval_pred_class_1": 14392, + "eval_predicted_binding_ratio": 0.731747000203376, + "eval_recall": 0.7307320219284101, + "eval_recall_macro": 0.4993975193845588, + "eval_runtime": 0.2793, + "eval_samples_per_second": 583.516, + "eval_steps_per_second": 3.58, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 36 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.34223103518405534, + "eval_auc": 0.3911369382652214, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2591765446944969, + "eval_f1_macro": 0.33385837704253485, + "eval_loss": 1.058252215385437, + "eval_pr_auc": 0.12133107613942488, + "eval_precision": 0.15756858376270713, + "eval_precision_macro": 0.4998170849458089, + "eval_pred_class_0": 5306, + "eval_pred_class_1": 14362, + "eval_predicted_binding_ratio": 0.7302216798861094, + "eval_recall": 0.7297645920670751, + "eval_recall_macro": 0.49972867739407356, + "eval_runtime": 0.2676, + "eval_samples_per_second": 609.165, + "eval_steps_per_second": 3.737, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 54 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.3441122635753508, + "eval_auc": 0.3915867840995182, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.259556882103088, + "eval_f1_macro": 0.33544605079873757, + "eval_loss": 1.0551481246948242, + "eval_pr_auc": 0.12142208631760734, + "eval_precision": 0.15788003631031353, + "eval_precision_macro": 0.500391299247358, + "eval_pred_class_0": 5347, + "eval_pred_class_1": 14321, + "eval_predicted_binding_ratio": 0.7281370754525117, + "eval_recall": 0.7291196388261851, + "eval_recall_macro": 0.5005832394650029, + "eval_runtime": 0.2336, + "eval_samples_per_second": 697.796, + "eval_steps_per_second": 4.281, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 72 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.3457392719137686, + "eval_auc": 0.39218283153314865, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2597791072250345, + "eval_f1_macro": 0.3367955302889021, + "eval_loss": 1.0511513948440552, + "eval_pr_auc": 0.12154600341235242, + "eval_precision": 0.15809003710705033, + "eval_precision_macro": 0.5007720380521324, + "eval_pred_class_0": 5385, + "eval_pred_class_1": 14283, + "eval_predicted_binding_ratio": 0.7262050030506406, + "eval_recall": 0.72815220896485, + "eval_recall_macro": 0.5011558413086458, + "eval_runtime": 0.2629, + "eval_samples_per_second": 620.078, + "eval_steps_per_second": 3.804, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 90 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.34873906853772624, + "eval_auc": 0.39291782012189097, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.259894840238054, + "eval_f1_macro": 0.33921701928376435, + "eval_loss": 1.0462485551834106, + "eval_pr_auc": 0.1216940029412557, + "eval_precision": 0.1583133887089962, + "eval_precision_macro": 0.5011632853468087, + "eval_pred_class_0": 5462, + "eval_pred_class_1": 14206, + "eval_predicted_binding_ratio": 0.722290014236323, + "eval_recall": 0.7252499193808449, + "eval_recall_macro": 0.5017569691067321, + "eval_runtime": 0.2393, + "eval_samples_per_second": 681.219, + "eval_steps_per_second": 4.179, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 108 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.3517897091722595, + "eval_auc": 0.3937714770704174, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2599407906193766, + "eval_f1_macro": 0.3416488972772128, + "eval_loss": 1.0405118465423584, + "eval_pr_auc": 0.12187498322705145, + "eval_precision": 0.1585020529520034, + "eval_precision_macro": 0.5014812682659693, + "eval_pred_class_0": 5542, + "eval_pred_class_1": 14126, + "eval_predicted_binding_ratio": 0.7182224933902787, + "eval_recall": 0.7220251531763947, + "eval_recall_macro": 0.5022572195531276, + "eval_runtime": 0.2765, + "eval_samples_per_second": 589.571, + "eval_steps_per_second": 3.617, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 126 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.354586129753915, + "eval_auc": 0.3947741191129793, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2591338858410179, + "eval_f1_macro": 0.3436917965373002, + "eval_loss": 1.0338975191116333, + "eval_pr_auc": 0.12208733120990471, + "eval_precision": 0.1581985320316397, + "eval_precision_macro": 0.5009271275952343, + "eval_pred_class_0": 5635, + "eval_pred_class_1": 14033, + "eval_predicted_binding_ratio": 0.713494000406752, + "eval_recall": 0.7158980973879394, + "eval_recall_macro": 0.5014270471245847, + "eval_runtime": 0.2385, + "eval_samples_per_second": 683.567, + "eval_steps_per_second": 4.194, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 144 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.3598230628431971, + "eval_auc": 0.39592411118975185, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25913503971756396, + "eval_f1_macro": 0.3477761944928628, + "eval_loss": 1.0263975858688354, + "eval_pr_auc": 0.12233214426039367, + "eval_precision": 0.15848567727076435, + "eval_precision_macro": 0.5013938604573427, + "eval_pred_class_0": 5774, + "eval_pred_class_1": 13894, + "eval_predicted_binding_ratio": 0.70642668293675, + "eval_recall": 0.710093518219929, + "eval_recall_macro": 0.502176595531767, + "eval_runtime": 0.2745, + "eval_samples_per_second": 593.873, + "eval_steps_per_second": 3.643, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 162 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.3636872076469392, + "eval_auc": 0.3972021050928084, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25801861623288075, + "eval_f1_macro": 0.35051449303788773, + "eval_loss": 1.0180495977401733, + "eval_pr_auc": 0.12260540333611444, + "eval_precision": 0.15807060874618625, + "eval_precision_macro": 0.5006720376838353, + "eval_pred_class_0": 5902, + "eval_pred_class_1": 13766, + "eval_predicted_binding_ratio": 0.6999186495830791, + "eval_recall": 0.7017091260883586, + "eval_recall_macro": 0.5010628083511147, + "eval_runtime": 0.2708, + "eval_samples_per_second": 601.838, + "eval_steps_per_second": 3.692, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 180 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.3691275167785235, + "eval_auc": 0.39866621357342186, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.258604206500956, + "eval_f1_macro": 0.35478882891419483, + "eval_loss": 1.0087939500808716, + "eval_pr_auc": 0.12292033936081492, + "eval_precision": 0.1587092042537587, + "eval_precision_macro": 0.5016983780261003, + "eval_pred_class_0": 6033, + "eval_pred_class_1": 13635, + "eval_predicted_binding_ratio": 0.6932580841976815, + "eval_recall": 0.6978394066430184, + "eval_recall_macro": 0.5027194256611, + "eval_runtime": 0.2616, + "eval_samples_per_second": 623.126, + "eval_steps_per_second": 3.823, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 198 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.3740593858043523, + "eval_auc": 0.4002664991794433, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25823944086280654, + "eval_f1_macro": 0.35841740282728696, + "eval_loss": 0.9987770318984985, + "eval_pr_auc": 0.12326823892822446, + "eval_precision": 0.15878778897451096, + "eval_precision_macro": 0.5017853397238077, + "eval_pred_class_0": 6172, + "eval_pred_class_1": 13496, + "eval_predicted_binding_ratio": 0.6861907667276794, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.5028947176998165, + "eval_runtime": 0.258, + "eval_samples_per_second": 631.708, + "eval_steps_per_second": 3.876, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 216 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.37980475899938987, + "eval_auc": 0.40207323055334293, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25811945018854154, + "eval_f1_macro": 0.36265804779890953, + "eval_loss": 0.987876832485199, + "eval_pr_auc": 0.12366119818610516, + "eval_precision": 0.15905854133873024, + "eval_precision_macro": 0.5021624301446299, + "eval_pred_class_0": 6327, + "eval_pred_class_1": 13341, + "eval_predicted_binding_ratio": 0.6783099450884685, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.5035528974067893, + "eval_runtime": 0.254, + "eval_samples_per_second": 641.68, + "eval_steps_per_second": 3.937, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 234 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.3867703884482408, + "eval_auc": 0.40404318566725245, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2582866982350409, + "eval_f1_macro": 0.36779990383041317, + "eval_loss": 0.9760332107543945, + "eval_pr_auc": 0.12409453800524387, + "eval_precision": 0.1595744680851064, + "eval_precision_macro": 0.5028818867776484, + "eval_pred_class_0": 6508, + "eval_pred_class_1": 13160, + "eval_predicted_binding_ratio": 0.6691071791742933, + "eval_recall": 0.6772009029345373, + "eval_recall_macro": 0.5048043507851898, + "eval_runtime": 0.2717, + "eval_samples_per_second": 599.919, + "eval_steps_per_second": 3.68, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 252 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.39429530201342283, + "eval_auc": 0.40629005957398867, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2577107607950651, + "eval_f1_macro": 0.37306889008104693, + "eval_loss": 0.9632152915000916, + "eval_pr_auc": 0.12458802431940903, + "eval_precision": 0.15971578622181032, + "eval_precision_macro": 0.5029977740632862, + "eval_pred_class_0": 6720, + "eval_pred_class_1": 12948, + "eval_predicted_binding_ratio": 0.6583282489322758, + "eval_recall": 0.6668816510802967, + "eval_recall_macro": 0.5050772111259515, + "eval_runtime": 0.253, + "eval_samples_per_second": 644.248, + "eval_steps_per_second": 3.952, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 270 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.4020744356314826, + "eval_auc": 0.408681470822737, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25738822935084615, + "eval_f1_macro": 0.37848134871797623, + "eval_loss": 0.9496278166770935, + "eval_pr_auc": 0.12511500176534787, + "eval_precision": 0.16003140950137418, + "eval_precision_macro": 0.5033533652151325, + "eval_pred_class_0": 6933, + "eval_pred_class_1": 12735, + "eval_predicted_binding_ratio": 0.6474984746796827, + "eval_recall": 0.6572073524669462, + "eval_recall_macro": 0.5057630895249562, + "eval_runtime": 0.269, + "eval_samples_per_second": 606.014, + "eval_steps_per_second": 3.718, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 288 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.408989221069758, + "eval_auc": 0.4113766625614338, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.255349135169763, + "eval_f1_macro": 0.38271123621844805, + "eval_loss": 0.9350630640983582, + "eval_pr_auc": 0.1257149536327416, + "eval_precision": 0.15932528579422817, + "eval_precision_macro": 0.5022775332449281, + "eval_pred_class_0": 7159, + "eval_pred_class_1": 12509, + "eval_predicted_binding_ratio": 0.6360077282896075, + "eval_recall": 0.6426959045469204, + "eval_recall_macro": 0.5039700323120913, + "eval_runtime": 0.2687, + "eval_samples_per_second": 606.61, + "eval_steps_per_second": 3.722, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 306 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.4161582265609111, + "eval_auc": 0.41440477389195646, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2538176619663396, + "eval_f1_macro": 0.3871501973338609, + "eval_loss": 0.9196970462799072, + "eval_pr_auc": 0.12640556118775828, + "eval_precision": 0.158935546875, + "eval_precision_macro": 0.5016899956597223, + "eval_pred_class_0": 7380, + "eval_pred_class_1": 12288, + "eval_predicted_binding_ratio": 0.62477120195241, + "eval_recall": 0.6297968397291196, + "eval_recall_macro": 0.5029831666503388, + "eval_runtime": 0.2554, + "eval_samples_per_second": 638.105, + "eval_steps_per_second": 3.915, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 324 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.4237848281472443, + "eval_auc": 0.4176888499450513, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25100786464873437, + "eval_f1_macro": 0.3913994084656603, + "eval_loss": 0.9033117294311523, + "eval_pr_auc": 0.12714405405007598, + "eval_precision": 0.15785536159600996, + "eval_precision_macro": 0.5002421610284318, + "eval_pred_class_0": 7638, + "eval_pred_class_1": 12030, + "eval_predicted_binding_ratio": 0.611653447223917, + "eval_recall": 0.6123831022250886, + "eval_recall_macro": 0.5004331156685895, + "eval_runtime": 0.2637, + "eval_samples_per_second": 618.078, + "eval_steps_per_second": 3.792, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 342 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.4312080536912752, + "eval_auc": 0.4212995000006521, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24904343156340203, + "eval_f1_macro": 0.39564573885956833, + "eval_loss": 0.8857852220535278, + "eval_pr_auc": 0.12799421494868934, + "eval_precision": 0.1572566971854866, + "eval_precision_macro": 0.49948708843014167, + "eval_pred_class_0": 7872, + "eval_pred_class_1": 11796, + "eval_predicted_binding_ratio": 0.5997559487492373, + "eval_recall": 0.5981941309255079, + "eval_recall_macro": 0.4990729210793411, + "eval_runtime": 0.2709, + "eval_samples_per_second": 601.789, + "eval_steps_per_second": 3.692, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 360 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.4394956274150905, + "eval_auc": 0.42538333442304876, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24493150684931506, + "eval_f1_macro": 0.399632635701501, + "eval_loss": 0.8671084642410278, + "eval_pr_auc": 0.12894871744717554, + "eval_precision": 0.15549178189407775, + "eval_precision_macro": 0.4973810972146359, + "eval_pred_class_0": 8169, + "eval_pred_class_1": 11499, + "eval_predicted_binding_ratio": 0.5846552776082977, + "eval_recall": 0.5765881973556917, + "eval_recall_macro": 0.4952114645256155, + "eval_runtime": 0.2681, + "eval_samples_per_second": 607.95, + "eval_steps_per_second": 3.73, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 378 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.44824079723408583, + "eval_auc": 0.42976391273864795, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24080033580523297, + "eval_f1_macro": 0.4037241835563183, + "eval_loss": 0.8476783633232117, + "eval_pr_auc": 0.13001972671009082, + "eval_precision": 0.15375681229339766, + "eval_precision_macro": 0.495462476943159, + "eval_pred_class_0": 8475, + "eval_pred_class_1": 11193, + "eval_predicted_binding_ratio": 0.5690970103721782, + "eval_recall": 0.5549822637858756, + "eval_recall_macro": 0.491621632285284, + "eval_runtime": 0.262, + "eval_samples_per_second": 622.096, + "eval_steps_per_second": 3.817, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 396 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.46044335977221884, + "eval_auc": 0.4345819960798662, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.23819095477386934, + "eval_f1_macro": 0.4102471738365922, + "eval_loss": 0.8271914720535278, + "eval_pr_auc": 0.1312038077210987, + "eval_precision": 0.15319974143503556, + "eval_precision_macro": 0.49502955733365084, + "eval_pred_class_0": 8839, + "eval_pred_class_1": 10829, + "eval_predicted_binding_ratio": 0.5505897905226764, + "eval_recall": 0.5349887133182845, + "eval_recall_macro": 0.4907393617898237, + "eval_runtime": 0.2699, + "eval_samples_per_second": 603.817, + "eval_steps_per_second": 3.704, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 414 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.4719849501728696, + "eval_auc": 0.4399397854182523, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2337489854644728, + "eval_f1_macro": 0.4154821023975197, + "eval_loss": 0.8054794669151306, + "eval_pr_auc": 0.13253606290408437, + "eval_precision": 0.1515499425947187, + "eval_precision_macro": 0.4934724539362483, + "eval_pred_class_0": 9216, + "eval_pred_class_1": 10452, + "eval_predicted_binding_ratio": 0.5314215985356925, + "eval_recall": 0.5108029667849081, + "eval_recall_macro": 0.48776099326147077, + "eval_runtime": 0.2459, + "eval_samples_per_second": 662.968, + "eval_steps_per_second": 4.067, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 432 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.48713646532438476, + "eval_auc": 0.4457222328836341, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.23099794160250056, + "eval_f1_macro": 0.42313846887516615, + "eval_loss": 0.7826969027519226, + "eval_pr_auc": 0.13403779679155806, + "eval_precision": 0.15125798722044728, + "eval_precision_macro": 0.4934698556077371, + "eval_pred_class_0": 9652, + "eval_pred_class_1": 10016, + "eval_predicted_binding_ratio": 0.5092536099247509, + "eval_recall": 0.48855207997420186, + "eval_recall_macro": 0.48771178574674356, + "eval_runtime": 0.2651, + "eval_samples_per_second": 614.772, + "eval_steps_per_second": 3.772, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 450 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.506152125279642, + "eval_auc": 0.452125351005008, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22648721828462212, + "eval_f1_macro": 0.43188881620754876, + "eval_loss": 0.7587484121322632, + "eval_pr_auc": 0.13570124162691763, + "eval_precision": 0.15038071065989847, + "eval_precision_macro": 0.492983148122742, + "eval_pred_class_0": 10212, + "eval_pred_class_1": 9456, + "eval_predicted_binding_ratio": 0.4807809640024405, + "eval_recall": 0.4585617542728152, + "eval_recall_macro": 0.48681090671327726, + "eval_runtime": 0.2665, + "eval_samples_per_second": 611.566, + "eval_steps_per_second": 3.752, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 468 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.529997966239577, + "eval_auc": 0.4588746151842906, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22371514947934162, + "eval_f1_macro": 0.4433436473661838, + "eval_loss": 0.7342172861099243, + "eval_pr_auc": 0.13749280910612846, + "eval_precision": 0.15124332916997843, + "eval_precision_macro": 0.4941834913044441, + "eval_pred_class_0": 10861, + "eval_pred_class_1": 8807, + "eval_predicted_binding_ratio": 0.44778320113890585, + "eval_recall": 0.4295388584327636, + "eval_recall_macro": 0.4891703467029515, + "eval_runtime": 0.2591, + "eval_samples_per_second": 629.173, + "eval_steps_per_second": 3.86, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 486 + }, + { + "epoch": 27.77777777777778, + "grad_norm": 191838.453125, + "learning_rate": 5.544444444444443e-07, + "loss": 0.954, + "step": 500 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.5579621720561317, + "eval_auc": 0.46628288633295734, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22208303507516106, + "eval_f1_macro": 0.4566736198103078, + "eval_loss": 0.7085328102111816, + "eval_pr_auc": 0.1397221721421834, + "eval_precision": 0.15368421052631578, + "eval_precision_macro": 0.4966212823527809, + "eval_pred_class_0": 11593, + "eval_pred_class_1": 8075, + "eval_predicted_binding_ratio": 0.4105653853976002, + "eval_recall": 0.400193485972267, + "eval_recall_macro": 0.49384334768221605, + "eval_runtime": 0.245, + "eval_samples_per_second": 665.264, + "eval_steps_per_second": 4.081, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 504 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.5890278625177954, + "eval_auc": 0.47432292318642716, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21926011784023955, + "eval_f1_macro": 0.4701862470303913, + "eval_loss": 0.6820237636566162, + "eval_pr_auc": 0.1419831923592407, + "eval_precision": 0.15650854936569222, + "eval_precision_macro": 0.4990822386003719, + "eval_pred_class_0": 12416, + "eval_pred_class_1": 7252, + "eval_predicted_binding_ratio": 0.36872076469391907, + "eval_recall": 0.36601096420509516, + "eval_recall_macro": 0.49839149043235986, + "eval_runtime": 0.239, + "eval_samples_per_second": 681.884, + "eval_steps_per_second": 4.183, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 522 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.62385600976205, + "eval_auc": 0.4829859859606367, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21797040169133192, + "eval_f1_macro": 0.4851734455906117, + "eval_loss": 0.6552286148071289, + "eval_pr_auc": 0.144533301478986, + "eval_precision": 0.16213241075640825, + "eval_precision_macro": 0.5032992807407407, + "eval_pred_class_0": 13309, + "eval_pred_class_1": 6359, + "eval_predicted_binding_ratio": 0.32331706324994913, + "eval_recall": 0.3324733956788133, + "eval_recall_macro": 0.5054351043101014, + "eval_runtime": 0.2289, + "eval_samples_per_second": 712.13, + "eval_steps_per_second": 4.369, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 540 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.6593959731543624, + "eval_auc": 0.4923438323703967, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21308586867144366, + "eval_f1_macro": 0.49787408315316334, + "eval_loss": 0.6283431053161621, + "eval_pr_auc": 0.14738118302130468, + "eval_precision": 0.16759053954175906, + "eval_precision_macro": 0.5068452136541568, + "eval_pred_class_0": 14256, + "eval_pred_class_1": 5412, + "eval_predicted_binding_ratio": 0.2751677852348993, + "eval_recall": 0.2924862947436311, + "eval_recall_macro": 0.5102800882784371, + "eval_runtime": 0.2716, + "eval_samples_per_second": 600.185, + "eval_steps_per_second": 3.682, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 558 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.6967154769168192, + "eval_auc": 0.5026651961769109, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21316449017280042, + "eval_f1_macro": 0.5126600281126953, + "eval_loss": 0.6013967990875244, + "eval_pr_auc": 0.15068393162958252, + "eval_precision": 0.18035714285714285, + "eval_precision_macro": 0.5146913446706046, + "eval_pred_class_0": 15188, + "eval_pred_class_1": 4480, + "eval_predicted_binding_ratio": 0.2277811673784828, + "eval_recall": 0.2605611093195743, + "eval_recall_macro": 0.5194578347949956, + "eval_runtime": 0.2705, + "eval_samples_per_second": 602.56, + "eval_steps_per_second": 3.697, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 576 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.7287472035794184, + "eval_auc": 0.5136348320064595, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.20692730786383232, + "eval_f1_macro": 0.5216610840892347, + "eval_loss": 0.575495719909668, + "eval_pr_auc": 0.15441446935423722, + "eval_precision": 0.19194704908990623, + "eval_precision_macro": 0.5210140431835268, + "eval_pred_class_0": 16042, + "eval_pred_class_1": 3626, + "eval_predicted_binding_ratio": 0.18436038234695953, + "eval_recall": 0.22444372782973235, + "eval_recall_macro": 0.5237930596654548, + "eval_runtime": 0.2647, + "eval_samples_per_second": 615.887, + "eval_steps_per_second": 3.778, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 594 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.7577791336180598, + "eval_auc": 0.5256758602512032, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.20013431833445267, + "eval_f1_macro": 0.5287070633014385, + "eval_loss": 0.5502753853797913, + "eval_pr_auc": 0.15881070257620672, + "eval_precision": 0.20875656742556917, + "eval_precision_macro": 0.5298823579410603, + "eval_pred_class_0": 16813, + "eval_pred_class_1": 2855, + "eval_predicted_binding_ratio": 0.14515965019320723, + "eval_recall": 0.19219606578523057, + "eval_recall_macro": 0.5279203302306971, + "eval_runtime": 0.2621, + "eval_samples_per_second": 621.974, + "eval_steps_per_second": 3.816, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 612 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.7844213951596501, + "eval_auc": 0.5388391234856937, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.19575113808801214, + "eval_f1_macro": 0.5356397775926205, + "eval_loss": 0.5265588164329529, + "eval_pr_auc": 0.16395620275178963, + "eval_precision": 0.23767848917549517, + "eval_precision_macro": 0.5449694383352471, + "eval_pred_class_0": 17497, + "eval_pred_class_1": 2171, + "eval_predicted_binding_ratio": 0.11038234695952817, + "eval_recall": 0.16639793614962914, + "eval_recall_macro": 0.5332502748895668, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.866, + "eval_steps_per_second": 3.895, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 630 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.8094366483628228, + "eval_auc": 0.5531888075405533, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.19189305735230702, + "eval_f1_macro": 0.5419376520838427, + "eval_loss": 0.5044229626655579, + "eval_pr_auc": 0.16987983494600534, + "eval_precision": 0.28952504879635654, + "eval_precision_macro": 0.5715178054086024, + "eval_pred_class_0": 18131, + "eval_pred_class_1": 1537, + "eval_predicted_binding_ratio": 0.07814724425462681, + "eval_recall": 0.14350209609803288, + "eval_recall_macro": 0.5387939646905326, + "eval_runtime": 0.2636, + "eval_samples_per_second": 618.446, + "eval_steps_per_second": 3.794, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 648 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.827791336180598, + "eval_auc": 0.5689342779333475, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.18796451690242147, + "eval_f1_macro": 0.5458235779450257, + "eval_loss": 0.4842270016670227, + "eval_pr_auc": 0.177326879876991, + "eval_precision": 0.3663551401869159, + "eval_precision_macro": 0.6103471582212137, + "eval_pred_class_0": 18598, + "eval_pred_class_1": 1070, + "eval_predicted_binding_ratio": 0.05440309131584299, + "eval_recall": 0.12641083521444696, + "eval_recall_macro": 0.5427430526648682, + "eval_runtime": 0.2392, + "eval_samples_per_second": 681.483, + "eval_steps_per_second": 4.181, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 666 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.8386719544437665, + "eval_auc": 0.5868017348062602, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.1820056715648363, + "eval_f1_macro": 0.5462584975699354, + "eval_loss": 0.46564891934394836, + "eval_pr_auc": 0.18685168882837525, + "eval_precision": 0.4537275064267352, + "eval_precision_macro": 0.6541268553838282, + "eval_pred_class_0": 18890, + "eval_pred_class_1": 778, + "eval_predicted_binding_ratio": 0.039556640227781166, + "eval_recall": 0.11383424701709126, + "eval_recall_macro": 0.5440904198204911, + "eval_runtime": 0.2648, + "eval_samples_per_second": 615.584, + "eval_steps_per_second": 3.777, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 684 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.8441631075859264, + "eval_auc": 0.6057814605899876, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.17982338774417983, + "eval_f1_macro": 0.5468627318225942, + "eval_loss": 0.4492926001548767, + "eval_pr_auc": 0.19848375437748741, + "eval_precision": 0.5283018867924528, + "eval_precision_macro": 0.6915101279275421, + "eval_pred_class_0": 19032, + "eval_pred_class_1": 636, + "eval_predicted_binding_ratio": 0.03233679072605247, + "eval_recall": 0.10835214446952596, + "eval_recall_macro": 0.5451219284549598, + "eval_runtime": 0.2567, + "eval_samples_per_second": 635.049, + "eval_steps_per_second": 3.896, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 702 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.8456375838926175, + "eval_auc": 0.6262280880815292, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.1758957654723127, + "eval_f1_macro": 0.5453696262568565, + "eval_loss": 0.43461790680885315, + "eval_pr_auc": 0.21275175506055685, + "eval_precision": 0.5557461406518011, + "eval_precision_macro": 0.7051195990133514, + "eval_pred_class_0": 19085, + "eval_pred_class_1": 583, + "eval_predicted_binding_ratio": 0.0296420581655481, + "eval_recall": 0.10448242502418574, + "eval_recall_macro": 0.544424468382196, + "eval_runtime": 0.2668, + "eval_samples_per_second": 610.873, + "eval_steps_per_second": 3.748, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 720 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.8483323164531218, + "eval_auc": 0.6481986497247736, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.18519530183010108, + "eval_f1_macro": 0.5507896621273841, + "eval_loss": 0.42107364535331726, + "eval_pr_auc": 0.23051421419341214, + "eval_precision": 0.6053571428571428, + "eval_precision_macro": 0.730405178085469, + "eval_pred_class_0": 19108, + "eval_pred_class_1": 560, + "eval_predicted_binding_ratio": 0.02847264592231035, + "eval_recall": 0.10931957433086101, + "eval_recall_macro": 0.5479899012476421, + "eval_runtime": 0.2619, + "eval_samples_per_second": 622.385, + "eval_steps_per_second": 3.818, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 738 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.8502135448444174, + "eval_auc": 0.6709813300109956, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.20292207792207792, + "eval_f1_macro": 0.5601310726310726, + "eval_loss": 0.4084097743034363, + "eval_pr_auc": 0.25093797354762637, + "eval_precision": 0.6302521008403361, + "eval_precision_macro": 0.7436637739036264, + "eval_pred_class_0": 19073, + "eval_pred_class_1": 595, + "eval_predicted_binding_ratio": 0.03025218629245475, + "eval_recall": 0.12092873266688164, + "eval_recall_macro": 0.5538246608949184, + "eval_runtime": 0.2691, + "eval_samples_per_second": 605.746, + "eval_steps_per_second": 3.716, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 756 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.8526540573520439, + "eval_auc": 0.6936772353365158, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22263948497854077, + "eval_f1_macro": 0.5706266398157138, + "eval_loss": 0.39666271209716797, + "eval_pr_auc": 0.2738840395423864, + "eval_precision": 0.6618819776714514, + "eval_precision_macro": 0.7604089789622948, + "eval_pred_class_0": 19041, + "eval_pred_class_1": 627, + "eval_predicted_binding_ratio": 0.031879194630872486, + "eval_recall": 0.13382779748468235, + "eval_recall_macro": 0.5605156371379469, + "eval_runtime": 0.2268, + "eval_samples_per_second": 718.638, + "eval_steps_per_second": 4.409, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 774 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.8544844417327638, + "eval_auc": 0.7158095511124275, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24325753569539926, + "eval_f1_macro": 0.58138013196988, + "eval_loss": 0.38576817512512207, + "eval_pr_auc": 0.29905525248581355, + "eval_precision": 0.6754772393538914, + "eval_precision_macro": 0.7681910344870789, + "eval_pred_class_0": 18987, + "eval_pred_class_1": 681, + "eval_predicted_binding_ratio": 0.03462477120195241, + "eval_recall": 0.14833924540470816, + "eval_recall_macro": 0.5674997367845657, + "eval_runtime": 0.2567, + "eval_samples_per_second": 635.061, + "eval_steps_per_second": 3.896, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 792 + }, + { + "epoch": 45.0, + "eval_accuracy": 0.8575859263778727, + "eval_auc": 0.737084336405228, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2808729139922978, + "eval_f1_macro": 0.60092007766148, + "eval_loss": 0.37560486793518066, + "eval_pr_auc": 0.3260256629572295, + "eval_precision": 0.6889168765743073, + "eval_precision_macro": 0.7767992245539758, + "eval_pred_class_0": 18874, + "eval_pred_class_1": 794, + "eval_predicted_binding_ratio": 0.040370144396990035, + "eval_recall": 0.1763947113834247, + "eval_recall_macro": 0.5807427773130077, + "eval_runtime": 0.2685, + "eval_samples_per_second": 607.1, + "eval_steps_per_second": 3.725, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 810 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.8611450071181614, + "eval_auc": 0.7571642141385685, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.32451150136037593, + "eval_f1_macro": 0.623565358817779, + "eval_loss": 0.36611661314964294, + "eval_pr_auc": 0.3532860869347882, + "eval_precision": 0.6963906581740976, + "eval_precision_macro": 0.7829117661264593, + "eval_pred_class_0": 18726, + "eval_pred_class_1": 942, + "eval_predicted_binding_ratio": 0.047895057962172055, + "eval_recall": 0.21154466301193164, + "eval_recall_macro": 0.5971407144358867, + "eval_runtime": 0.2698, + "eval_samples_per_second": 604.115, + "eval_steps_per_second": 3.706, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 828 + }, + { + "epoch": 47.0, + "eval_accuracy": 0.8646532438478747, + "eval_auc": 0.7759390708192488, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.3694931312174325, + "eval_f1_macro": 0.6468414565354121, + "eval_loss": 0.3574466407299042, + "eval_pr_auc": 0.380672409235741, + "eval_precision": 0.695807314897413, + "eval_precision_macro": 0.7853328912870632, + "eval_pred_class_0": 18547, + "eval_pred_class_1": 1121, + "eval_predicted_binding_ratio": 0.05699613585519626, + "eval_recall": 0.25153176394711385, + "eval_recall_macro": 0.6154743385438473, + "eval_runtime": 0.2501, + "eval_samples_per_second": 651.868, + "eval_steps_per_second": 3.999, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 846 + }, + { + "epoch": 48.0, + "eval_accuracy": 0.8670937563555013, + "eval_auc": 0.7932697219796829, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.4004587155963303, + "eval_f1_macro": 0.6628608765538834, + "eval_loss": 0.34963178634643555, + "eval_pr_auc": 0.40879055918048346, + "eval_precision": 0.69340746624305, + "eval_precision_macro": 0.7861898540406407, + "eval_pred_class_0": 18409, + "eval_pred_class_1": 1259, + "eval_predicted_binding_ratio": 0.06401260931462274, + "eval_recall": 0.2815220896485005, + "eval_recall_macro": 0.6291113798275701, + "eval_runtime": 0.2518, + "eval_samples_per_second": 647.281, + "eval_steps_per_second": 3.971, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 864 + }, + { + "epoch": 49.0, + "eval_accuracy": 0.8693308928208257, + "eval_auc": 0.8090460638591691, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.43491644678979774, + "eval_f1_macro": 0.6805201987887128, + "eval_loss": 0.3424255847930908, + "eval_pr_auc": 0.43548439720530613, + "eval_precision": 0.6834830684174154, + "eval_precision_macro": 0.783786427463743, + "eval_pred_class_0": 18221, + "eval_pred_class_1": 1447, + "eval_predicted_binding_ratio": 0.07357128330282693, + "eval_recall": 0.3189293776201225, + "eval_recall_macro": 0.6456420293062284, + "eval_runtime": 0.2567, + "eval_samples_per_second": 635.097, + "eval_steps_per_second": 3.896, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 882 + }, + { + "epoch": 50.0, + "eval_accuracy": 0.8711612772015457, + "eval_auc": 0.8231584209269593, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.46153846153846156, + "eval_f1_macro": 0.6941824562962304, + "eval_loss": 0.3358187675476074, + "eval_pr_auc": 0.46013674866792464, + "eval_precision": 0.6766355140186916, + "eval_precision_macro": 0.7825407542966181, + "eval_pred_class_0": 18063, + "eval_pred_class_1": 1605, + "eval_predicted_binding_ratio": 0.0816046369737645, + "eval_recall": 0.35020960980328925, + "eval_recall_macro": 0.659441136162585, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.797, + "eval_steps_per_second": 3.87, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 900 + }, + { + "epoch": 51.0, + "eval_accuracy": 0.8745169818995322, + "eval_auc": 0.8357514570475526, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.49363972096840375, + "eval_f1_macro": 0.7110123043354003, + "eval_loss": 0.32985639572143555, + "eval_pr_auc": 0.48277553791567623, + "eval_precision": 0.6785109983079526, + "eval_precision_macro": 0.7862239260888744, + "eval_pred_class_0": 17895, + "eval_pred_class_1": 1773, + "eval_predicted_binding_ratio": 0.0901464307504576, + "eval_recall": 0.38793937439535636, + "eval_recall_macro": 0.6767668140160521, + "eval_runtime": 0.2496, + "eval_samples_per_second": 653.0, + "eval_steps_per_second": 4.006, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 918 + }, + { + "epoch": 52.0, + "eval_accuracy": 0.8772117144600367, + "eval_auc": 0.8463606400457255, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.523574669560071, + "eval_f1_macro": 0.7265493507137326, + "eval_loss": 0.3246362507343292, + "eval_pr_auc": 0.5014434788718165, + "eval_precision": 0.6742886178861789, + "eval_precision_macro": 0.787031314592807, + "eval_pred_class_0": 17700, + "eval_pred_class_1": 1968, + "eval_predicted_binding_ratio": 0.10006101281269067, + "eval_recall": 0.4279264753305385, + "eval_recall_macro": 0.6946175504557563, + "eval_runtime": 0.2301, + "eval_samples_per_second": 708.248, + "eval_steps_per_second": 4.345, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 936 + }, + { + "epoch": 53.0, + "eval_accuracy": 0.878991254830181, + "eval_auc": 0.8556500280578212, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5445847684653655, + "eval_f1_macro": 0.7374052543587455, + "eval_loss": 0.3201504647731781, + "eval_pr_auc": 0.5184804467620471, + "eval_precision": 0.6696470588235294, + "eval_precision_macro": 0.786998185969936, + "eval_pred_class_0": 17543, + "eval_pred_class_1": 2125, + "eval_predicted_binding_ratio": 0.10804352247305267, + "eval_recall": 0.45888423089326025, + "eval_recall_macro": 0.7082554190018906, + "eval_runtime": 0.2666, + "eval_samples_per_second": 611.344, + "eval_steps_per_second": 3.751, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 954 + }, + { + "epoch": 54.0, + "eval_accuracy": 0.8792454748830588, + "eval_auc": 0.8636336358823378, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5594509367464292, + "eval_f1_macro": 0.744742407539513, + "eval_loss": 0.31616976857185364, + "eval_pr_auc": 0.5331175601979875, + "eval_precision": 0.6585152838427948, + "eval_precision_macro": 0.7834238290545543, + "eval_pred_class_0": 17378, + "eval_pred_class_1": 2290, + "eval_predicted_binding_ratio": 0.11643278421801911, + "eval_recall": 0.48629474363108677, + "eval_recall_macro": 0.719546237029523, + "eval_runtime": 0.2694, + "eval_samples_per_second": 604.964, + "eval_steps_per_second": 3.711, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 972 + }, + { + "epoch": 55.0, + "eval_accuracy": 0.8814317673378076, + "eval_auc": 0.8703512791725087, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.578757225433526, + "eval_f1_macro": 0.7548815712966447, + "eval_loss": 0.31280621886253357, + "eval_pr_auc": 0.5453871030590061, + "eval_precision": 0.657905544147844, + "eval_precision_macro": 0.7854606348952531, + "eval_pred_class_0": 17233, + "eval_pred_class_1": 2435, + "eval_predicted_binding_ratio": 0.12380516575147447, + "eval_recall": 0.5166075459529185, + "eval_recall_macro": 0.7331634337478723, + "eval_runtime": 0.3648, + "eval_samples_per_second": 446.874, + "eval_steps_per_second": 2.742, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 990 + }, + { + "epoch": 55.55555555555556, + "grad_norm": 18517.669921875, + "learning_rate": 9.996314582053105e-07, + "loss": 0.4604, + "step": 1000 + }, + { + "epoch": 56.0, + "eval_accuracy": 0.8834146837502542, + "eval_auc": 0.8759527216222862, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5956621407159232, + "eval_f1_macro": 0.7637749289648232, + "eval_loss": 0.3100614845752716, + "eval_pr_auc": 0.5551596710183998, + "eval_precision": 0.6571984435797665, + "eval_precision_macro": 0.7873078426812156, + "eval_pred_class_0": 17098, + "eval_pred_class_1": 2570, + "eval_predicted_binding_ratio": 0.1306691071791743, + "eval_recall": 0.5446630119316349, + "eval_recall_macro": 0.745742503732462, + "eval_runtime": 0.2507, + "eval_samples_per_second": 650.302, + "eval_steps_per_second": 3.99, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1008 + }, + { + "epoch": 57.0, + "eval_accuracy": 0.8840756558877364, + "eval_auc": 0.8809651824326759, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6064894718674491, + "eval_f1_macro": 0.7692574960553631, + "eval_loss": 0.30748215317726135, + "eval_pr_auc": 0.5634298069700459, + "eval_precision": 0.6524322317118455, + "eval_precision_macro": 0.7866284869899434, + "eval_pred_class_0": 16975, + "eval_pred_class_1": 2693, + "eval_predicted_binding_ratio": 0.13692292047996746, + "eval_recall": 0.5665914221218962, + "eval_recall_macro": 0.7550467824679621, + "eval_runtime": 0.265, + "eval_samples_per_second": 615.163, + "eval_steps_per_second": 3.774, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1026 + }, + { + "epoch": 58.0, + "eval_accuracy": 0.8849908480780964, + "eval_auc": 0.8853361353068065, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6121399176954733, + "eval_f1_macro": 0.7723127955239544, + "eval_loss": 0.30511021614074707, + "eval_pr_auc": 0.5712324508006517, + "eval_precision": 0.6536067374588063, + "eval_precision_macro": 0.7879535133831199, + "eval_pred_class_0": 16937, + "eval_pred_class_1": 2731, + "eval_predicted_binding_ratio": 0.13885499288183853, + "eval_recall": 0.5756207674943566, + "eval_recall_macro": 0.7592596503615321, + "eval_runtime": 0.2627, + "eval_samples_per_second": 620.515, + "eval_steps_per_second": 3.807, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1044 + }, + { + "epoch": 59.0, + "eval_accuracy": 0.8853467561521253, + "eval_auc": 0.8892084338643703, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.616822429906542, + "eval_f1_macro": 0.7747051971959543, + "eval_loss": 0.3030014634132385, + "eval_pr_auc": 0.5778370115776272, + "eval_precision": 0.6519396551724138, + "eval_precision_macro": 0.7878864350252024, + "eval_pred_class_0": 16884, + "eval_pred_class_1": 2784, + "eval_predicted_binding_ratio": 0.1415497254423429, + "eval_recall": 0.5852950661077072, + "eval_recall_macro": 0.7634026486450891, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.008, + "eval_steps_per_second": 3.828, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1062 + }, + { + "epoch": 60.0, + "eval_accuracy": 0.884685784014643, + "eval_auc": 0.8924710108272688, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6202277294038848, + "eval_f1_macro": 0.7761251343338811, + "eval_loss": 0.3011925220489502, + "eval_pr_auc": 0.5832812236308141, + "eval_precision": 0.6450714036920934, + "eval_precision_macro": 0.7853564436451774, + "eval_pred_class_0": 16797, + "eval_pred_class_1": 2871, + "eval_predicted_binding_ratio": 0.14597315436241612, + "eval_recall": 0.5972267010641729, + "eval_recall_macro": 0.7678594421600216, + "eval_runtime": 0.2437, + "eval_samples_per_second": 668.98, + "eval_steps_per_second": 4.104, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1080 + }, + { + "epoch": 61.0, + "eval_accuracy": 0.8856518202155786, + "eval_auc": 0.8954398707041407, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6242272347535506, + "eval_f1_macro": 0.7783964874556335, + "eval_loss": 0.29942846298217773, + "eval_pr_auc": 0.5886815510653964, + "eval_precision": 0.6477115117891817, + "eval_precision_macro": 0.7871243450270979, + "eval_pred_class_0": 16784, + "eval_pred_class_1": 2884, + "eval_predicted_binding_ratio": 0.14663412649989832, + "eval_recall": 0.6023863269912931, + "eval_recall_macro": 0.7705297965613797, + "eval_runtime": 0.2695, + "eval_samples_per_second": 604.921, + "eval_steps_per_second": 3.711, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1098 + }, + { + "epoch": 62.0, + "eval_accuracy": 0.8865161683953631, + "eval_auc": 0.8978366542923133, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6288659793814433, + "eval_f1_macro": 0.7809416026191173, + "eval_loss": 0.29785510897636414, + "eval_pr_auc": 0.593021329597711, + "eval_precision": 0.6491589426707861, + "eval_precision_macro": 0.7884708470441367, + "eval_pred_class_0": 16755, + "eval_pred_class_1": 2913, + "eval_predicted_binding_ratio": 0.14810860280658938, + "eval_recall": 0.6098032892615285, + "eval_recall_macro": 0.7740571948209012, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.719, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1116 + }, + { + "epoch": 63.0, + "eval_accuracy": 0.8871262965222697, + "eval_auc": 0.8998632136201572, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6315964155326916, + "eval_f1_macro": 0.7824748814379159, + "eval_loss": 0.2964444160461426, + "eval_pr_auc": 0.5970041919243015, + "eval_precision": 0.6505982905982906, + "eval_precision_macro": 0.789523000044412, + "eval_pred_class_0": 16743, + "eval_pred_class_1": 2925, + "eval_predicted_binding_ratio": 0.14871873093349602, + "eval_recall": 0.6136730087068688, + "eval_recall_macro": 0.7759920545435715, + "eval_runtime": 0.2695, + "eval_samples_per_second": 604.792, + "eval_steps_per_second": 3.71, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1134 + }, + { + "epoch": 64.0, + "eval_accuracy": 0.8876855806386008, + "eval_auc": 0.9017429582012333, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6356589147286822, + "eval_f1_macro": 0.7846343742639293, + "eval_loss": 0.2951850891113281, + "eval_pr_auc": 0.6005268804358049, + "eval_precision": 0.650573936529372, + "eval_precision_macro": 0.7901498917652248, + "eval_pred_class_0": 16706, + "eval_pred_class_1": 2962, + "eval_predicted_binding_ratio": 0.15059995932479153, + "eval_recall": 0.6214124475975492, + "eval_recall_macro": 0.7794694277584535, + "eval_runtime": 0.2675, + "eval_samples_per_second": 609.376, + "eval_steps_per_second": 3.739, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1152 + }, + { + "epoch": 65.0, + "eval_accuracy": 0.8881940207443563, + "eval_auc": 0.9033021142666618, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6377861966727063, + "eval_f1_macro": 0.785840340182137, + "eval_loss": 0.2939698398113251, + "eval_pr_auc": 0.6035211605243039, + "eval_precision": 0.6518518518518519, + "eval_precision_macro": 0.7910415086304414, + "eval_pred_class_0": 16698, + "eval_pred_class_1": 2970, + "eval_predicted_binding_ratio": 0.15100671140939598, + "eval_recall": 0.6243147371815544, + "eval_recall_macro": 0.7809507530297222, + "eval_runtime": 0.2684, + "eval_samples_per_second": 607.415, + "eval_steps_per_second": 3.726, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1170 + }, + { + "epoch": 66.0, + "eval_accuracy": 0.8882448647549319, + "eval_auc": 0.9048048023731414, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6389618922470434, + "eval_f1_macro": 0.7864263262967652, + "eval_loss": 0.29283198714256287, + "eval_pr_auc": 0.6066927627742578, + "eval_precision": 0.6511550050217609, + "eval_precision_macro": 0.7909273016835919, + "eval_pred_class_0": 16681, + "eval_pred_class_1": 2987, + "eval_predicted_binding_ratio": 0.1518710595891804, + "eval_recall": 0.6272170267655595, + "eval_recall_macro": 0.7821604539875966, + "eval_runtime": 0.2764, + "eval_samples_per_second": 589.68, + "eval_steps_per_second": 3.618, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1188 + }, + { + "epoch": 67.0, + "eval_accuracy": 0.888346552776083, + "eval_auc": 0.9061457752769495, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6402359108781127, + "eval_f1_macro": 0.7870775124924988, + "eval_loss": 0.29169291257858276, + "eval_pr_auc": 0.6096183698390041, + "eval_precision": 0.6506826506826506, + "eval_precision_macro": 0.7909278839971909, + "eval_pred_class_0": 16665, + "eval_pred_class_1": 3003, + "eval_predicted_binding_ratio": 0.15268456375838926, + "eval_recall": 0.6301193163495646, + "eval_recall_macro": 0.783400335424737, + "eval_runtime": 0.2635, + "eval_samples_per_second": 618.516, + "eval_steps_per_second": 3.795, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1206 + }, + { + "epoch": 68.0, + "eval_accuracy": 0.8887533048606874, + "eval_auc": 0.9074730837522218, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.641781270464964, + "eval_f1_macro": 0.7879665952661885, + "eval_loss": 0.2904839515686035, + "eval_pr_auc": 0.6127271933864005, + "eval_precision": 0.6518124376454939, + "eval_precision_macro": 0.7916645766644131, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6320541760722348, + "eval_recall_macro": 0.7844281262446042, + "eval_runtime": 0.26, + "eval_samples_per_second": 626.958, + "eval_steps_per_second": 3.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1224 + }, + { + "epoch": 69.0, + "eval_accuracy": 0.889261744966443, + "eval_auc": 0.9085174295528148, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6431847968545217, + "eval_f1_macro": 0.788822778783544, + "eval_loss": 0.28959015011787415, + "eval_pr_auc": 0.6152976575518759, + "eval_precision": 0.6536796536796536, + "eval_precision_macro": 0.7926964124983926, + "eval_pred_class_0": 16665, + "eval_pred_class_1": 3003, + "eval_predicted_binding_ratio": 0.15268456375838926, + "eval_recall": 0.6330216059335698, + "eval_recall_macro": 0.7851231045301337, + "eval_runtime": 0.2452, + "eval_samples_per_second": 664.894, + "eval_steps_per_second": 4.079, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1242 + }, + { + "epoch": 70.0, + "eval_accuracy": 0.889363432987594, + "eval_auc": 0.9095093658465239, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6446766819072501, + "eval_f1_macro": 0.7895790973067505, + "eval_loss": 0.2887136936187744, + "eval_pr_auc": 0.6176593727552148, + "eval_precision": 0.6529937148527952, + "eval_precision_macro": 0.7926428472131204, + "eval_pred_class_0": 16645, + "eval_pred_class_1": 3023, + "eval_predicted_binding_ratio": 0.15370144396990035, + "eval_recall": 0.636568848758465, + "eval_recall_macro": 0.7866251016291872, + "eval_runtime": 0.2634, + "eval_samples_per_second": 618.817, + "eval_steps_per_second": 3.796, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1260 + }, + { + "epoch": 71.0, + "eval_accuracy": 0.889821029082774, + "eval_auc": 0.9104295930879169, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6464349812367434, + "eval_f1_macro": 0.7905888279869988, + "eval_loss": 0.28783899545669556, + "eval_pr_auc": 0.619972501272285, + "eval_precision": 0.6542272126816381, + "eval_precision_macro": 0.7934597601869728, + "eval_pred_class_0": 16640, + "eval_pred_class_1": 3028, + "eval_predicted_binding_ratio": 0.1539556640227781, + "eval_recall": 0.6388261851015802, + "eval_recall_macro": 0.7878141307592769, + "eval_runtime": 0.2615, + "eval_samples_per_second": 623.334, + "eval_steps_per_second": 3.824, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1278 + }, + { + "epoch": 72.0, + "eval_accuracy": 0.8899735611145008, + "eval_auc": 0.911281293804153, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.646289637136319, + "eval_f1_macro": 0.7905721170208057, + "eval_loss": 0.28697267174720764, + "eval_pr_auc": 0.6225153426830469, + "eval_precision": 0.6552867086509778, + "eval_precision_macro": 0.7938916277024632, + "eval_pred_class_0": 16651, + "eval_pred_class_1": 3017, + "eval_predicted_binding_ratio": 0.15339637990644703, + "eval_recall": 0.6375362786198001, + "eval_recall_macro": 0.7873804408732489, + "eval_runtime": 0.2764, + "eval_samples_per_second": 589.762, + "eval_steps_per_second": 3.618, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1296 + }, + { + "epoch": 73.0, + "eval_accuracy": 0.890736221273134, + "eval_auc": 0.911925575502615, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6505122784192552, + "eval_f1_macro": 0.7928790035842321, + "eval_loss": 0.2863345444202423, + "eval_pr_auc": 0.6235765349975187, + "eval_precision": 0.6561679790026247, + "eval_precision_macro": 0.7949612458190018, + "eval_pred_class_0": 16620, + "eval_pred_class_1": 3048, + "eval_predicted_binding_ratio": 0.1549725442342892, + "eval_recall": 0.6449532408900355, + "eval_recall_macro": 0.7908474781742385, + "eval_runtime": 0.2631, + "eval_samples_per_second": 619.574, + "eval_steps_per_second": 3.801, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1314 + }, + { + "epoch": 74.0, + "eval_accuracy": 0.891193817368314, + "eval_auc": 0.9126060328997004, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6506692784851453, + "eval_f1_macro": 0.793115428161573, + "eval_loss": 0.28558436036109924, + "eval_pr_auc": 0.625919370718976, + "eval_precision": 0.6588429752066116, + "eval_precision_macro": 0.7961342196828587, + "eval_pred_class_0": 16643, + "eval_pred_class_1": 3025, + "eval_predicted_binding_ratio": 0.15380313199105144, + "eval_recall": 0.6426959045469204, + "eval_recall_macro": 0.7902016976709372, + "eval_runtime": 0.265, + "eval_samples_per_second": 615.167, + "eval_steps_per_second": 3.774, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1332 + }, + { + "epoch": 75.0, + "eval_accuracy": 0.8916005694529184, + "eval_auc": 0.9132873078266985, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6526555881394591, + "eval_f1_macro": 0.794217425975266, + "eval_loss": 0.28493690490722656, + "eval_pr_auc": 0.6278320531638758, + "eval_precision": 0.6595324333223576, + "eval_precision_macro": 0.7967555738856391, + "eval_pred_class_0": 16631, + "eval_pred_class_1": 3037, + "eval_predicted_binding_ratio": 0.1544132601179581, + "eval_recall": 0.6459206707513705, + "eval_recall_macro": 0.7917537198146302, + "eval_runtime": 0.2564, + "eval_samples_per_second": 635.773, + "eval_steps_per_second": 3.9, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1350 + }, + { + "epoch": 76.0, + "eval_accuracy": 0.8920073215375229, + "eval_auc": 0.9139370494570753, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6551948051948052, + "eval_f1_macro": 0.7955863102414826, + "eval_loss": 0.28430166840553284, + "eval_pr_auc": 0.6292546024902547, + "eval_precision": 0.6596927100359594, + "eval_precision_macro": 0.7972435493102309, + "eval_pred_class_0": 16609, + "eval_pred_class_1": 3059, + "eval_predicted_binding_ratio": 0.1555318283506203, + "eval_recall": 0.6507578200580458, + "eval_recall_macro": 0.7939610311131058, + "eval_runtime": 0.2569, + "eval_samples_per_second": 634.459, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1368 + }, + { + "epoch": 77.0, + "eval_accuracy": 0.8921598535692495, + "eval_auc": 0.9146080371326758, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6542787286063569, + "eval_f1_macro": 0.7951975553215214, + "eval_loss": 0.283497154712677, + "eval_pr_auc": 0.6315022943889131, + "eval_precision": 0.6615029663810151, + "eval_precision_macro": 0.7978670296615908, + "eval_pred_class_0": 16634, + "eval_pred_class_1": 3034, + "eval_predicted_binding_ratio": 0.15426072808623145, + "eval_recall": 0.6472105772331506, + "eval_recall_macro": 0.7926099364103822, + "eval_runtime": 0.2203, + "eval_samples_per_second": 740.067, + "eval_steps_per_second": 4.54, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1386 + }, + { + "epoch": 78.0, + "eval_accuracy": 0.8924140736221273, + "eval_auc": 0.9151027497871649, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.655037495924356, + "eval_f1_macro": 0.7956531976941219, + "eval_loss": 0.2829034626483917, + "eval_pr_auc": 0.6331226154536788, + "eval_precision": 0.6623804813715793, + "eval_precision_macro": 0.7983678781970611, + "eval_pred_class_0": 16635, + "eval_pred_class_1": 3033, + "eval_predicted_binding_ratio": 0.1542098840756559, + "eval_recall": 0.6478555304740407, + "eval_recall_macro": 0.7930229544686254, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.571, + "eval_steps_per_second": 3.899, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1404 + }, + { + "epoch": 79.0, + "eval_accuracy": 0.8925157616432784, + "eval_auc": 0.9156585533376076, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6559244791666666, + "eval_f1_macro": 0.7961172166862497, + "eval_loss": 0.28232645988464355, + "eval_pr_auc": 0.6343326075351273, + "eval_precision": 0.6621754847190273, + "eval_precision_macro": 0.7984260882241754, + "eval_pred_class_0": 16625, + "eval_pred_class_1": 3043, + "eval_predicted_binding_ratio": 0.15471832418141143, + "eval_recall": 0.6497903901967107, + "eval_recall_macro": 0.7938696624128962, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.665, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1422 + }, + { + "epoch": 80.0, + "eval_accuracy": 0.8929733577384584, + "eval_auc": 0.9162826303682348, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6572219508223416, + "eval_f1_macro": 0.7969043930945569, + "eval_loss": 0.28162533044815063, + "eval_pr_auc": 0.6363164977912346, + "eval_precision": 0.6638157894736842, + "eval_precision_macro": 0.7993423426560147, + "eval_pred_class_0": 16628, + "eval_pred_class_1": 3040, + "eval_predicted_binding_ratio": 0.15456579214968477, + "eval_recall": 0.6507578200580458, + "eval_recall_macro": 0.79453446021916, + "eval_runtime": 0.272, + "eval_samples_per_second": 599.266, + "eval_steps_per_second": 3.676, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1440 + }, + { + "epoch": 81.0, + "eval_accuracy": 0.8928208257067317, + "eval_auc": 0.9167705886684476, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.657124268054652, + "eval_f1_macro": 0.7968036671115732, + "eval_loss": 0.2811121940612793, + "eval_pr_auc": 0.6378571407612313, + "eval_precision": 0.6629471611421069, + "eval_precision_macro": 0.7989544782306408, + "eval_pred_class_0": 16621, + "eval_pred_class_1": 3047, + "eval_predicted_binding_ratio": 0.15492170022371365, + "eval_recall": 0.6514027732989358, + "eval_recall_macro": 0.7947060344432748, + "eval_runtime": 0.2602, + "eval_samples_per_second": 626.342, + "eval_steps_per_second": 3.843, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1458 + }, + { + "epoch": 82.0, + "eval_accuracy": 0.893125889770185, + "eval_auc": 0.9172343422437541, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6581001951854262, + "eval_f1_macro": 0.7973820247953165, + "eval_loss": 0.2804972231388092, + "eval_pr_auc": 0.6394259907419034, + "eval_precision": 0.6639317361339022, + "eval_precision_macro": 0.7995370130040789, + "eval_pred_class_0": 16621, + "eval_pred_class_1": 3047, + "eval_predicted_binding_ratio": 0.15492170022371365, + "eval_recall": 0.6523702031602708, + "eval_recall_macro": 0.7952802908117405, + "eval_runtime": 0.2613, + "eval_samples_per_second": 623.86, + "eval_steps_per_second": 3.827, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1476 + }, + { + "epoch": 83.0, + "eval_accuracy": 0.8933801098230628, + "eval_auc": 0.9176662705474707, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6597436313483693, + "eval_f1_macro": 0.7982647858607822, + "eval_loss": 0.28001976013183594, + "eval_pr_auc": 0.6407525699560299, + "eval_precision": 0.6639451338994121, + "eval_precision_macro": 0.7998155152816343, + "eval_pred_class_0": 16606, + "eval_pred_class_1": 3062, + "eval_predicted_binding_ratio": 0.15568436038234695, + "eval_recall": 0.6555949693647211, + "eval_recall_macro": 0.7967417715176355, + "eval_runtime": 0.2563, + "eval_samples_per_second": 635.967, + "eval_steps_per_second": 3.902, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1494 + }, + { + "epoch": 83.33333333333333, + "grad_norm": 11845.0048828125, + "learning_rate": 9.86567120987093e-07, + "loss": 0.2741, + "step": 1500 + }, + { + "epoch": 84.0, + "eval_accuracy": 0.8932275777913362, + "eval_auc": 0.9181069872977458, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6592015579357352, + "eval_f1_macro": 0.7979494857864604, + "eval_loss": 0.2794816195964813, + "eval_pr_auc": 0.6421665674351047, + "eval_precision": 0.6635086573015354, + "eval_precision_macro": 0.799538997766201, + "eval_pred_class_0": 16607, + "eval_pred_class_1": 3061, + "eval_predicted_binding_ratio": 0.15563351637177142, + "eval_recall": 0.654950016123831, + "eval_recall_macro": 0.7963891144179245, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.121, + "eval_steps_per_second": 3.841, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1512 + }, + { + "epoch": 85.0, + "eval_accuracy": 0.8932275777913362, + "eval_auc": 0.9184344761551537, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6595330739299611, + "eval_f1_macro": 0.7981095181516664, + "eval_loss": 0.27904370427131653, + "eval_pr_auc": 0.6429990318434126, + "eval_precision": 0.6631887838278449, + "eval_precision_macro": 0.7994577736379149, + "eval_pred_class_0": 16601, + "eval_pred_class_1": 3067, + "eval_predicted_binding_ratio": 0.15593858043522474, + "eval_recall": 0.6559174459851661, + "eval_recall_macro": 0.796782287910794, + "eval_runtime": 0.2526, + "eval_samples_per_second": 645.288, + "eval_steps_per_second": 3.959, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1530 + }, + { + "epoch": 86.0, + "eval_accuracy": 0.8933292658124873, + "eval_auc": 0.9189190864757253, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6585286458333334, + "eval_f1_macro": 0.797660321952579, + "eval_loss": 0.27837634086608887, + "eval_pr_auc": 0.6447520419594072, + "eval_precision": 0.664804469273743, + "eval_precision_macro": 0.7999811820052926, + "eval_pred_class_0": 16625, + "eval_pred_class_1": 3043, + "eval_predicted_binding_ratio": 0.15471832418141143, + "eval_recall": 0.6523702031602708, + "eval_recall_macro": 0.7954010127288045, + "eval_runtime": 0.2401, + "eval_samples_per_second": 678.873, + "eval_steps_per_second": 4.165, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1548 + }, + { + "epoch": 87.0, + "eval_accuracy": 0.8936851738865161, + "eval_auc": 0.9193183038504471, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6602761982128351, + "eval_f1_macro": 0.7986291029941847, + "eval_loss": 0.27788689732551575, + "eval_pr_auc": 0.6462039154555116, + "eval_precision": 0.6653569089718402, + "eval_precision_macro": 0.8005067920325675, + "eval_pred_class_0": 16614, + "eval_pred_class_1": 3054, + "eval_predicted_binding_ratio": 0.15527760829774254, + "eval_recall": 0.655272492744276, + "eval_recall_macro": 0.7967917965622751, + "eval_runtime": 0.2547, + "eval_samples_per_second": 640.0, + "eval_steps_per_second": 3.926, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1566 + }, + { + "epoch": 88.0, + "eval_accuracy": 0.8941427699816962, + "eval_auc": 0.9197055599839506, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.662012987012987, + "eval_f1_macro": 0.7996283888525269, + "eval_loss": 0.2774609923362732, + "eval_pr_auc": 0.6474319229516793, + "eval_precision": 0.6665576985943119, + "eval_precision_macro": 0.8013082309577014, + "eval_pred_class_0": 16609, + "eval_pred_class_1": 3059, + "eval_predicted_binding_ratio": 0.1555318283506203, + "eval_recall": 0.6575298290873912, + "eval_recall_macro": 0.7979808256923646, + "eval_runtime": 0.2497, + "eval_samples_per_second": 652.753, + "eval_steps_per_second": 4.005, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1584 + }, + { + "epoch": 89.0, + "eval_accuracy": 0.8946003660768761, + "eval_auc": 0.9201319018332661, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6636378387149116, + "eval_f1_macro": 0.8005736295133055, + "eval_loss": 0.27695581316947937, + "eval_pr_auc": 0.6490255599400047, + "eval_precision": 0.6678641410842586, + "eval_precision_macro": 0.8021363340613392, + "eval_pred_class_0": 16606, + "eval_pred_class_1": 3062, + "eval_predicted_binding_ratio": 0.15568436038234695, + "eval_recall": 0.6594646888100613, + "eval_recall_macro": 0.7990387969914977, + "eval_runtime": 0.269, + "eval_samples_per_second": 606.045, + "eval_steps_per_second": 3.718, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1602 + }, + { + "epoch": 90.0, + "eval_accuracy": 0.8950579621720561, + "eval_auc": 0.920403555344157, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6648262422864566, + "eval_f1_macro": 0.8013081720805965, + "eval_loss": 0.27657878398895264, + "eval_pr_auc": 0.6500340451314033, + "eval_precision": 0.6696107294733399, + "eval_precision_macro": 0.8030794000144978, + "eval_pred_class_0": 16611, + "eval_pred_class_1": 3057, + "eval_predicted_binding_ratio": 0.1554301403294692, + "eval_recall": 0.6601096420509514, + "eval_recall_macro": 0.7995725369668047, + "eval_runtime": 0.2609, + "eval_samples_per_second": 624.642, + "eval_steps_per_second": 3.832, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1620 + }, + { + "epoch": 91.0, + "eval_accuracy": 0.8953630262355095, + "eval_auc": 0.9207465675374016, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6654746423927178, + "eval_f1_macro": 0.8017284012349317, + "eval_loss": 0.27611449360847473, + "eval_pr_auc": 0.6512249430736048, + "eval_precision": 0.6709275647328745, + "eval_precision_macro": 0.8037492731289094, + "eval_pred_class_0": 16617, + "eval_pred_class_1": 3051, + "eval_predicted_binding_ratio": 0.15512507626601588, + "eval_recall": 0.6601096420509514, + "eval_recall_macro": 0.7997536198424009, + "eval_runtime": 0.2605, + "eval_samples_per_second": 625.736, + "eval_steps_per_second": 3.839, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1638 + }, + { + "epoch": 92.0, + "eval_accuracy": 0.8954647142566605, + "eval_auc": 0.9211146506479597, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6662337662337663, + "eval_f1_macro": 0.802130627992697, + "eval_loss": 0.275691956281662, + "eval_pr_auc": 0.6524138573777828, + "eval_precision": 0.6708074534161491, + "eval_precision_macro": 0.8038244624537546, + "eval_pred_class_0": 16609, + "eval_pred_class_1": 3059, + "eval_predicted_binding_ratio": 0.1555318283506203, + "eval_recall": 0.6617220251531764, + "eval_recall_macro": 0.8004692699557154, + "eval_runtime": 0.2702, + "eval_samples_per_second": 603.318, + "eval_steps_per_second": 3.701, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1656 + }, + { + "epoch": 93.0, + "eval_accuracy": 0.8956680902989628, + "eval_auc": 0.9214137790034065, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6666666666666666, + "eval_f1_macro": 0.8024110910186859, + "eval_loss": 0.27531710267066956, + "eval_pr_auc": 0.6535015844647576, + "eval_precision": 0.6716857610474631, + "eval_precision_macro": 0.8042712197761243, + "eval_pred_class_0": 16613, + "eval_pred_class_1": 3055, + "eval_predicted_binding_ratio": 0.15532845230831807, + "eval_recall": 0.6617220251531764, + "eval_recall_macro": 0.8005899918727795, + "eval_runtime": 0.2643, + "eval_samples_per_second": 616.638, + "eval_steps_per_second": 3.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1674 + }, + { + "epoch": 94.0, + "eval_accuracy": 0.8956172462883872, + "eval_auc": 0.9217489565349906, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6664500406173842, + "eval_f1_macro": 0.802288641055505, + "eval_loss": 0.27483227849006653, + "eval_pr_auc": 0.6546621751515824, + "eval_precision": 0.6715782580222659, + "eval_precision_macro": 0.8041892734676155, + "eval_pred_class_0": 16614, + "eval_pred_class_1": 3054, + "eval_predicted_binding_ratio": 0.15527760829774254, + "eval_recall": 0.6613995485327314, + "eval_recall_macro": 0.8004287535625569, + "eval_runtime": 0.2382, + "eval_samples_per_second": 684.396, + "eval_steps_per_second": 4.199, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1692 + }, + { + "epoch": 95.0, + "eval_accuracy": 0.8959731543624161, + "eval_auc": 0.9220882801111304, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6672088484059857, + "eval_f1_macro": 0.8027800298435859, + "eval_loss": 0.27437010407447815, + "eval_pr_auc": 0.6561534178394561, + "eval_precision": 0.6731211027239908, + "eval_precision_macro": 0.8049740042228342, + "eval_pred_class_0": 16621, + "eval_pred_class_1": 3047, + "eval_predicted_binding_ratio": 0.15492170022371365, + "eval_recall": 0.6613995485327314, + "eval_recall_macro": 0.800640016917419, + "eval_runtime": 0.2619, + "eval_samples_per_second": 622.273, + "eval_steps_per_second": 3.818, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1710 + }, + { + "epoch": 96.0, + "eval_accuracy": 0.8958206223306895, + "eval_auc": 0.9223952917907324, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6666666666666666, + "eval_f1_macro": 0.8024646720298894, + "eval_loss": 0.27396437525749207, + "eval_pr_auc": 0.6571939169041311, + "eval_precision": 0.6726854891661195, + "eval_precision_macro": 0.8046979364973901, + "eval_pred_class_0": 16622, + "eval_pred_class_1": 3046, + "eval_predicted_binding_ratio": 0.1548708562131381, + "eval_recall": 0.6607545952918413, + "eval_recall_macro": 0.8002873598177079, + "eval_runtime": 0.2627, + "eval_samples_per_second": 620.423, + "eval_steps_per_second": 3.806, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1728 + }, + { + "epoch": 97.0, + "eval_accuracy": 0.8960239983729916, + "eval_auc": 0.9226829844599047, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6679655788277318, + "eval_f1_macro": 0.8031632457541016, + "eval_loss": 0.2735843360424042, + "eval_pr_auc": 0.6581443956834908, + "eval_precision": 0.6726618705035972, + "eval_precision_macro": 0.8049040839573975, + "eval_pred_class_0": 16610, + "eval_pred_class_1": 3058, + "eval_predicted_binding_ratio": 0.15548098434004473, + "eval_recall": 0.6633344082554015, + "eval_recall_macro": 0.801456544382424, + "eval_runtime": 0.2325, + "eval_samples_per_second": 701.163, + "eval_steps_per_second": 4.302, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1746 + }, + { + "epoch": 98.0, + "eval_accuracy": 0.8961765304047183, + "eval_auc": 0.9230679047936586, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.668075422626788, + "eval_f1_macro": 0.8032698713905396, + "eval_loss": 0.2730555534362793, + "eval_pr_auc": 0.6597232694357793, + "eval_precision": 0.6735496558505408, + "eval_precision_macro": 0.8053010360254088, + "eval_pred_class_0": 16617, + "eval_pred_class_1": 3051, + "eval_predicted_binding_ratio": 0.15512507626601588, + "eval_recall": 0.6626894550145115, + "eval_recall_macro": 0.801284970158309, + "eval_runtime": 0.2564, + "eval_samples_per_second": 635.817, + "eval_steps_per_second": 3.901, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1764 + }, + { + "epoch": 99.0, + "eval_accuracy": 0.8961256863941428, + "eval_auc": 0.9233850285396773, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6689353427321342, + "eval_f1_macro": 0.8036671286252258, + "eval_loss": 0.272703617811203, + "eval_pr_auc": 0.6608360801532023, + "eval_precision": 0.6723127035830619, + "eval_precision_macro": 0.8049176483332829, + "eval_pred_class_0": 16598, + "eval_pred_class_1": 3070, + "eval_predicted_binding_ratio": 0.1560911124669514, + "eval_recall": 0.6655917445985166, + "eval_recall_macro": 0.8024343101576514, + "eval_runtime": 0.2609, + "eval_samples_per_second": 624.703, + "eval_steps_per_second": 3.833, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1782 + }, + { + "epoch": 100.0, + "eval_accuracy": 0.8962782184258694, + "eval_auc": 0.9236704438040937, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6696891191709845, + "eval_f1_macro": 0.804084607836397, + "eval_loss": 0.27227067947387695, + "eval_pr_auc": 0.661859432748859, + "eval_precision": 0.672520325203252, + "eval_precision_macro": 0.8051325786806955, + "eval_pred_class_0": 16593, + "eval_pred_class_1": 3075, + "eval_predicted_binding_ratio": 0.15634533251982916, + "eval_recall": 0.6668816510802967, + "eval_recall_macro": 0.8030490829192756, + "eval_runtime": 0.2555, + "eval_samples_per_second": 638.071, + "eval_steps_per_second": 3.915, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1800 + }, + { + "epoch": 101.0, + "eval_accuracy": 0.8962782184258694, + "eval_auc": 0.9239191675474416, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6695821185617103, + "eval_f1_macro": 0.8040329626642457, + "eval_loss": 0.2719270884990692, + "eval_pr_auc": 0.6626733311213273, + "eval_precision": 0.6726326065733811, + "eval_precision_macro": 0.8051623412499325, + "eval_pred_class_0": 16595, + "eval_pred_class_1": 3073, + "eval_predicted_binding_ratio": 0.15624364449867806, + "eval_recall": 0.6665591744598517, + "eval_recall_macro": 0.802918025088319, + "eval_runtime": 0.2736, + "eval_samples_per_second": 595.794, + "eval_steps_per_second": 3.655, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1818 + }, + { + "epoch": 102.0, + "eval_accuracy": 0.896786658531625, + "eval_auc": 0.9242257996595844, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6714146973130463, + "eval_f1_macro": 0.805096334723234, + "eval_loss": 0.271486759185791, + "eval_pr_auc": 0.6637007390734015, + "eval_precision": 0.6740331491712708, + "eval_precision_macro": 0.8060660592459934, + "eval_pred_class_0": 16591, + "eval_pred_class_1": 3077, + "eval_predicted_binding_ratio": 0.15644702054098028, + "eval_recall": 0.6688165108029668, + "eval_recall_macro": 0.8041372346976746, + "eval_runtime": 0.2489, + "eval_samples_per_second": 654.758, + "eval_steps_per_second": 4.017, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1836 + }, + { + "epoch": 103.0, + "eval_accuracy": 0.8973459426479561, + "eval_auc": 0.9244978230054357, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.673248098397799, + "eval_f1_macro": 0.8061779895433214, + "eval_loss": 0.2711206376552582, + "eval_pr_auc": 0.6646142778873767, + "eval_precision": 0.6757634827810266, + "eval_precision_macro": 0.8071101922645338, + "eval_pred_class_0": 16590, + "eval_pred_class_1": 3078, + "eval_predicted_binding_ratio": 0.15649786455155582, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.8052555669553397, + "eval_runtime": 0.2242, + "eval_samples_per_second": 727.107, + "eval_steps_per_second": 4.461, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1854 + }, + { + "epoch": 104.0, + "eval_accuracy": 0.8971934106162294, + "eval_auc": 0.9248346842593396, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6732385261797027, + "eval_f1_macro": 0.8061196854381076, + "eval_loss": 0.2707342207431793, + "eval_pr_auc": 0.6659244139495173, + "eval_precision": 0.6747651441528992, + "eval_precision_macro": 0.8066847854532062, + "eval_pred_class_0": 16581, + "eval_pred_class_1": 3087, + "eval_predicted_binding_ratio": 0.15695546064673582, + "eval_recall": 0.671718800386972, + "eval_recall_macro": 0.8055581990104113, + "eval_runtime": 0.255, + "eval_samples_per_second": 639.197, + "eval_steps_per_second": 3.921, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1872 + }, + { + "epoch": 105.0, + "eval_accuracy": 0.8975493186902583, + "eval_auc": 0.9250955152313902, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6746326497658647, + "eval_f1_macro": 0.8069177490147248, + "eval_loss": 0.2704195976257324, + "eval_pr_auc": 0.666907608407933, + "eval_precision": 0.675614489003881, + "eval_precision_macro": 0.8072811827258788, + "eval_pred_class_0": 16576, + "eval_pred_class_1": 3092, + "eval_predicted_binding_ratio": 0.15720968069961358, + "eval_recall": 0.673653660109642, + "eval_recall_macro": 0.8065558093510122, + "eval_runtime": 0.2579, + "eval_samples_per_second": 631.977, + "eval_steps_per_second": 3.877, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1890 + }, + { + "epoch": 106.0, + "eval_accuracy": 0.8977018507219849, + "eval_auc": 0.9254069649305167, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6749596122778675, + "eval_f1_macro": 0.8071292359343842, + "eval_loss": 0.26996490359306335, + "eval_pr_auc": 0.6681450085536085, + "eval_precision": 0.6762706377468436, + "eval_precision_macro": 0.8076147808433838, + "eval_pred_class_0": 16579, + "eval_pred_class_1": 3089, + "eval_predicted_binding_ratio": 0.15705714866788692, + "eval_recall": 0.673653660109642, + "eval_recall_macro": 0.8066463507888102, + "eval_runtime": 0.2667, + "eval_samples_per_second": 611.086, + "eval_steps_per_second": 3.749, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1908 + }, + { + "epoch": 107.0, + "eval_accuracy": 0.8980577587960138, + "eval_auc": 0.9256414889578861, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6758286176232822, + "eval_f1_macro": 0.8076738937412058, + "eval_loss": 0.2696084678173065, + "eval_pr_auc": 0.6691135839215693, + "eval_precision": 0.6776913099870299, + "eval_precision_macro": 0.8083644683075526, + "eval_pred_class_0": 16584, + "eval_pred_class_1": 3084, + "eval_predicted_binding_ratio": 0.15680292861500916, + "eval_recall": 0.673976136730087, + "eval_recall_macro": 0.8069886719746289, + "eval_runtime": 0.2591, + "eval_samples_per_second": 629.158, + "eval_steps_per_second": 3.86, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1926 + }, + { + "epoch": 108.0, + "eval_accuracy": 0.8980069147854383, + "eval_auc": 0.9258192627838369, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6769726247987118, + "eval_f1_macro": 0.8082079811791663, + "eval_loss": 0.26944610476493835, + "eval_pr_auc": 0.6696389857739906, + "eval_precision": 0.676101640398842, + "eval_precision_macro": 0.8078859551713395, + "eval_pred_class_0": 16559, + "eval_pred_class_1": 3109, + "eval_predicted_binding_ratio": 0.158074028879398, + "eval_recall": 0.6778458561754273, + "eval_recall_macro": 0.8085311854668409, + "eval_runtime": 0.261, + "eval_samples_per_second": 624.512, + "eval_steps_per_second": 3.831, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1944 + }, + { + "epoch": 109.0, + "eval_accuracy": 0.898159446817165, + "eval_auc": 0.9261110139050743, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.676779086654833, + "eval_f1_macro": 0.8081683537924276, + "eval_loss": 0.26896923780441284, + "eval_pr_auc": 0.6708410126864026, + "eval_precision": 0.6773255813953488, + "eval_precision_macro": 0.8083707318031536, + "eval_pred_class_0": 16572, + "eval_pred_class_1": 3096, + "eval_predicted_binding_ratio": 0.1574130567419158, + "eval_recall": 0.6762334730732021, + "eval_recall_macro": 0.8079664377498563, + "eval_runtime": 0.2579, + "eval_samples_per_second": 632.096, + "eval_steps_per_second": 3.878, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1962 + }, + { + "epoch": 110.0, + "eval_accuracy": 0.8985153548911938, + "eval_auc": 0.9263511243868452, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6783757653883339, + "eval_f1_macro": 0.809064127789247, + "eval_loss": 0.26861709356307983, + "eval_pr_auc": 0.6718712574127733, + "eval_precision": 0.677938808373591, + "eval_precision_macro": 0.808902387342021, + "eval_pred_class_0": 16563, + "eval_pred_class_1": 3105, + "eval_predicted_binding_ratio": 0.15787065283709578, + "eval_recall": 0.6788132860367624, + "eval_recall_macro": 0.8092261637523704, + "eval_runtime": 0.2543, + "eval_samples_per_second": 640.936, + "eval_steps_per_second": 3.932, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1980 + }, + { + "epoch": 111.0, + "eval_accuracy": 0.8989729509863738, + "eval_auc": 0.9265832055569767, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6794644297467334, + "eval_f1_macro": 0.8097506232989936, + "eval_loss": 0.2682516574859619, + "eval_pr_auc": 0.6727910026400046, + "eval_precision": 0.6797934151065204, + "eval_precision_macro": 0.8098725675411902, + "eval_pred_class_0": 16570, + "eval_pred_class_1": 3098, + "eval_predicted_binding_ratio": 0.1575147447630669, + "eval_recall": 0.6791357626572073, + "eval_recall_macro": 0.809628845896721, + "eval_runtime": 0.2091, + "eval_samples_per_second": 779.644, + "eval_steps_per_second": 4.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1998 + }, + { + "epoch": 111.11111111111111, + "grad_norm": 13330.4609375, + "learning_rate": 9.552616846852138e-07, + "loss": 0.252, + "step": 2000 + }, + { + "epoch": 112.0, + "eval_accuracy": 0.899176327028676, + "eval_auc": 0.9269119888367457, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6793856103476152, + "eval_f1_macro": 0.8097842051315767, + "eval_loss": 0.2677896022796631, + "eval_pr_auc": 0.6743175064299574, + "eval_precision": 0.6812581063553826, + "eval_precision_macro": 0.8104795114507255, + "eval_pred_class_0": 16584, + "eval_pred_class_1": 3084, + "eval_predicted_binding_ratio": 0.15680292861500916, + "eval_recall": 0.6775233795549823, + "eval_recall_macro": 0.8090942786590025, + "eval_runtime": 0.2454, + "eval_samples_per_second": 664.193, + "eval_steps_per_second": 4.075, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2016 + }, + { + "epoch": 113.0, + "eval_accuracy": 0.8994813910921293, + "eval_auc": 0.927058521341044, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6812832500403031, + "eval_f1_macro": 0.8108073208521016, + "eval_loss": 0.26760444045066833, + "eval_pr_auc": 0.6746134464200654, + "eval_precision": 0.6811734364925854, + "eval_precision_macro": 0.8107666047608406, + "eval_pred_class_0": 16566, + "eval_pred_class_1": 3102, + "eval_predicted_binding_ratio": 0.15771812080536912, + "eval_recall": 0.6813930990003225, + "eval_recall_macro": 0.8108480555060766, + "eval_runtime": 0.2588, + "eval_samples_per_second": 629.901, + "eval_steps_per_second": 3.864, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2034 + }, + { + "epoch": 114.0, + "eval_accuracy": 0.8993797030709783, + "eval_auc": 0.9272620862892311, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6811664250040277, + "eval_f1_macro": 0.81071512110173, + "eval_loss": 0.267299622297287, + "eval_pr_auc": 0.6753372489001316, + "eval_precision": 0.6806181584030908, + "eval_precision_macro": 0.8105119532505733, + "eval_pred_class_0": 16562, + "eval_pred_class_1": 3106, + "eval_predicted_binding_ratio": 0.15792149684767134, + "eval_recall": 0.6817155756207675, + "eval_recall_macro": 0.8109187523785011, + "eval_runtime": 0.2924, + "eval_samples_per_second": 557.453, + "eval_steps_per_second": 3.42, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2052 + }, + { + "epoch": 115.0, + "eval_accuracy": 0.8994813910921293, + "eval_auc": 0.9274648142425077, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6817962337035248, + "eval_f1_macro": 0.8110548055574955, + "eval_loss": 0.2670327126979828, + "eval_pr_auc": 0.6759104665767571, + "eval_precision": 0.6805912596401028, + "eval_precision_macro": 0.8106085073266955, + "eval_pred_class_0": 16556, + "eval_pred_class_1": 3112, + "eval_predicted_binding_ratio": 0.15822656091112466, + "eval_recall": 0.6830054821025475, + "eval_recall_macro": 0.811503344660859, + "eval_runtime": 0.2501, + "eval_samples_per_second": 651.841, + "eval_steps_per_second": 3.999, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2070 + }, + { + "epoch": 116.0, + "eval_accuracy": 0.8995830791132805, + "eval_auc": 0.927707601161492, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6824248271426274, + "eval_f1_macro": 0.8113938913621764, + "eval_loss": 0.2667410373687744, + "eval_pr_auc": 0.676645416517246, + "eval_precision": 0.6805644644002565, + "eval_precision_macro": 0.8107051929252038, + "eval_pred_class_0": 16550, + "eval_pred_class_1": 3118, + "eval_predicted_binding_ratio": 0.15853162497457798, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.812087936943217, + "eval_runtime": 0.2676, + "eval_samples_per_second": 609.094, + "eval_steps_per_second": 3.737, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2088 + }, + { + "epoch": 117.0, + "eval_accuracy": 0.8998881431767338, + "eval_auc": 0.927944061956154, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6828796907714608, + "eval_f1_macro": 0.8117208850210732, + "eval_loss": 0.26635268330574036, + "eval_pr_auc": 0.6777081363329963, + "eval_precision": 0.6821106821106822, + "eval_precision_macro": 0.8114357758379498, + "eval_pred_class_0": 16560, + "eval_pred_class_1": 3108, + "eval_predicted_binding_ratio": 0.15802318486882244, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8120069041569002, + "eval_runtime": 0.2624, + "eval_samples_per_second": 621.137, + "eval_steps_per_second": 3.811, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2106 + }, + { + "epoch": 118.0, + "eval_accuracy": 0.9001932072401871, + "eval_auc": 0.9282739839383012, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6831315577078289, + "eval_f1_macro": 0.8119498952052617, + "eval_loss": 0.2659379541873932, + "eval_pr_auc": 0.6790830976589266, + "eval_precision": 0.6839043309631545, + "eval_precision_macro": 0.8122369488772572, + "eval_pred_class_0": 16574, + "eval_pred_class_1": 3094, + "eval_predicted_binding_ratio": 0.1573113687207647, + "eval_recall": 0.6823605288616575, + "eval_recall_macro": 0.8116637557086703, + "eval_runtime": 0.2592, + "eval_samples_per_second": 628.745, + "eval_steps_per_second": 3.857, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2124 + }, + { + "epoch": 119.0, + "eval_accuracy": 0.9001932072401871, + "eval_auc": 0.9284926050623749, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6838460299565148, + "eval_f1_macro": 0.812294615183528, + "eval_loss": 0.2656570076942444, + "eval_pr_auc": 0.6798372835892805, + "eval_precision": 0.6830759330759331, + "eval_precision_macro": 0.8120089810307202, + "eval_pred_class_0": 16560, + "eval_pred_class_1": 3108, + "eval_predicted_binding_ratio": 0.15802318486882244, + "eval_recall": 0.6846178652047726, + "eval_recall_macro": 0.8125811605253657, + "eval_runtime": 0.2617, + "eval_samples_per_second": 622.824, + "eval_steps_per_second": 3.821, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2142 + }, + { + "epoch": 120.0, + "eval_accuracy": 0.9001932072401871, + "eval_auc": 0.9287595480437707, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6841512469831054, + "eval_f1_macro": 0.8124418563951485, + "eval_loss": 0.26531943678855896, + "eval_pr_auc": 0.6808986584956077, + "eval_precision": 0.682723185613359, + "eval_precision_macro": 0.8119125170545953, + "eval_pred_class_0": 16554, + "eval_pred_class_1": 3114, + "eval_predicted_binding_ratio": 0.1583282489322758, + "eval_recall": 0.6855852950661077, + "eval_recall_macro": 0.8129743340182352, + "eval_runtime": 0.2686, + "eval_samples_per_second": 606.857, + "eval_steps_per_second": 3.723, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2160 + }, + { + "epoch": 121.0, + "eval_accuracy": 0.9005999593247915, + "eval_auc": 0.9289936341086871, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6851344822032533, + "eval_f1_macro": 0.8130595887334677, + "eval_loss": 0.26495063304901123, + "eval_pr_auc": 0.6818525990139518, + "eval_precision": 0.6843629343629344, + "eval_precision_macro": 0.8127732546210807, + "eval_pred_class_0": 16560, + "eval_pred_class_1": 3108, + "eval_predicted_binding_ratio": 0.15802318486882244, + "eval_recall": 0.6859077716865527, + "eval_recall_macro": 0.8133468356833198, + "eval_runtime": 0.262, + "eval_samples_per_second": 622.159, + "eval_steps_per_second": 3.817, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2178 + }, + { + "epoch": 122.0, + "eval_accuracy": 0.9010067114093959, + "eval_auc": 0.9291471740122346, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6880307643005928, + "eval_f1_macro": 0.8146000626156236, + "eval_loss": 0.2648627460002899, + "eval_pr_auc": 0.6821768129155847, + "eval_precision": 0.6837579617834395, + "eval_precision_macro": 0.8130188647252145, + "eval_pred_class_0": 16528, + "eval_pred_class_1": 3140, + "eval_predicted_binding_ratio": 0.1596501932072402, + "eval_recall": 0.692357304095453, + "eval_recall_macro": 0.8162094361365779, + "eval_runtime": 0.2598, + "eval_samples_per_second": 627.373, + "eval_steps_per_second": 3.849, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2196 + }, + { + "epoch": 123.0, + "eval_accuracy": 0.9011083994305471, + "eval_auc": 0.9293803062922532, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.687851067244423, + "eval_f1_macro": 0.8145493064661928, + "eval_loss": 0.26444998383522034, + "eval_pr_auc": 0.6832369783380787, + "eval_precision": 0.6846645367412141, + "eval_precision_macro": 0.8133686693864494, + "eval_pred_class_0": 16538, + "eval_pred_class_1": 3130, + "eval_predicted_binding_ratio": 0.15914175310148465, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.8157455657712839, + "eval_runtime": 0.2527, + "eval_samples_per_second": 645.052, + "eval_steps_per_second": 3.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2214 + }, + { + "epoch": 124.0, + "eval_accuracy": 0.9017185275574537, + "eval_auc": 0.929644952403895, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6885774125986789, + "eval_f1_macro": 0.815114870687036, + "eval_loss": 0.26401567459106445, + "eval_pr_auc": 0.6842971435384069, + "eval_precision": 0.6880231809401159, + "eval_precision_macro": 0.8149088251035563, + "eval_pred_class_0": 16562, + "eval_pred_class_1": 3106, + "eval_predicted_binding_ratio": 0.15792149684767134, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8153213845367371, + "eval_runtime": 0.2233, + "eval_samples_per_second": 729.933, + "eval_steps_per_second": 4.478, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2232 + }, + { + "epoch": 125.0, + "eval_accuracy": 0.9017185275574537, + "eval_auc": 0.9298876614628875, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6892782510850346, + "eval_f1_macro": 0.8154529561328843, + "eval_loss": 0.26374292373657227, + "eval_pr_auc": 0.6852698748697685, + "eval_precision": 0.6871794871794872, + "eval_precision_macro": 0.8146738625165021, + "eval_pred_class_0": 16548, + "eval_pred_class_1": 3120, + "eval_predicted_binding_ratio": 0.1586333129957291, + "eval_recall": 0.691389874234118, + "eval_recall_macro": 0.8162387893534325, + "eval_runtime": 0.2352, + "eval_samples_per_second": 692.981, + "eval_steps_per_second": 4.251, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2250 + }, + { + "epoch": 126.0, + "eval_accuracy": 0.9020235916209071, + "eval_auc": 0.9300761410376911, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6909382518043304, + "eval_f1_macro": 0.8163612439650636, + "eval_loss": 0.2635449767112732, + "eval_pr_auc": 0.685931037905785, + "eval_precision": 0.6873005743458839, + "eval_precision_macro": 0.815012329026093, + "eval_pred_class_0": 16534, + "eval_pred_class_1": 3134, + "eval_predicted_binding_ratio": 0.15934512914378687, + "eval_recall": 0.6946146404385682, + "eval_recall_macro": 0.8177304505385936, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.44, + "eval_steps_per_second": 3.88, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2268 + }, + { + "epoch": 127.0, + "eval_accuracy": 0.902837095790116, + "eval_auc": 0.9303049910181687, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6925181013676589, + "eval_f1_macro": 0.8174102840403102, + "eval_loss": 0.263118714094162, + "eval_pr_auc": 0.6869422132706717, + "eval_precision": 0.691072575465639, + "eval_precision_macro": 0.8168725206674576, + "eval_pred_class_0": 16554, + "eval_pred_class_1": 3114, + "eval_predicted_binding_ratio": 0.1583282489322758, + "eval_recall": 0.6939696871976782, + "eval_recall_macro": 0.8179512225449368, + "eval_runtime": 0.2583, + "eval_samples_per_second": 630.976, + "eval_steps_per_second": 3.871, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2286 + }, + { + "epoch": 128.0, + "eval_accuracy": 0.9027354077689648, + "eval_auc": 0.9304756990498765, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6929866795056974, + "eval_f1_macro": 0.8176004232749752, + "eval_loss": 0.2629205286502838, + "eval_pr_auc": 0.6875064110834537, + "eval_precision": 0.689776357827476, + "eval_precision_macro": 0.8164083143593783, + "eval_pred_class_0": 16538, + "eval_pred_class_1": 3130, + "eval_predicted_binding_ratio": 0.15914175310148465, + "eval_recall": 0.6962270235407932, + "eval_recall_macro": 0.8188082664031002, + "eval_runtime": 0.2527, + "eval_samples_per_second": 644.934, + "eval_steps_per_second": 3.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2304 + }, + { + "epoch": 129.0, + "eval_accuracy": 0.9030404718324181, + "eval_auc": 0.9306633221647718, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6937530110807772, + "eval_f1_macro": 0.818077689508494, + "eval_loss": 0.2625824213027954, + "eval_pr_auc": 0.6881187823465719, + "eval_precision": 0.690978886756238, + "eval_precision_macro": 0.8170466915947796, + "eval_pred_class_0": 16542, + "eval_pred_class_1": 3126, + "eval_predicted_binding_ratio": 0.15893837705918243, + "eval_recall": 0.6965495001612383, + "eval_recall_macro": 0.8191204071096527, + "eval_runtime": 0.2718, + "eval_samples_per_second": 599.717, + "eval_steps_per_second": 3.679, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2322 + }, + { + "epoch": 130.0, + "eval_accuracy": 0.902938783811267, + "eval_auc": 0.9308322783466673, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6933333333333334, + "eval_f1_macro": 0.817839388722781, + "eval_loss": 0.2622954547405243, + "eval_pr_auc": 0.6887035254510873, + "eval_precision": 0.6907810499359796, + "eval_precision_macro": 0.816890766747487, + "eval_pred_class_0": 16544, + "eval_pred_class_1": 3124, + "eval_predicted_binding_ratio": 0.15883668903803133, + "eval_recall": 0.6959045469203483, + "eval_recall_macro": 0.8187979304892078, + "eval_runtime": 0.2604, + "eval_samples_per_second": 625.881, + "eval_steps_per_second": 3.84, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2340 + }, + { + "epoch": 131.0, + "eval_accuracy": 0.9027354077689648, + "eval_auc": 0.9309924460820045, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6929866795056974, + "eval_f1_macro": 0.8176004232749752, + "eval_loss": 0.26203182339668274, + "eval_pr_auc": 0.6893090005690568, + "eval_precision": 0.689776357827476, + "eval_precision_macro": 0.8164083143593783, + "eval_pred_class_0": 16538, + "eval_pred_class_1": 3130, + "eval_predicted_binding_ratio": 0.15914175310148465, + "eval_recall": 0.6962270235407932, + "eval_recall_macro": 0.8188082664031002, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.443, + "eval_steps_per_second": 3.898, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2358 + }, + { + "epoch": 132.0, + "eval_accuracy": 0.902938783811267, + "eval_auc": 0.9312000694822565, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6936286310383566, + "eval_f1_macro": 0.817981812876073, + "eval_loss": 0.26177045702934265, + "eval_pr_auc": 0.6902838377634022, + "eval_precision": 0.6904153354632588, + "eval_precision_macro": 0.8167882699809945, + "eval_pred_class_0": 16538, + "eval_pred_class_1": 3130, + "eval_predicted_binding_ratio": 0.15914175310148465, + "eval_recall": 0.6968719767816833, + "eval_recall_macro": 0.8191911039820772, + "eval_runtime": 0.2756, + "eval_samples_per_second": 591.34, + "eval_steps_per_second": 3.628, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2376 + }, + { + "epoch": 133.0, + "eval_accuracy": 0.902938783811267, + "eval_auc": 0.9314264084780033, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6933333333333334, + "eval_f1_macro": 0.817839388722781, + "eval_loss": 0.2614164650440216, + "eval_pr_auc": 0.6912123921690412, + "eval_precision": 0.6907810499359796, + "eval_precision_macro": 0.816890766747487, + "eval_pred_class_0": 16544, + "eval_pred_class_1": 3124, + "eval_predicted_binding_ratio": 0.15883668903803133, + "eval_recall": 0.6959045469203483, + "eval_recall_macro": 0.8187979304892078, + "eval_runtime": 0.2552, + "eval_samples_per_second": 638.769, + "eval_steps_per_second": 3.919, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2394 + }, + { + "epoch": 134.0, + "eval_accuracy": 0.9030404718324181, + "eval_auc": 0.9316330780933575, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6934576434656807, + "eval_f1_macro": 0.8179352236654993, + "eval_loss": 0.26109954714775085, + "eval_pr_auc": 0.6919150376493911, + "eval_precision": 0.6913461538461538, + "eval_precision_macro": 0.817149992562429, + "eval_pred_class_0": 16548, + "eval_pred_class_1": 3120, + "eval_predicted_binding_ratio": 0.1586333129957291, + "eval_recall": 0.6955820702999033, + "eval_recall_macro": 0.8187272336167832, + "eval_runtime": 0.2602, + "eval_samples_per_second": 626.557, + "eval_steps_per_second": 3.844, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2412 + }, + { + "epoch": 135.0, + "eval_accuracy": 0.9030404718324181, + "eval_auc": 0.9318176062735843, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.694733472066592, + "eval_f1_macro": 0.8185505131193367, + "eval_loss": 0.2609698474407196, + "eval_pr_auc": 0.69244594350898, + "eval_precision": 0.6897647806738716, + "eval_precision_macro": 0.8167078351983328, + "eval_pred_class_0": 16522, + "eval_pred_class_1": 3146, + "eval_predicted_binding_ratio": 0.1599552572706935, + "eval_recall": 0.6997742663656885, + "eval_recall_macro": 0.8204309854192178, + "eval_runtime": 0.2491, + "eval_samples_per_second": 654.333, + "eval_steps_per_second": 4.014, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2430 + }, + { + "epoch": 136.0, + "eval_accuracy": 0.9034472239170226, + "eval_auc": 0.9320372006475538, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6941536479304236, + "eval_f1_macro": 0.8184144035830462, + "eval_loss": 0.2604828178882599, + "eval_pr_auc": 0.6935318303087233, + "eval_precision": 0.6933719433719434, + "eval_precision_macro": 0.8181231697536046, + "eval_pred_class_0": 16560, + "eval_pred_class_1": 3108, + "eval_predicted_binding_ratio": 0.15802318486882244, + "eval_recall": 0.6949371170590132, + "eval_recall_macro": 0.8187065617889984, + "eval_runtime": 0.2485, + "eval_samples_per_second": 655.847, + "eval_steps_per_second": 4.024, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2448 + }, + { + "epoch": 137.0, + "eval_accuracy": 0.9033963799064471, + "eval_auc": 0.9321443165310757, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6960972488803583, + "eval_f1_macro": 0.8193338378363828, + "eval_loss": 0.2604370415210724, + "eval_pr_auc": 0.693643099537468, + "eval_precision": 0.6905744208187877, + "eval_precision_macro": 0.8172857573610195, + "eval_pred_class_0": 16517, + "eval_pred_class_1": 3151, + "eval_predicted_binding_ratio": 0.16020947732357127, + "eval_recall": 0.7017091260883586, + "eval_recall_macro": 0.8214285957598189, + "eval_runtime": 0.2215, + "eval_samples_per_second": 736.024, + "eval_steps_per_second": 4.515, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2466 + }, + { + "epoch": 138.0, + "eval_accuracy": 0.9034980679275981, + "eval_auc": 0.9323432098797633, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.695638229634381, + "eval_f1_macro": 0.8191484199531422, + "eval_loss": 0.2601032257080078, + "eval_pr_auc": 0.694473981068256, + "eval_precision": 0.691866028708134, + "eval_precision_macro": 0.817746962215919, + "eval_pred_class_0": 16533, + "eval_pred_class_1": 3135, + "eval_predicted_binding_ratio": 0.1593959731543624, + "eval_recall": 0.6994517897452435, + "eval_recall_macro": 0.8205715519016554, + "eval_runtime": 0.2592, + "eval_samples_per_second": 628.928, + "eval_steps_per_second": 3.858, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2484 + }, + { + "epoch": 138.88888888888889, + "grad_norm": 12954.3583984375, + "learning_rate": 9.068887706579789e-07, + "loss": 0.2385, + "step": 2500 + }, + { + "epoch": 139.0, + "eval_accuracy": 0.904311572096807, + "eval_auc": 0.9325454122780963, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6969404186795491, + "eval_f1_macro": 0.8200635197304043, + "eval_loss": 0.25969693064689636, + "eval_pr_auc": 0.6954050242581626, + "eval_precision": 0.6960437439691219, + "eval_precision_macro": 0.819729100681946, + "eval_pred_class_0": 16559, + "eval_pred_class_1": 3109, + "eval_predicted_binding_ratio": 0.158074028879398, + "eval_recall": 0.6978394066430184, + "eval_recall_macro": 0.820399150415129, + "eval_runtime": 0.2657, + "eval_samples_per_second": 613.439, + "eval_steps_per_second": 3.763, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2502 + }, + { + "epoch": 140.0, + "eval_accuracy": 0.903853976001627, + "eval_auc": 0.9326487714170208, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6971977582065653, + "eval_f1_macro": 0.8200261554019741, + "eval_loss": 0.2596379518508911, + "eval_pr_auc": 0.6956168134976223, + "eval_precision": 0.6924300254452926, + "eval_precision_macro": 0.8182556808417458, + "eval_pred_class_0": 16524, + "eval_pred_class_1": 3144, + "eval_predicted_binding_ratio": 0.15985356924954242, + "eval_recall": 0.7020316027088036, + "eval_recall_macro": 0.8218312779041694, + "eval_runtime": 0.2604, + "eval_samples_per_second": 626.028, + "eval_steps_per_second": 3.841, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2520 + }, + { + "epoch": 141.0, + "eval_accuracy": 0.904311572096807, + "eval_auc": 0.9329799294265357, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6962556488056811, + "eval_f1_macro": 0.819733135205496, + "eval_loss": 0.25904589891433716, + "eval_pr_auc": 0.6970902220334548, + "eval_precision": 0.6969305331179322, + "eval_precision_macro": 0.8199852086334245, + "eval_pred_class_0": 16573, + "eval_pred_class_1": 3095, + "eval_predicted_binding_ratio": 0.15736221273134024, + "eval_recall": 0.6955820702999033, + "eval_recall_macro": 0.8194817455984336, + "eval_runtime": 0.2524, + "eval_samples_per_second": 645.771, + "eval_steps_per_second": 3.962, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2538 + }, + { + "epoch": 142.0, + "eval_accuracy": 0.9047691681919869, + "eval_auc": 0.9330630060376336, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994061948322902, + "eval_f1_macro": 0.8214143192859533, + "eval_loss": 0.2590080201625824, + "eval_pr_auc": 0.697179333543334, + "eval_precision": 0.6961661341853035, + "eval_precision_macro": 0.8202078705755396, + "eval_pred_class_0": 16538, + "eval_pred_class_1": 3130, + "eval_predicted_binding_ratio": 0.15914175310148465, + "eval_recall": 0.7026765559496937, + "eval_recall_macro": 0.8226366421928706, + "eval_runtime": 0.2495, + "eval_samples_per_second": 653.345, + "eval_steps_per_second": 4.008, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2556 + }, + { + "epoch": 143.0, + "eval_accuracy": 0.9047691681919869, + "eval_auc": 0.9332962551076398, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6991164658634538, + "eval_f1_macro": 0.8212745809731632, + "eval_loss": 0.25865858793258667, + "eval_pr_auc": 0.6981950669404262, + "eval_precision": 0.6965428937259923, + "eval_precision_macro": 0.8203156925109651, + "eval_pred_class_0": 16544, + "eval_pred_class_1": 3124, + "eval_predicted_binding_ratio": 0.15883668903803133, + "eval_recall": 0.7017091260883586, + "eval_recall_macro": 0.8222434687000011, + "eval_runtime": 0.2633, + "eval_samples_per_second": 619.149, + "eval_steps_per_second": 3.798, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2574 + }, + { + "epoch": 144.0, + "eval_accuracy": 0.9050233882448647, + "eval_auc": 0.9334749729859892, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995818591186876, + "eval_f1_macro": 0.8215887434369935, + "eval_loss": 0.2584296464920044, + "eval_pr_auc": 0.6986932637956595, + "eval_precision": 0.697786333012512, + "eval_precision_macro": 0.8209190259709409, + "eval_pred_class_0": 16551, + "eval_pred_class_1": 3117, + "eval_predicted_binding_ratio": 0.15848078096400245, + "eval_recall": 0.7013866494679136, + "eval_recall_macro": 0.8222633132653747, + "eval_runtime": 0.2769, + "eval_samples_per_second": 588.703, + "eval_steps_per_second": 3.612, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2592 + }, + { + "epoch": 145.0, + "eval_accuracy": 0.9050742322554403, + "eval_auc": 0.9336868786857826, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6991136180499598, + "eval_f1_macro": 0.8213807805320277, + "eval_loss": 0.25803840160369873, + "eval_pr_auc": 0.699660351667112, + "eval_precision": 0.6987757731958762, + "eval_precision_macro": 0.8212545854629465, + "eval_pred_class_0": 16564, + "eval_pred_class_1": 3104, + "eval_predicted_binding_ratio": 0.15781980882652025, + "eval_recall": 0.6994517897452435, + "eval_recall_macro": 0.8215071467589017, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.163, + "eval_steps_per_second": 3.891, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2610 + }, + { + "epoch": 146.0, + "eval_accuracy": 0.9047183241814114, + "eval_auc": 0.9337996783486955, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995831997435076, + "eval_f1_macro": 0.8214817322060338, + "eval_loss": 0.2579362094402313, + "eval_pr_auc": 0.6999214829412527, + "eval_precision": 0.6955690149824674, + "eval_precision_macro": 0.8199882459220607, + "eval_pred_class_0": 16531, + "eval_pred_class_1": 3137, + "eval_predicted_binding_ratio": 0.15949766117551353, + "eval_recall": 0.7036439858110287, + "eval_recall_macro": 0.8229996352064741, + "eval_runtime": 0.2538, + "eval_samples_per_second": 642.317, + "eval_steps_per_second": 3.941, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2628 + }, + { + "epoch": 147.0, + "eval_accuracy": 0.9051759202765914, + "eval_auc": 0.9339161666287131, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.701647736362182, + "eval_f1_macro": 0.8226388900943448, + "eval_loss": 0.2578524649143219, + "eval_pr_auc": 0.7001081914566338, + "eval_precision": 0.6961904761904761, + "eval_precision_macro": 0.820610070399391, + "eval_pred_class_0": 16518, + "eval_pred_class_1": 3150, + "eval_predicted_binding_ratio": 0.16015863331299574, + "eval_recall": 0.7071912286359239, + "eval_recall_macro": 0.8247128956603896, + "eval_runtime": 0.2529, + "eval_samples_per_second": 644.43, + "eval_steps_per_second": 3.954, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2646 + }, + { + "epoch": 148.0, + "eval_accuracy": 0.9050233882448647, + "eval_auc": 0.9341703697689739, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995818591186876, + "eval_f1_macro": 0.8215887434369935, + "eval_loss": 0.25731131434440613, + "eval_pr_auc": 0.7012902373057504, + "eval_precision": 0.697786333012512, + "eval_precision_macro": 0.8209190259709409, + "eval_pred_class_0": 16551, + "eval_pred_class_1": 3117, + "eval_predicted_binding_ratio": 0.15848078096400245, + "eval_recall": 0.7013866494679136, + "eval_recall_macro": 0.8222633132653747, + "eval_runtime": 0.2512, + "eval_samples_per_second": 648.858, + "eval_steps_per_second": 3.981, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2664 + }, + { + "epoch": 149.0, + "eval_accuracy": 0.9052776082977425, + "eval_auc": 0.9343408636857047, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003377834968635, + "eval_f1_macro": 0.8220430425380088, + "eval_loss": 0.25706911087036133, + "eval_pr_auc": 0.7018656885391451, + "eval_precision": 0.6986521181001284, + "eval_precision_macro": 0.8214140242506442, + "eval_pred_class_0": 16552, + "eval_pred_class_1": 3116, + "eval_predicted_binding_ratio": 0.1584299369534269, + "eval_recall": 0.7020316027088036, + "eval_recall_macro": 0.8226763313236177, + "eval_runtime": 0.2513, + "eval_samples_per_second": 648.672, + "eval_steps_per_second": 3.98, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2682 + }, + { + "epoch": 150.0, + "eval_accuracy": 0.9054301403294692, + "eval_auc": 0.9345392996069414, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7006758931445124, + "eval_f1_macro": 0.822259931959612, + "eval_loss": 0.2567782402038574, + "eval_pr_auc": 0.7026441101697649, + "eval_precision": 0.6993254095727593, + "eval_precision_macro": 0.8217557280421937, + "eval_pred_class_0": 16555, + "eval_pred_class_1": 3113, + "eval_predicted_binding_ratio": 0.15827740492170023, + "eval_recall": 0.7020316027088036, + "eval_recall_macro": 0.8227668727614157, + "eval_runtime": 0.2591, + "eval_samples_per_second": 629.188, + "eval_steps_per_second": 3.86, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2700 + }, + { + "epoch": 151.0, + "eval_accuracy": 0.9051250762660159, + "eval_auc": 0.934685296823797, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003853564547207, + "eval_f1_macro": 0.8220121780461352, + "eval_loss": 0.25664329528808594, + "eval_pr_auc": 0.7029644506395569, + "eval_precision": 0.6974736168851935, + "eval_precision_macro": 0.8209271234175075, + "eval_pred_class_0": 16541, + "eval_pred_class_1": 3127, + "eval_predicted_binding_ratio": 0.158989221069758, + "eval_recall": 0.7033215091905837, + "eval_recall_macro": 0.8231100212096456, + "eval_runtime": 0.2408, + "eval_samples_per_second": 676.856, + "eval_steps_per_second": 4.152, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2718 + }, + { + "epoch": 152.0, + "eval_accuracy": 0.9055826723611958, + "eval_auc": 0.9349076649599691, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7008216529724505, + "eval_f1_macro": 0.8223840221758023, + "eval_loss": 0.25626465678215027, + "eval_pr_auc": 0.7040942732775656, + "eval_precision": 0.7002575660012879, + "eval_precision_macro": 0.82217322207805, + "eval_pred_class_0": 16562, + "eval_pred_class_1": 3106, + "eval_predicted_binding_ratio": 0.15792149684767134, + "eval_recall": 0.7013866494679136, + "eval_recall_macro": 0.8225952985373008, + "eval_runtime": 0.2564, + "eval_samples_per_second": 635.699, + "eval_steps_per_second": 3.9, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2736 + }, + { + "epoch": 153.0, + "eval_accuracy": 0.905226764287167, + "eval_auc": 0.9350594919437002, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7014734144778988, + "eval_f1_macro": 0.8225728005545544, + "eval_loss": 0.2562600076198578, + "eval_pr_auc": 0.7046022469135804, + "eval_precision": 0.6967865097041044, + "eval_precision_macro": 0.8208289583316286, + "eval_pred_class_0": 16525, + "eval_pred_class_1": 3143, + "eval_predicted_binding_ratio": 0.15980272523896685, + "eval_recall": 0.7062237987745889, + "eval_recall_macro": 0.8243499026467862, + "eval_runtime": 0.2358, + "eval_samples_per_second": 691.279, + "eval_steps_per_second": 4.241, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2754 + }, + { + "epoch": 154.0, + "eval_accuracy": 0.9054809843400448, + "eval_auc": 0.9352728672508359, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7010773436243769, + "eval_f1_macro": 0.8224715159707777, + "eval_loss": 0.25581732392311096, + "eval_pr_auc": 0.7056629719926804, + "eval_precision": 0.6991661321359846, + "eval_precision_macro": 0.821758292654095, + "eval_pred_class_0": 16550, + "eval_pred_class_1": 3118, + "eval_predicted_binding_ratio": 0.15853162497457798, + "eval_recall": 0.7029990325701386, + "eval_recall_macro": 0.8231902267335512, + "eval_runtime": 0.2602, + "eval_samples_per_second": 626.5, + "eval_steps_per_second": 3.844, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2772 + }, + { + "epoch": 155.0, + "eval_accuracy": 0.9057860484034981, + "eval_auc": 0.9354356530283926, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7025204687750842, + "eval_f1_macro": 0.8232752161134611, + "eval_loss": 0.25560733675956726, + "eval_pr_auc": 0.7062858637533224, + "eval_precision": 0.6994884910485933, + "eval_precision_macro": 0.8221444873622652, + "eval_pred_class_0": 16540, + "eval_pred_class_1": 3128, + "eval_predicted_binding_ratio": 0.15904006508033353, + "eval_recall": 0.7055788455336988, + "eval_recall_macro": 0.8244197722567993, + "eval_runtime": 0.2671, + "eval_samples_per_second": 610.27, + "eval_steps_per_second": 3.744, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2790 + }, + { + "epoch": 156.0, + "eval_accuracy": 0.9058877364246491, + "eval_auc": 0.9356189159837551, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7028415475999358, + "eval_f1_macro": 0.8234659606184656, + "eval_loss": 0.2553412616252899, + "eval_pr_auc": 0.7070636650718337, + "eval_precision": 0.6998081841432225, + "eval_precision_macro": 0.8223345636556499, + "eval_pred_class_0": 16540, + "eval_pred_class_1": 3128, + "eval_predicted_binding_ratio": 0.15904006508033353, + "eval_recall": 0.7059013221541438, + "eval_recall_macro": 0.8246111910462879, + "eval_runtime": 0.2539, + "eval_samples_per_second": 642.017, + "eval_steps_per_second": 3.939, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2808 + }, + { + "epoch": 157.0, + "eval_accuracy": 0.9061419564775269, + "eval_auc": 0.9357918624902231, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7031199742682535, + "eval_f1_macro": 0.8236899466727462, + "eval_loss": 0.25509998202323914, + "eval_pr_auc": 0.7077594222055618, + "eval_precision": 0.7013153673403913, + "eval_precision_macro": 0.8230158493399438, + "eval_pred_class_0": 16551, + "eval_pred_class_1": 3117, + "eval_predicted_binding_ratio": 0.15848078096400245, + "eval_recall": 0.7049338922928088, + "eval_recall_macro": 0.8243689199497484, + "eval_runtime": 0.255, + "eval_samples_per_second": 639.309, + "eval_steps_per_second": 3.922, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2826 + }, + { + "epoch": 158.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9359727409833409, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7034593724859212, + "eval_f1_macro": 0.8239074586532139, + "eval_loss": 0.2548539340496063, + "eval_pr_auc": 0.7084539339673516, + "eval_precision": 0.7019910083493899, + "eval_precision_macro": 0.8233586792381238, + "eval_pred_class_0": 16554, + "eval_pred_class_1": 3114, + "eval_predicted_binding_ratio": 0.1583282489322758, + "eval_recall": 0.7049338922928088, + "eval_recall_macro": 0.8244594613875464, + "eval_runtime": 0.2566, + "eval_samples_per_second": 635.242, + "eval_steps_per_second": 3.897, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2844 + }, + { + "epoch": 159.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9360812778117108, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7043380822794941, + "eval_f1_macro": 0.8242594639388645, + "eval_loss": 0.2548294961452484, + "eval_pr_auc": 0.708809734771939, + "eval_precision": 0.6993006993006993, + "eval_precision_macro": 0.822383674913635, + "eval_pred_class_0": 16522, + "eval_pred_class_1": 3146, + "eval_predicted_binding_ratio": 0.1599552572706935, + "eval_recall": 0.709448564979039, + "eval_recall_macro": 0.8261735491038733, + "eval_runtime": 0.27, + "eval_samples_per_second": 603.758, + "eval_steps_per_second": 3.704, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2862 + }, + { + "epoch": 160.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9362697476540152, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7040308334671591, + "eval_f1_macro": 0.82418310527748, + "eval_loss": 0.25451889634132385, + "eval_pr_auc": 0.70979034812957, + "eval_precision": 0.7012156110044786, + "eval_precision_macro": 0.8231322886360805, + "eval_pred_class_0": 16542, + "eval_pred_class_1": 3126, + "eval_predicted_binding_ratio": 0.15893837705918243, + "eval_recall": 0.7068687520154788, + "eval_recall_macro": 0.8252458083732854, + "eval_runtime": 0.2554, + "eval_samples_per_second": 638.302, + "eval_steps_per_second": 3.916, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2880 + }, + { + "epoch": 161.0, + "eval_accuracy": 0.9065487085621314, + "eval_auc": 0.9364340127714132, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7047863796980405, + "eval_f1_macro": 0.824637224883753, + "eval_loss": 0.25425252318382263, + "eval_pr_auc": 0.7104452483887299, + "eval_precision": 0.70208, + "eval_precision_macro": 0.8236265925164723, + "eval_pred_class_0": 16543, + "eval_pred_class_1": 3125, + "eval_predicted_binding_ratio": 0.15888753304860687, + "eval_recall": 0.7075137052563689, + "eval_recall_macro": 0.8256588264315284, + "eval_runtime": 0.2609, + "eval_samples_per_second": 624.767, + "eval_steps_per_second": 3.833, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2898 + }, + { + "epoch": 162.0, + "eval_accuracy": 0.9065487085621314, + "eval_auc": 0.9366581522223957, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7045965927354548, + "eval_f1_macro": 0.8245456841795291, + "eval_loss": 0.25392982363700867, + "eval_pr_auc": 0.7114160919166963, + "eval_precision": 0.7023389939122077, + "eval_precision_macro": 0.8237022823552698, + "eval_pred_class_0": 16547, + "eval_pred_class_1": 3121, + "eval_predicted_binding_ratio": 0.15868415700630464, + "eval_recall": 0.7068687520154788, + "eval_recall_macro": 0.8253967107696154, + "eval_runtime": 0.2487, + "eval_samples_per_second": 655.473, + "eval_steps_per_second": 4.021, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2916 + }, + { + "epoch": 163.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.936698911928028, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7069544364508393, + "eval_f1_macro": 0.8257724934589374, + "eval_loss": 0.25399070978164673, + "eval_pr_auc": 0.7113652786898896, + "eval_precision": 0.7010145846544071, + "eval_precision_macro": 0.8235604593370134, + "eval_pred_class_0": 16514, + "eval_pred_class_1": 3154, + "eval_predicted_binding_ratio": 0.16036200935529796, + "eval_recall": 0.7129958078039342, + "eval_recall_macro": 0.8280377119541189, + "eval_runtime": 0.2519, + "eval_samples_per_second": 647.019, + "eval_steps_per_second": 3.969, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2934 + }, + { + "epoch": 164.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9369248810888143, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7062780269058296, + "eval_f1_macro": 0.8254283885284618, + "eval_loss": 0.25362086296081543, + "eval_pr_auc": 0.7123203296069245, + "eval_precision": 0.7015590200445434, + "eval_precision_macro": 0.8236690712930735, + "eval_pred_class_0": 16525, + "eval_pred_class_1": 3143, + "eval_predicted_binding_ratio": 0.15980272523896685, + "eval_recall": 0.7110609480812641, + "eval_recall_macro": 0.827221184489114, + "eval_runtime": 0.261, + "eval_samples_per_second": 624.444, + "eval_steps_per_second": 3.831, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2952 + }, + { + "epoch": 165.0, + "eval_accuracy": 0.9072096806996136, + "eval_auc": 0.9371562809840187, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7070155723230053, + "eval_f1_macro": 0.8259456391835222, + "eval_loss": 0.2532632350921631, + "eval_pr_auc": 0.7133697274093473, + "eval_precision": 0.7039641943734015, + "eval_precision_macro": 0.8248055554696512, + "eval_pred_class_0": 16540, + "eval_pred_class_1": 3128, + "eval_predicted_binding_ratio": 0.15904006508033353, + "eval_recall": 0.710093518219929, + "eval_recall_macro": 0.8270996353096386, + "eval_runtime": 0.2545, + "eval_samples_per_second": 640.417, + "eval_steps_per_second": 3.929, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2970 + }, + { + "epoch": 166.0, + "eval_accuracy": 0.9071588366890381, + "eval_auc": 0.9372476886142239, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7072779737095223, + "eval_f1_macro": 0.8260542385315996, + "eval_loss": 0.2531469762325287, + "eval_pr_auc": 0.7136781835058345, + "eval_precision": 0.7032196365954734, + "eval_precision_macro": 0.8245394656269969, + "eval_pred_class_0": 16531, + "eval_pred_class_1": 3137, + "eval_predicted_binding_ratio": 0.15949766117551353, + "eval_recall": 0.7113834247017091, + "eval_recall_macro": 0.8275936861541986, + "eval_runtime": 0.2653, + "eval_samples_per_second": 614.378, + "eval_steps_per_second": 3.769, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2988 + }, + { + "epoch": 166.66666666666666, + "grad_norm": 14056.4111328125, + "learning_rate": 8.432618494003656e-07, + "loss": 0.2279, + "step": 3000 + }, + { + "epoch": 167.0, + "eval_accuracy": 0.9072605247101891, + "eval_auc": 0.9373718655684177, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7077859660365268, + "eval_f1_macro": 0.8263351175441593, + "eval_loss": 0.25298377871513367, + "eval_pr_auc": 0.7140728736694715, + "eval_precision": 0.7032792104425343, + "eval_precision_macro": 0.8246534613355044, + "eval_pred_class_0": 16527, + "eval_pred_class_1": 3141, + "eval_predicted_binding_ratio": 0.15970103721781573, + "eval_recall": 0.7123508545630441, + "eval_recall_macro": 0.8280472206056, + "eval_runtime": 0.2651, + "eval_samples_per_second": 614.943, + "eval_steps_per_second": 3.773, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3006 + }, + { + "epoch": 168.0, + "eval_accuracy": 0.9075655887736425, + "eval_auc": 0.9374749229998747, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7094918504314478, + "eval_f1_macro": 0.8272654245808608, + "eval_loss": 0.25285741686820984, + "eval_pr_auc": 0.7143683695359583, + "eval_precision": 0.7031992397846056, + "eval_precision_macro": 0.8249204363177162, + "eval_pred_class_0": 16511, + "eval_pred_class_1": 3157, + "eval_predicted_binding_ratio": 0.16051454138702462, + "eval_recall": 0.7158980973879394, + "eval_recall_macro": 0.8296699396217176, + "eval_runtime": 0.2383, + "eval_samples_per_second": 683.908, + "eval_steps_per_second": 4.196, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3024 + }, + { + "epoch": 169.0, + "eval_accuracy": 0.9076672767947935, + "eval_auc": 0.9377269168628721, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7087876844130853, + "eval_f1_macro": 0.8269618180373584, + "eval_loss": 0.25237372517585754, + "eval_pr_auc": 0.7154735567799367, + "eval_precision": 0.7049441786283892, + "eval_precision_macro": 0.8255259815297635, + "eval_pred_class_0": 16533, + "eval_pred_class_1": 3135, + "eval_predicted_binding_ratio": 0.1593959731543624, + "eval_recall": 0.7126733311834892, + "eval_recall_macro": 0.8284197222706846, + "eval_runtime": 0.2602, + "eval_samples_per_second": 626.364, + "eval_steps_per_second": 3.843, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3042 + }, + { + "epoch": 170.0, + "eval_accuracy": 0.9079214968476713, + "eval_auc": 0.9378333125414714, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7099151049175076, + "eval_f1_macro": 0.8275952704051472, + "eval_loss": 0.2522483766078949, + "eval_pr_auc": 0.7157732693940176, + "eval_precision": 0.7052832590706556, + "eval_precision_macro": 0.8258656401852128, + "eval_pred_class_0": 16526, + "eval_pred_class_1": 3142, + "eval_predicted_binding_ratio": 0.1597518812283913, + "eval_recall": 0.7146081909061593, + "eval_recall_macro": 0.8293569716527538, + "eval_runtime": 0.2212, + "eval_samples_per_second": 736.855, + "eval_steps_per_second": 4.521, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3060 + }, + { + "epoch": 171.0, + "eval_accuracy": 0.9079214968476713, + "eval_auc": 0.9379969158489405, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7098221438871976, + "eval_f1_macro": 0.8275504434498686, + "eval_loss": 0.25200438499450684, + "eval_pr_auc": 0.7163721445757975, + "eval_precision": 0.7054140127388535, + "eval_precision_macro": 0.8259040054013725, + "eval_pred_class_0": 16528, + "eval_pred_class_1": 3140, + "eval_predicted_binding_ratio": 0.1596501932072402, + "eval_recall": 0.7142857142857143, + "eval_recall_macro": 0.8292259138217972, + "eval_runtime": 0.2637, + "eval_samples_per_second": 618.089, + "eval_steps_per_second": 3.792, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3078 + }, + { + "epoch": 172.0, + "eval_accuracy": 0.9083282489322758, + "eval_auc": 0.9382153715205318, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7102683593122289, + "eval_f1_macro": 0.8279092226905722, + "eval_loss": 0.25164341926574707, + "eval_pr_auc": 0.7173810220120935, + "eval_precision": 0.7078795643818065, + "eval_precision_macro": 0.8270148456503497, + "eval_pred_class_0": 16546, + "eval_pred_class_1": 3122, + "eval_predicted_binding_ratio": 0.1587350010168802, + "eval_recall": 0.7126733311834892, + "eval_recall_macro": 0.8288120685011429, + "eval_runtime": 0.2387, + "eval_samples_per_second": 682.942, + "eval_steps_per_second": 4.19, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3096 + }, + { + "epoch": 173.0, + "eval_accuracy": 0.9084807809640024, + "eval_auc": 0.9383315970230777, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.711168164313222, + "eval_f1_macro": 0.8283970352740591, + "eval_loss": 0.2515103816986084, + "eval_pr_auc": 0.7177908159595219, + "eval_precision": 0.7077610986905142, + "eval_precision_macro": 0.8271223707155178, + "eval_pred_class_0": 16537, + "eval_pred_class_1": 3131, + "eval_predicted_binding_ratio": 0.15919259711206019, + "eval_recall": 0.7146081909061593, + "eval_recall_macro": 0.8296889569246799, + "eval_runtime": 0.2615, + "eval_samples_per_second": 623.31, + "eval_steps_per_second": 3.824, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3114 + }, + { + "epoch": 174.0, + "eval_accuracy": 0.9088366890380313, + "eval_auc": 0.9384487568455233, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7133493205435651, + "eval_f1_macro": 0.8295745121505045, + "eval_loss": 0.25142860412597656, + "eval_pr_auc": 0.7181535155231535, + "eval_precision": 0.7073557387444515, + "eval_precision_macro": 0.8273365831908039, + "eval_pred_class_0": 16514, + "eval_pred_class_1": 3154, + "eval_predicted_binding_ratio": 0.16036200935529796, + "eval_recall": 0.7194453402128346, + "eval_recall_macro": 0.8318660877438894, + "eval_runtime": 0.27, + "eval_samples_per_second": 603.694, + "eval_steps_per_second": 3.704, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3132 + }, + { + "epoch": 175.0, + "eval_accuracy": 0.9090400650803335, + "eval_auc": 0.9386484385266265, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7135308246597278, + "eval_f1_macro": 0.8297338931856857, + "eval_loss": 0.2510823905467987, + "eval_pr_auc": 0.7190368898285651, + "eval_precision": 0.7086513994910941, + "eval_precision_macro": 0.8279095777411898, + "eval_pred_class_0": 16524, + "eval_pred_class_1": 3144, + "eval_predicted_binding_ratio": 0.15985356924954242, + "eval_recall": 0.7184779103514995, + "eval_recall_macro": 0.8315936361680839, + "eval_runtime": 0.2642, + "eval_samples_per_second": 616.851, + "eval_steps_per_second": 3.784, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3150 + }, + { + "epoch": 176.0, + "eval_accuracy": 0.9090909090909091, + "eval_auc": 0.9388042461024309, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7136450992953235, + "eval_f1_macro": 0.8298069567551197, + "eval_loss": 0.25084683299064636, + "eval_pr_auc": 0.7196986972872739, + "eval_precision": 0.7088768692332167, + "eval_precision_macro": 0.8280239111672891, + "eval_pred_class_0": 16525, + "eval_pred_class_1": 3143, + "eval_predicted_binding_ratio": 0.15980272523896685, + "eval_recall": 0.7184779103514995, + "eval_recall_macro": 0.83162381664735, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.711, + "eval_steps_per_second": 3.869, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3168 + }, + { + "epoch": 177.0, + "eval_accuracy": 0.9093451291437868, + "eval_auc": 0.9389100189010969, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7144001281435207, + "eval_f1_macro": 0.8302608322100373, + "eval_loss": 0.25058600306510925, + "eval_pr_auc": 0.7201232901620662, + "eval_precision": 0.7097390197326544, + "eval_precision_macro": 0.8285170954889824, + "eval_pred_class_0": 16526, + "eval_pred_class_1": 3142, + "eval_predicted_binding_ratio": 0.1597518812283913, + "eval_recall": 0.7191228635923895, + "eval_recall_macro": 0.832036834705593, + "eval_runtime": 0.2429, + "eval_samples_per_second": 671.088, + "eval_steps_per_second": 4.117, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3186 + }, + { + "epoch": 178.0, + "eval_accuracy": 0.9091417531014846, + "eval_auc": 0.9390935349014322, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.713392141138733, + "eval_f1_macro": 0.8297029283682245, + "eval_loss": 0.2502758800983429, + "eval_pr_auc": 0.7210120001490467, + "eval_precision": 0.7096362476068921, + "eval_precision_macro": 0.8282970157836081, + "eval_pred_class_0": 16534, + "eval_pred_class_1": 3134, + "eval_predicted_binding_ratio": 0.15934512914378687, + "eval_recall": 0.7171880038697195, + "eval_recall_macro": 0.83112976580279, + "eval_runtime": 0.27, + "eval_samples_per_second": 603.763, + "eval_steps_per_second": 3.704, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3204 + }, + { + "epoch": 179.0, + "eval_accuracy": 0.9097518812283913, + "eval_auc": 0.939160767004228, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7162270183852918, + "eval_f1_macro": 0.8312854205617097, + "eval_loss": 0.2502012550830841, + "eval_pr_auc": 0.7211443774602971, + "eval_precision": 0.7102092580849715, + "eval_precision_macro": 0.8290358389250096, + "eval_pred_class_0": 16514, + "eval_pred_class_1": 3154, + "eval_predicted_binding_ratio": 0.16036200935529796, + "eval_recall": 0.7223476297968398, + "eval_recall_macro": 0.8335888568492861, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.383, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3222 + }, + { + "epoch": 180.0, + "eval_accuracy": 0.9099552572706935, + "eval_auc": 0.9392979952395233, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7171378374061651, + "eval_f1_macro": 0.8317964319305957, + "eval_loss": 0.2500424385070801, + "eval_pr_auc": 0.7216701546304439, + "eval_precision": 0.7104430379746836, + "eval_precision_macro": 0.8292946956289701, + "eval_pred_class_0": 16508, + "eval_pred_class_1": 3160, + "eval_predicted_binding_ratio": 0.16066707341875128, + "eval_recall": 0.7239600128990649, + "eval_recall_macro": 0.8343648679211326, + "eval_runtime": 0.2706, + "eval_samples_per_second": 602.263, + "eval_steps_per_second": 3.695, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3240 + }, + { + "epoch": 181.0, + "eval_accuracy": 0.910006101281269, + "eval_auc": 0.9394494621207929, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7173427020121367, + "eval_f1_macro": 0.8319131723763289, + "eval_loss": 0.24980410933494568, + "eval_pr_auc": 0.722382187322872, + "eval_precision": 0.7105346409364125, + "eval_precision_macro": 0.8293692166334694, + "eval_pred_class_0": 16507, + "eval_pred_class_1": 3161, + "eval_predicted_binding_ratio": 0.1607179174293268, + "eval_recall": 0.7242824895195098, + "eval_recall_macro": 0.8345261062313551, + "eval_runtime": 0.2696, + "eval_samples_per_second": 604.516, + "eval_steps_per_second": 3.709, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3258 + }, + { + "epoch": 182.0, + "eval_accuracy": 0.9102094773235713, + "eval_auc": 0.9395796926893379, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7179814755669115, + "eval_f1_macro": 0.8322930296138966, + "eval_loss": 0.2495713084936142, + "eval_pr_auc": 0.7229232947500771, + "eval_precision": 0.7111673521037646, + "eval_precision_macro": 0.8297461525769929, + "eval_pred_class_0": 16507, + "eval_pred_class_1": 3161, + "eval_predicted_binding_ratio": 0.1607179174293268, + "eval_recall": 0.7249274427603999, + "eval_recall_macro": 0.8349089438103321, + "eval_runtime": 0.2615, + "eval_samples_per_second": 623.403, + "eval_steps_per_second": 3.825, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3276 + }, + { + "epoch": 183.0, + "eval_accuracy": 0.910362009355298, + "eval_auc": 0.9396445500623882, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7187749242303397, + "eval_f1_macro": 0.8327294647159501, + "eval_loss": 0.2494269460439682, + "eval_pr_auc": 0.7232071190390562, + "eval_precision": 0.7111742424242424, + "eval_precision_macro": 0.8298901515151516, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.726539825862625, + "eval_recall_macro": 0.8356547744029127, + "eval_runtime": 0.2535, + "eval_samples_per_second": 643.015, + "eval_steps_per_second": 3.945, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3294 + }, + { + "epoch": 184.0, + "eval_accuracy": 0.9106670734187513, + "eval_auc": 0.9397990826808293, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7191945021575835, + "eval_f1_macro": 0.8330396163256251, + "eval_loss": 0.24917152523994446, + "eval_pr_auc": 0.7237798760580164, + "eval_precision": 0.7129277566539924, + "eval_precision_macro": 0.830694740730097, + "eval_pred_class_0": 16512, + "eval_pred_class_1": 3156, + "eval_predicted_binding_ratio": 0.16046369737644905, + "eval_recall": 0.7255723960012899, + "eval_recall_macro": 0.8354426837856392, + "eval_runtime": 0.2605, + "eval_samples_per_second": 625.837, + "eval_steps_per_second": 3.839, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3312 + }, + { + "epoch": 185.0, + "eval_accuracy": 0.9110738255033557, + "eval_auc": 0.9399604475135382, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7199359487590072, + "eval_f1_macro": 0.8335408491792982, + "eval_loss": 0.24888525903224945, + "eval_pr_auc": 0.7244599264333298, + "eval_precision": 0.7150127226463104, + "eval_precision_macro": 0.8316954196625403, + "eval_pred_class_0": 16524, + "eval_pred_class_1": 3144, + "eval_predicted_binding_ratio": 0.15985356924954242, + "eval_recall": 0.7249274427603999, + "eval_recall_macro": 0.8354220119578544, + "eval_runtime": 0.2549, + "eval_samples_per_second": 639.342, + "eval_steps_per_second": 3.922, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3330 + }, + { + "epoch": 186.0, + "eval_accuracy": 0.9109721374822046, + "eval_auc": 0.9401063571379032, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7194359878224643, + "eval_f1_macro": 0.8332638467590944, + "eval_loss": 0.2486649453639984, + "eval_pr_auc": 0.7249907062273525, + "eval_precision": 0.714968152866242, + "eval_precision_macro": 0.8315886262879129, + "eval_pred_class_0": 16528, + "eval_pred_class_1": 3140, + "eval_predicted_binding_ratio": 0.1596501932072402, + "eval_recall": 0.7239600128990649, + "eval_recall_macro": 0.8349684775064528, + "eval_runtime": 0.2536, + "eval_samples_per_second": 642.8, + "eval_steps_per_second": 3.944, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3348 + }, + { + "epoch": 187.0, + "eval_accuracy": 0.9112263575350824, + "eval_auc": 0.9402670406956853, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7203715566944267, + "eval_f1_macro": 0.8338047799185901, + "eval_loss": 0.24845102429389954, + "eval_pr_auc": 0.7255585294747666, + "eval_precision": 0.7155583837098314, + "eval_precision_macro": 0.832000069313312, + "eval_pred_class_0": 16525, + "eval_pred_class_1": 3143, + "eval_predicted_binding_ratio": 0.15980272523896685, + "eval_recall": 0.7252499193808449, + "eval_recall_macro": 0.8356436112266088, + "eval_runtime": 0.2397, + "eval_samples_per_second": 680.081, + "eval_steps_per_second": 4.172, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3366 + }, + { + "epoch": 188.0, + "eval_accuracy": 0.9114805775879601, + "eval_auc": 0.9404174175370716, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7215736446505677, + "eval_f1_macro": 0.8344742146415792, + "eval_loss": 0.24825866520404816, + "eval_pr_auc": 0.7262464197023197, + "eval_precision": 0.7157360406091371, + "eval_precision_macro": 0.8322867657635175, + "eval_pred_class_0": 16516, + "eval_pred_class_1": 3152, + "eval_predicted_binding_ratio": 0.16026032133414683, + "eval_recall": 0.72750725572396, + "eval_recall_macro": 0.8367119184396343, + "eval_runtime": 0.2593, + "eval_samples_per_second": 628.496, + "eval_steps_per_second": 3.856, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3384 + }, + { + "epoch": 189.0, + "eval_accuracy": 0.9114297335773845, + "eval_auc": 0.9404972726910148, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7223461906279885, + "eval_f1_macro": 0.8348286515416876, + "eval_loss": 0.24819281697273254, + "eval_pr_auc": 0.7264794032375063, + "eval_precision": 0.7141506460762685, + "eval_precision_macro": 0.8317646228259488, + "eval_pred_class_0": 16495, + "eval_pred_class_1": 3173, + "eval_predicted_binding_ratio": 0.16132804555623348, + "eval_recall": 0.7307320219284101, + "eval_recall_macro": 0.8379923162699333, + "eval_runtime": 0.2478, + "eval_samples_per_second": 657.696, + "eval_steps_per_second": 4.035, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3402 + }, + { + "epoch": 190.0, + "eval_accuracy": 0.9116839536302623, + "eval_auc": 0.940656301723974, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.722922316158877, + "eval_f1_macro": 0.8351963018783921, + "eval_loss": 0.24791164696216583, + "eval_pr_auc": 0.7271914277283732, + "eval_precision": 0.7152777777777778, + "eval_precision_macro": 0.8323358585858586, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.7307320219284101, + "eval_recall_macro": 0.8381432186662634, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.211, + "eval_steps_per_second": 3.866, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3420 + }, + { + "epoch": 191.0, + "eval_accuracy": 0.9118873296725646, + "eval_auc": 0.9407912914845091, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7233838786911413, + "eval_f1_macro": 0.8354907358742514, + "eval_loss": 0.24768619239330292, + "eval_pr_auc": 0.727892471269696, + "eval_precision": 0.7161820480404552, + "eval_precision_macro": 0.8327941262984632, + "eval_pred_class_0": 16504, + "eval_pred_class_1": 3164, + "eval_predicted_binding_ratio": 0.1608704494610535, + "eval_recall": 0.7307320219284101, + "eval_recall_macro": 0.8382639405833274, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.424, + "eval_steps_per_second": 3.935, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3438 + }, + { + "epoch": 192.0, + "eval_accuracy": 0.9119381736831401, + "eval_auc": 0.9409245683252279, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7240280433397068, + "eval_f1_macro": 0.8358192243316804, + "eval_loss": 0.2475385069847107, + "eval_pr_auc": 0.7283869598485145, + "eval_precision": 0.7155905511811024, + "eval_precision_macro": 0.832663401462133, + "eval_pred_class_0": 16493, + "eval_pred_class_1": 3175, + "eval_predicted_binding_ratio": 0.16142973357738458, + "eval_recall": 0.7326668816510803, + "eval_recall_macro": 0.8390804680483324, + "eval_runtime": 0.2536, + "eval_samples_per_second": 642.682, + "eval_steps_per_second": 3.943, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3456 + }, + { + "epoch": 193.0, + "eval_accuracy": 0.9120398617042912, + "eval_auc": 0.9410766288889338, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7232885476647473, + "eval_f1_macro": 0.8354987049773379, + "eval_loss": 0.24721089005470276, + "eval_pr_auc": 0.7290279509427341, + "eval_precision": 0.7175499841320215, + "eval_precision_macro": 0.8333466455139735, + "eval_pred_class_0": 16517, + "eval_pred_class_1": 3151, + "eval_predicted_binding_ratio": 0.16020947732357127, + "eval_recall": 0.7291196388261851, + "eval_recall_macro": 0.837699192866343, + "eval_runtime": 0.2594, + "eval_samples_per_second": 628.255, + "eval_steps_per_second": 3.854, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3474 + }, + { + "epoch": 194.0, + "eval_accuracy": 0.9122432377465934, + "eval_auc": 0.9411616325348253, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7239283429302623, + "eval_f1_macro": 0.8358790547924193, + "eval_loss": 0.2470986247062683, + "eval_pr_auc": 0.7293209780794321, + "eval_precision": 0.7181847032688036, + "eval_precision_macro": 0.8337245487646312, + "eval_pred_class_0": 16517, + "eval_pred_class_1": 3151, + "eval_predicted_binding_ratio": 0.16020947732357127, + "eval_recall": 0.7297645920670751, + "eval_recall_macro": 0.83808203044532, + "eval_runtime": 0.26, + "eval_samples_per_second": 626.922, + "eval_steps_per_second": 3.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3492 + }, + { + "epoch": 194.44444444444446, + "grad_norm": 15854.8017578125, + "learning_rate": 7.667662546617938e-07, + "loss": 0.2185, + "step": 3500 + }, + { + "epoch": 195.0, + "eval_accuracy": 0.9121923937360179, + "eval_auc": 0.9412192294636534, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7248685677871595, + "eval_f1_macro": 0.8363143165624445, + "eval_loss": 0.2470363825559616, + "eval_pr_auc": 0.7294473908430833, + "eval_precision": 0.7163098236775819, + "eval_precision_macro": 0.8331124670170592, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7336343115124153, + "eval_recall_macro": 0.839624543937532, + "eval_runtime": 0.2532, + "eval_samples_per_second": 643.796, + "eval_steps_per_second": 3.95, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3510 + }, + { + "epoch": 196.0, + "eval_accuracy": 0.9123449257677445, + "eval_auc": 0.9413068608842632, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7252151737328658, + "eval_f1_macro": 0.8365353589310388, + "eval_loss": 0.2468923032283783, + "eval_pr_auc": 0.7297206006779651, + "eval_precision": 0.7169870784746297, + "eval_precision_macro": 0.8334556489675362, + "eval_pred_class_0": 16495, + "eval_pred_class_1": 3173, + "eval_predicted_binding_ratio": 0.16132804555623348, + "eval_recall": 0.7336343115124153, + "eval_recall_macro": 0.83971508537533, + "eval_runtime": 0.2673, + "eval_samples_per_second": 609.86, + "eval_steps_per_second": 3.741, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3528 + }, + { + "epoch": 197.0, + "eval_accuracy": 0.9124466137888957, + "eval_auc": 0.9415050729580239, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7246562200191877, + "eval_f1_macro": 0.8363018721763311, + "eval_loss": 0.2465437948703766, + "eval_pr_auc": 0.7306168212570879, + "eval_precision": 0.7186806216301934, + "eval_precision_macro": 0.8340602623742853, + "eval_pred_class_0": 16515, + "eval_pred_class_1": 3153, + "eval_predicted_binding_ratio": 0.1603111653447224, + "eval_recall": 0.7307320219284101, + "eval_recall_macro": 0.8385959258552536, + "eval_runtime": 0.2512, + "eval_samples_per_second": 648.769, + "eval_steps_per_second": 3.98, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3546 + }, + { + "epoch": 198.0, + "eval_accuracy": 0.9127008338417735, + "eval_auc": 0.9416286951597772, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7254996003197443, + "eval_f1_macro": 0.836798347664482, + "eval_loss": 0.24635502696037292, + "eval_pr_auc": 0.7311574695224861, + "eval_precision": 0.7194039315155358, + "eval_precision_macro": 0.8345112185130059, + "eval_pred_class_0": 16514, + "eval_pred_class_1": 3154, + "eval_predicted_binding_ratio": 0.16036200935529796, + "eval_recall": 0.7316994517897453, + "eval_recall_macro": 0.8391400017444531, + "eval_runtime": 0.2567, + "eval_samples_per_second": 635.086, + "eval_steps_per_second": 3.896, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3564 + }, + { + "epoch": 199.0, + "eval_accuracy": 0.9126499898311979, + "eval_auc": 0.941704083096699, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7255591054313099, + "eval_f1_macro": 0.8368090605158727, + "eval_loss": 0.24623039364814758, + "eval_pr_auc": 0.7315041353273113, + "eval_precision": 0.7188983855650523, + "eval_precision_macro": 0.8343113891602595, + "eval_pred_class_0": 16509, + "eval_pred_class_1": 3159, + "eval_predicted_binding_ratio": 0.16061622940817571, + "eval_recall": 0.7323444050306352, + "eval_recall_macro": 0.8393719369271001, + "eval_runtime": 0.2608, + "eval_samples_per_second": 625.096, + "eval_steps_per_second": 3.835, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3582 + }, + { + "epoch": 200.0, + "eval_accuracy": 0.912751677852349, + "eval_auc": 0.9417882886776758, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7266645428480408, + "eval_f1_macro": 0.8373778882187448, + "eval_loss": 0.24614199995994568, + "eval_pr_auc": 0.7317905290059212, + "eval_precision": 0.7179729304375196, + "eval_precision_macro": 0.8341244192542944, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7355691712350855, + "eval_recall_macro": 0.8407428761951972, + "eval_runtime": 0.2558, + "eval_samples_per_second": 637.19, + "eval_steps_per_second": 3.909, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3600 + }, + { + "epoch": 201.0, + "eval_accuracy": 0.9129042098840756, + "eval_auc": 0.9419125142943645, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7274463007159905, + "eval_f1_macro": 0.837808654578745, + "eval_loss": 0.2459731251001358, + "eval_pr_auc": 0.7322365639924645, + "eval_precision": 0.717964824120603, + "eval_precision_macro": 0.8342614705412528, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7371815543373106, + "eval_recall_macro": 0.8414887067877777, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.607, + "eval_steps_per_second": 3.924, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3618 + }, + { + "epoch": 202.0, + "eval_accuracy": 0.9131075859263779, + "eval_auc": 0.9421228725268236, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7264286857691692, + "eval_f1_macro": 0.8373900508237788, + "eval_loss": 0.2455640733242035, + "eval_pr_auc": 0.7332689412482398, + "eval_precision": 0.721233312142403, + "eval_precision_macro": 0.8354381062588301, + "eval_pred_class_0": 16522, + "eval_pred_class_1": 3146, + "eval_predicted_binding_ratio": 0.1599552572706935, + "eval_recall": 0.7316994517897453, + "eval_recall_macro": 0.8393814455785812, + "eval_runtime": 0.2535, + "eval_samples_per_second": 643.062, + "eval_steps_per_second": 3.945, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3636 + }, + { + "epoch": 203.0, + "eval_accuracy": 0.9132092739475289, + "eval_auc": 0.9422253168108461, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7266613290632506, + "eval_f1_macro": 0.8375381529725912, + "eval_loss": 0.24535632133483887, + "eval_pr_auc": 0.7337329229212076, + "eval_precision": 0.7216921119592875, + "eval_precision_macro": 0.8356705536799585, + "eval_pred_class_0": 16524, + "eval_pred_class_1": 3144, + "eval_predicted_binding_ratio": 0.15985356924954242, + "eval_recall": 0.7316994517897453, + "eval_recall_macro": 0.8394418065371132, + "eval_runtime": 0.2645, + "eval_samples_per_second": 616.363, + "eval_steps_per_second": 3.781, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3654 + }, + { + "epoch": 204.0, + "eval_accuracy": 0.9133109619686801, + "eval_auc": 0.9423872461284946, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7268066015061689, + "eval_f1_macro": 0.8376441226295008, + "eval_loss": 0.2451435625553131, + "eval_pr_auc": 0.7344416461934514, + "eval_precision": 0.7222929936305732, + "eval_precision_macro": 0.8359468356342605, + "eval_pred_class_0": 16528, + "eval_pred_class_1": 3140, + "eval_predicted_binding_ratio": 0.1596501932072402, + "eval_recall": 0.7313769751693002, + "eval_recall_macro": 0.8393711096646888, + "eval_runtime": 0.24, + "eval_samples_per_second": 679.026, + "eval_steps_per_second": 4.166, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3672 + }, + { + "epoch": 205.0, + "eval_accuracy": 0.9138194020744357, + "eval_auc": 0.9424586242758461, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7293629251157592, + "eval_f1_macro": 0.8390563302747484, + "eval_loss": 0.24504327774047852, + "eval_pr_auc": 0.7346240551024029, + "eval_precision": 0.7223276407337128, + "eval_precision_macro": 0.8364152440915626, + "eval_pred_class_0": 16506, + "eval_pred_class_1": 3162, + "eval_predicted_binding_ratio": 0.16076876143990237, + "eval_recall": 0.7365366010964205, + "eval_recall_macro": 0.8417698397526527, + "eval_runtime": 0.2606, + "eval_samples_per_second": 625.36, + "eval_steps_per_second": 3.837, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3690 + }, + { + "epoch": 206.0, + "eval_accuracy": 0.91376855806386, + "eval_auc": 0.9426215171108914, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.729073482428115, + "eval_f1_macro": 0.8388988164347613, + "eval_loss": 0.2448122650384903, + "eval_pr_auc": 0.7352376772544322, + "eval_precision": 0.7223805001582779, + "eval_precision_macro": 0.8363855980711433, + "eval_pred_class_0": 16509, + "eval_pred_class_1": 3159, + "eval_predicted_binding_ratio": 0.16061622940817571, + "eval_recall": 0.7358916478555305, + "eval_recall_macro": 0.8414775436114739, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.41, + "eval_steps_per_second": 3.88, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3708 + }, + { + "epoch": 207.0, + "eval_accuracy": 0.9139210900955868, + "eval_auc": 0.9426929439207377, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7299409794225554, + "eval_f1_macro": 0.8393709493840633, + "eval_loss": 0.24476298689842224, + "eval_pr_auc": 0.7354485111055644, + "eval_precision": 0.7222222222222222, + "eval_precision_macro": 0.8364747474747475, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.7378265075782006, + "eval_recall_macro": 0.8423544320350108, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.323, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3726 + }, + { + "epoch": 208.0, + "eval_accuracy": 0.9138702460850112, + "eval_auc": 0.9428956913390121, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.72896, + "eval_f1_macro": 0.8388800483588226, + "eval_loss": 0.24439764022827148, + "eval_pr_auc": 0.736435270049487, + "eval_precision": 0.723404255319149, + "eval_precision_macro": 0.8367914187788916, + "eval_pred_class_0": 16519, + "eval_pred_class_1": 3149, + "eval_predicted_binding_ratio": 0.16010778930242017, + "eval_recall": 0.7346017413737504, + "eval_recall_macro": 0.8410136732461799, + "eval_runtime": 0.2835, + "eval_samples_per_second": 574.921, + "eval_steps_per_second": 3.527, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3744 + }, + { + "epoch": 209.0, + "eval_accuracy": 0.9140736221273134, + "eval_auc": 0.9429205676063466, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.730805989168525, + "eval_f1_macro": 0.83984185960937, + "eval_loss": 0.24438706040382385, + "eval_pr_auc": 0.7364306271247294, + "eval_precision": 0.722064841045011, + "eval_precision_macro": 0.8365645289452815, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7397613673008707, + "eval_recall_macro": 0.8432313204585479, + "eval_runtime": 0.2624, + "eval_samples_per_second": 621.222, + "eval_steps_per_second": 3.811, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3762 + }, + { + "epoch": 210.0, + "eval_accuracy": 0.9142769981696156, + "eval_auc": 0.9430619808161933, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7309288222151292, + "eval_f1_macro": 0.8399730291904192, + "eval_loss": 0.2441486269235611, + "eval_pr_auc": 0.7370811402900591, + "eval_precision": 0.7235387045813586, + "eval_precision_macro": 0.8371980622222068, + "eval_pred_class_0": 16503, + "eval_pred_class_1": 3165, + "eval_predicted_binding_ratio": 0.16092129347162903, + "eval_recall": 0.7384714608190907, + "eval_recall_macro": 0.842827811051786, + "eval_runtime": 0.2675, + "eval_samples_per_second": 609.291, + "eval_steps_per_second": 3.738, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3780 + }, + { + "epoch": 211.0, + "eval_accuracy": 0.9146329062436445, + "eval_auc": 0.9431815542983806, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7325155328978812, + "eval_f1_macro": 0.8408637738901821, + "eval_loss": 0.24397221207618713, + "eval_pr_auc": 0.7374915342644908, + "eval_precision": 0.7238664987405542, + "eval_precision_macro": 0.8376184300639468, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7413737504030957, + "eval_recall_macro": 0.8442185948852564, + "eval_runtime": 0.264, + "eval_samples_per_second": 617.425, + "eval_steps_per_second": 3.788, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3798 + }, + { + "epoch": 212.0, + "eval_accuracy": 0.9144295302013423, + "eval_auc": 0.9432977798009264, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7319636884854276, + "eval_f1_macro": 0.8405258137499286, + "eval_loss": 0.24377743899822235, + "eval_pr_auc": 0.7380125440447487, + "eval_precision": 0.7230962869729389, + "eval_precision_macro": 0.837200053735105, + "eval_pred_class_0": 16490, + "eval_pred_class_1": 3178, + "eval_predicted_binding_ratio": 0.16158226560911124, + "eval_recall": 0.7410512737826508, + "eval_recall_macro": 0.8439668151372359, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.044, + "eval_steps_per_second": 3.828, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3816 + }, + { + "epoch": 213.0, + "eval_accuracy": 0.9144295302013423, + "eval_auc": 0.9434619086633391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7311072056239016, + "eval_f1_macro": 0.8401129643018078, + "eval_loss": 0.24348998069763184, + "eval_pr_auc": 0.7388402605739814, + "eval_precision": 0.7245091830272324, + "eval_precision_macro": 0.8376331499630408, + "eval_pred_class_0": 16510, + "eval_pred_class_1": 3158, + "eval_predicted_binding_ratio": 0.16056538539760015, + "eval_recall": 0.7378265075782006, + "eval_recall_macro": 0.8426562368276709, + "eval_runtime": 0.2496, + "eval_samples_per_second": 652.954, + "eval_steps_per_second": 4.006, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3834 + }, + { + "epoch": 214.0, + "eval_accuracy": 0.9147854382753712, + "eval_auc": 0.943601764673353, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7310654685494223, + "eval_f1_macro": 0.8402185728440683, + "eval_loss": 0.24328412115573883, + "eval_pr_auc": 0.7395557790782865, + "eval_precision": 0.727563078888534, + "eval_precision_macro": 0.838897945080114, + "eval_pred_class_0": 16537, + "eval_pred_class_1": 3131, + "eval_predicted_binding_ratio": 0.15919259711206019, + "eval_recall": 0.7346017413737504, + "eval_recall_macro": 0.841556921872968, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.38, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3852 + }, + { + "epoch": 215.0, + "eval_accuracy": 0.9146837502542201, + "eval_auc": 0.9436924715636332, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7310897435897435, + "eval_f1_macro": 0.8401943762667112, + "eval_loss": 0.2431441992521286, + "eval_pr_auc": 0.7398761361047077, + "eval_precision": 0.7266645428480408, + "eval_precision_macro": 0.838527383046018, + "eval_pred_class_0": 16529, + "eval_pred_class_1": 3139, + "eval_predicted_binding_ratio": 0.15959934919666463, + "eval_recall": 0.7355691712350855, + "eval_recall_macro": 0.8418897344073055, + "eval_runtime": 0.2583, + "eval_samples_per_second": 631.075, + "eval_steps_per_second": 3.872, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3870 + }, + { + "epoch": 216.0, + "eval_accuracy": 0.9147345942647956, + "eval_auc": 0.9437266715649686, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7332591060919358, + "eval_f1_macro": 0.8412581348487456, + "eval_loss": 0.24318096041679382, + "eval_pr_auc": 0.739819563727558, + "eval_precision": 0.7234777150031387, + "eval_precision_macro": 0.8375913025931845, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7433086101257659, + "eval_recall_macro": 0.8450653028295274, + "eval_runtime": 0.2695, + "eval_samples_per_second": 604.716, + "eval_steps_per_second": 3.71, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3888 + }, + { + "epoch": 217.0, + "eval_accuracy": 0.915039658328249, + "eval_auc": 0.9438657295100676, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7336202773792444, + "eval_f1_macro": 0.8415401994826537, + "eval_loss": 0.24295340478420258, + "eval_pr_auc": 0.740473021691125, + "eval_precision": 0.7254098360655737, + "eval_precision_macro": 0.8384566154139702, + "eval_pred_class_0": 16496, + "eval_pred_class_1": 3172, + "eval_predicted_binding_ratio": 0.16127720154565792, + "eval_recall": 0.7420187036439858, + "eval_recall_macro": 0.8447221543812975, + "eval_runtime": 0.2596, + "eval_samples_per_second": 627.926, + "eval_steps_per_second": 3.852, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3906 + }, + { + "epoch": 218.0, + "eval_accuracy": 0.9148362822859467, + "eval_auc": 0.9439684365715622, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7333227193122114, + "eval_f1_macro": 0.8413247993777817, + "eval_loss": 0.24282881617546082, + "eval_pr_auc": 0.7409189185674594, + "eval_precision": 0.7242138364779874, + "eval_precision_macro": 0.837907500480624, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.7426636568848759, + "eval_recall_macro": 0.8448635481261465, + "eval_runtime": 0.2549, + "eval_samples_per_second": 639.434, + "eval_steps_per_second": 3.923, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3924 + }, + { + "epoch": 219.0, + "eval_accuracy": 0.9149888143176734, + "eval_auc": 0.9440560582596731, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7337579617834394, + "eval_f1_macro": 0.8415885646284089, + "eval_loss": 0.24268390238285065, + "eval_pr_auc": 0.7412204047828347, + "eval_precision": 0.724756212645486, + "eval_precision_macro": 0.8382104794199593, + "eval_pred_class_0": 16489, + "eval_pred_class_1": 3179, + "eval_predicted_binding_ratio": 0.1616331096196868, + "eval_recall": 0.7429861335053208, + "eval_recall_macro": 0.845085147394901, + "eval_runtime": 0.2558, + "eval_samples_per_second": 637.151, + "eval_steps_per_second": 3.909, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3942 + }, + { + "epoch": 220.0, + "eval_accuracy": 0.9152430343705511, + "eval_auc": 0.9441587945186644, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7346808849275823, + "eval_f1_macro": 0.842123366857946, + "eval_loss": 0.24253520369529724, + "eval_pr_auc": 0.7415422610927452, + "eval_precision": 0.7253299811439347, + "eval_precision_macro": 0.8386142808788943, + "eval_pred_class_0": 16486, + "eval_pred_class_1": 3182, + "eval_predicted_binding_ratio": 0.16178564165141346, + "eval_recall": 0.7442760399871009, + "eval_recall_macro": 0.8457602811150571, + "eval_runtime": 0.2572, + "eval_samples_per_second": 633.818, + "eval_steps_per_second": 3.888, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3960 + }, + { + "epoch": 221.0, + "eval_accuracy": 0.9153955664022778, + "eval_auc": 0.9442562168332251, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7354531001589825, + "eval_f1_macro": 0.8425495241156832, + "eval_loss": 0.2423904687166214, + "eval_pr_auc": 0.741902799087436, + "eval_precision": 0.7253057384760113, + "eval_precision_macro": 0.8387436514456639, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.745888423089326, + "eval_recall_macro": 0.8465061117076376, + "eval_runtime": 0.2433, + "eval_samples_per_second": 669.926, + "eval_steps_per_second": 4.11, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3978 + }, + { + "epoch": 222.0, + "eval_accuracy": 0.9153955664022778, + "eval_auc": 0.944399226172901, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.734609250398724, + "eval_f1_macro": 0.8421428275824746, + "eval_loss": 0.24210123717784882, + "eval_pr_auc": 0.7426309034006747, + "eval_precision": 0.7267276743452193, + "eval_precision_macro": 0.8391805533372256, + "eval_pred_class_0": 16499, + "eval_pred_class_1": 3169, + "eval_predicted_binding_ratio": 0.16112466951393126, + "eval_recall": 0.7426636568848759, + "eval_recall_macro": 0.8451955333980726, + "eval_runtime": 0.2674, + "eval_samples_per_second": 609.607, + "eval_steps_per_second": 3.74, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3996 + }, + { + "epoch": 222.22222222222223, + "grad_norm": 16301.5107421875, + "learning_rate": 6.802697587657594e-07, + "loss": 0.211, + "step": 4000 + }, + { + "epoch": 223.0, + "eval_accuracy": 0.9156497864551556, + "eval_auc": 0.9445356174132858, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.735026353617633, + "eval_f1_macro": 0.8424338117294514, + "eval_loss": 0.2418563961982727, + "eval_pr_auc": 0.7432491183346004, + "eval_precision": 0.7281645569620253, + "eval_precision_macro": 0.8398516024451512, + "eval_pred_class_0": 16508, + "eval_pred_class_1": 3160, + "eval_predicted_binding_ratio": 0.16066707341875128, + "eval_recall": 0.7420187036439858, + "eval_recall_macro": 0.8450843201324897, + "eval_runtime": 0.2638, + "eval_samples_per_second": 617.886, + "eval_steps_per_second": 3.791, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4014 + }, + { + "epoch": 224.0, + "eval_accuracy": 0.9159040065080334, + "eval_auc": 0.9445885524751136, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.736624203821656, + "eval_f1_macro": 0.8432939508943711, + "eval_loss": 0.24180874228477478, + "eval_pr_auc": 0.7434863322076849, + "eval_precision": 0.7275872916011324, + "eval_precision_macro": 0.8398989281099847, + "eval_pred_class_0": 16489, + "eval_pred_class_1": 3179, + "eval_predicted_binding_ratio": 0.1616331096196868, + "eval_recall": 0.745888423089326, + "eval_recall_macro": 0.8468079165002977, + "eval_runtime": 0.2501, + "eval_samples_per_second": 651.671, + "eval_steps_per_second": 3.998, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4032 + }, + { + "epoch": 225.0, + "eval_accuracy": 0.9159548505186089, + "eval_auc": 0.9446536531606378, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7374106433677522, + "eval_f1_macro": 0.8436909456056703, + "eval_loss": 0.24177636206150055, + "eval_pr_auc": 0.7437995583771988, + "eval_precision": 0.7266750156543519, + "eval_precision_macro": 0.8396638402297497, + "eval_pred_class_0": 16474, + "eval_pred_class_1": 3194, + "eval_predicted_binding_ratio": 0.16239576977832013, + "eval_recall": 0.7484682360528861, + "eval_recall_macro": 0.8478865596272157, + "eval_runtime": 0.2167, + "eval_samples_per_second": 752.113, + "eval_steps_per_second": 4.614, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4050 + }, + { + "epoch": 226.0, + "eval_accuracy": 0.9161582265609112, + "eval_auc": 0.9447719419529625, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.737629276054097, + "eval_f1_macro": 0.843868342907385, + "eval_loss": 0.24154822528362274, + "eval_pr_auc": 0.7443388353128296, + "eval_precision": 0.7280150753768844, + "eval_precision_macro": 0.8402572343640063, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7475008061915511, + "eval_recall_macro": 0.8476141080514102, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.202, + "eval_steps_per_second": 3.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4068 + }, + { + "epoch": 227.0, + "eval_accuracy": 0.9162599145820622, + "eval_auc": 0.9448749507219245, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7381974248927039, + "eval_f1_macro": 0.8441781495775367, + "eval_loss": 0.2414349913597107, + "eval_pr_auc": 0.7447448668341484, + "eval_precision": 0.7278996865203762, + "eval_precision_macro": 0.840312265884293, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7487907126733312, + "eval_recall_macro": 0.8481987003337683, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.465, + "eval_steps_per_second": 3.88, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4086 + }, + { + "epoch": 228.0, + "eval_accuracy": 0.9163107585926378, + "eval_auc": 0.9449481196490842, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.738314785373609, + "eval_f1_macro": 0.8442527143596241, + "eval_loss": 0.2413274347782135, + "eval_pr_auc": 0.7451543649133733, + "eval_precision": 0.7281279397930386, + "eval_precision_macro": 0.840427826926679, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7487907126733312, + "eval_recall_macro": 0.8482288808130343, + "eval_runtime": 0.2185, + "eval_samples_per_second": 746.149, + "eval_steps_per_second": 4.578, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4104 + }, + { + "epoch": 229.0, + "eval_accuracy": 0.9162599145820622, + "eval_auc": 0.9450982726429946, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7376135096383623, + "eval_f1_macro": 0.843896745442007, + "eval_loss": 0.2410273402929306, + "eval_pr_auc": 0.7459954701052622, + "eval_precision": 0.728904282115869, + "eval_precision_macro": 0.8406224054285385, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7465333763302161, + "eval_recall_macro": 0.8472812955170728, + "eval_runtime": 0.2597, + "eval_samples_per_second": 627.64, + "eval_steps_per_second": 3.851, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4122 + }, + { + "epoch": 230.0, + "eval_accuracy": 0.9167683546878178, + "eval_auc": 0.9451833444163787, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7392066273697626, + "eval_f1_macro": 0.8448445490519523, + "eval_loss": 0.2408701479434967, + "eval_pr_auc": 0.74641099889946, + "eval_precision": 0.7304785894206549, + "eval_precision_macro": 0.8415611477299734, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7481457594324411, + "eval_recall_macro": 0.8482383894645154, + "eval_runtime": 0.2678, + "eval_samples_per_second": 608.615, + "eval_steps_per_second": 3.734, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4140 + }, + { + "epoch": 231.0, + "eval_accuracy": 0.9167175106772423, + "eval_auc": 0.9452793360535927, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7393380012730745, + "eval_f1_macro": 0.8448898647294817, + "eval_loss": 0.24077175557613373, + "eval_pr_auc": 0.7467905435777061, + "eval_precision": 0.7298146402764687, + "eval_precision_macro": 0.8413101105537636, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7491131892937762, + "eval_recall_macro": 0.8486013824781189, + "eval_runtime": 0.2588, + "eval_samples_per_second": 629.884, + "eval_steps_per_second": 3.864, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4158 + }, + { + "epoch": 232.0, + "eval_accuracy": 0.9163616026032133, + "eval_auc": 0.9453335850027798, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7391786903440621, + "eval_f1_macro": 0.8446869866386211, + "eval_loss": 0.24075280129909515, + "eval_pr_auc": 0.7469096614042753, + "eval_precision": 0.7270742358078602, + "eval_precision_macro": 0.840149923152381, + "eval_pred_class_0": 16462, + "eval_pred_class_1": 3206, + "eval_predicted_binding_ratio": 0.16300589790522677, + "eval_recall": 0.7516930022573364, + "eval_recall_macro": 0.8494385817709088, + "eval_runtime": 0.2648, + "eval_samples_per_second": 615.647, + "eval_steps_per_second": 3.777, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4176 + }, + { + "epoch": 233.0, + "eval_accuracy": 0.9168191986983933, + "eval_auc": 0.9454244865430391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7405645417063115, + "eval_f1_macro": 0.845516906033295, + "eval_loss": 0.24061860144138336, + "eval_pr_auc": 0.7472467341999135, + "eval_precision": 0.7285491419656787, + "eval_precision_macro": 0.8410102813636934, + "eval_pred_class_0": 16463, + "eval_pred_class_1": 3205, + "eval_predicted_binding_ratio": 0.1629550538946512, + "eval_recall": 0.7529829087391164, + "eval_recall_macro": 0.8502344374081289, + "eval_runtime": 0.2576, + "eval_samples_per_second": 632.77, + "eval_steps_per_second": 3.882, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4194 + }, + { + "epoch": 234.0, + "eval_accuracy": 0.9171242627618467, + "eval_auc": 0.9455514956544295, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7409408773045136, + "eval_f1_macro": 0.8458063544009555, + "eval_loss": 0.24034352600574493, + "eval_pr_auc": 0.7478642707879094, + "eval_precision": 0.7304920087746788, + "eval_precision_macro": 0.841880100399963, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7516930022573364, + "eval_recall_macro": 0.849891288959899, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.626, + "eval_steps_per_second": 3.924, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4212 + }, + { + "epoch": 235.0, + "eval_accuracy": 0.9170734187512711, + "eval_auc": 0.945608099868364, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7413984461709212, + "eval_f1_macro": 0.8460087995182923, + "eval_loss": 0.24029456079006195, + "eval_pr_auc": 0.7479864003180418, + "eval_precision": 0.7292576419213974, + "eval_precision_macro": 0.8414542370705274, + "eval_pred_class_0": 16462, + "eval_pred_class_1": 3206, + "eval_predicted_binding_ratio": 0.16300589790522677, + "eval_recall": 0.7539503386004515, + "eval_recall_macro": 0.8507785132973285, + "eval_runtime": 0.2166, + "eval_samples_per_second": 752.582, + "eval_steps_per_second": 4.617, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4230 + }, + { + "epoch": 236.0, + "eval_accuracy": 0.9172767947935733, + "eval_auc": 0.9457173958316524, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7416229950770208, + "eval_f1_macro": 0.8461890816058248, + "eval_loss": 0.2400863915681839, + "eval_pr_auc": 0.7485735786505677, + "eval_precision": 0.7306007509386734, + "eval_precision_macro": 0.8420487970332027, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7529829087391164, + "eval_recall_macro": 0.850506061721523, + "eval_runtime": 0.2654, + "eval_samples_per_second": 614.11, + "eval_steps_per_second": 3.768, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4248 + }, + { + "epoch": 237.0, + "eval_accuracy": 0.9173784828147244, + "eval_auc": 0.9457852508143816, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7421044278685923, + "eval_f1_macro": 0.8464570875531852, + "eval_loss": 0.24001120030879974, + "eval_pr_auc": 0.7487297504117033, + "eval_precision": 0.730625, + "eval_precision_macro": 0.8421463596065095, + "eval_pred_class_0": 16468, + "eval_pred_class_1": 3200, + "eval_predicted_binding_ratio": 0.16270083384177345, + "eval_recall": 0.7539503386004515, + "eval_recall_macro": 0.8509595961729245, + "eval_runtime": 0.2614, + "eval_samples_per_second": 623.497, + "eval_steps_per_second": 3.825, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4266 + }, + { + "epoch": 238.0, + "eval_accuracy": 0.9176835468781778, + "eval_auc": 0.9458918216779619, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7428934413212641, + "eval_f1_macro": 0.8469453737675663, + "eval_loss": 0.23980914056301117, + "eval_pr_auc": 0.7491659673680734, + "eval_precision": 0.7318523153942428, + "eval_precision_macro": 0.8427959974251447, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7542728152208965, + "eval_recall_macro": 0.8512717368794771, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.674, + "eval_steps_per_second": 3.869, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4284 + }, + { + "epoch": 239.0, + "eval_accuracy": 0.9175818588570266, + "eval_auc": 0.9459814190633611, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7427392477384542, + "eval_f1_macro": 0.8468350393376697, + "eval_loss": 0.2397017627954483, + "eval_pr_auc": 0.7495363660441035, + "eval_precision": 0.73125, + "eval_precision_macro": 0.8425195834345397, + "eval_pred_class_0": 16468, + "eval_pred_class_1": 3200, + "eval_predicted_binding_ratio": 0.16270083384177345, + "eval_recall": 0.7545952918413416, + "eval_recall_macro": 0.8513424337519016, + "eval_runtime": 0.2693, + "eval_samples_per_second": 605.187, + "eval_steps_per_second": 3.713, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4302 + }, + { + "epoch": 240.0, + "eval_accuracy": 0.9177852348993288, + "eval_auc": 0.9460787245879343, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7425569176882661, + "eval_f1_macro": 0.846819224235148, + "eval_loss": 0.2394852489233017, + "eval_pr_auc": 0.7500368484248636, + "eval_precision": 0.7333333333333333, + "eval_precision_macro": 0.8433466763706938, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.7520154788777813, + "eval_recall_macro": 0.8504146930213136, + "eval_runtime": 0.2663, + "eval_samples_per_second": 612.01, + "eval_steps_per_second": 3.755, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4320 + }, + { + "epoch": 241.0, + "eval_accuracy": 0.91788692292048, + "eval_auc": 0.9461055278900622, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7437728066000318, + "eval_f1_macro": 0.8474411515820368, + "eval_loss": 0.23946216702461243, + "eval_pr_auc": 0.7500126507936957, + "eval_precision": 0.7320424734540912, + "eval_precision_macro": 0.8430344761294506, + "eval_pred_class_0": 16466, + "eval_pred_class_1": 3202, + "eval_predicted_binding_ratio": 0.16280252186292454, + "eval_recall": 0.7558851983231216, + "eval_recall_macro": 0.8520477479513235, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.523, + "eval_steps_per_second": 3.881, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4338 + }, + { + "epoch": 242.0, + "eval_accuracy": 0.9178360789099044, + "eval_auc": 0.9462651603379567, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7429207763283487, + "eval_f1_macro": 0.8470125818101653, + "eval_loss": 0.23918889462947845, + "eval_pr_auc": 0.7507506323790389, + "eval_precision": 0.7331240188383046, + "eval_precision_macro": 0.8433259480225619, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7529829087391164, + "eval_recall_macro": 0.8508380469934491, + "eval_runtime": 0.2685, + "eval_samples_per_second": 607.073, + "eval_steps_per_second": 3.724, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4356 + }, + { + "epoch": 243.0, + "eval_accuracy": 0.9181411429733577, + "eval_auc": 0.9463650352422546, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7440381558028617, + "eval_f1_macro": 0.847659094847506, + "eval_loss": 0.2390899360179901, + "eval_pr_auc": 0.7511682669955798, + "eval_precision": 0.7337723424270931, + "eval_precision_macro": 0.8437961778887089, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7545952918413416, + "eval_recall_macro": 0.8516744190238277, + "eval_runtime": 0.2667, + "eval_samples_per_second": 611.141, + "eval_steps_per_second": 3.749, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4374 + }, + { + "epoch": 244.0, + "eval_accuracy": 0.918446207036811, + "eval_auc": 0.9465344196541042, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7436061381074168, + "eval_f1_macro": 0.8475588127054617, + "eval_loss": 0.2388136237859726, + "eval_pr_auc": 0.7520628478642977, + "eval_precision": 0.7372424722662441, + "eval_precision_macro": 0.8451548762954184, + "eval_pred_class_0": 16513, + "eval_pred_class_1": 3155, + "eval_predicted_binding_ratio": 0.1604128533658735, + "eval_recall": 0.7500806191551113, + "eval_recall_macro": 0.8500206922660327, + "eval_runtime": 0.2635, + "eval_samples_per_second": 618.538, + "eval_steps_per_second": 3.795, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4392 + }, + { + "epoch": 245.0, + "eval_accuracy": 0.9182936750050844, + "eval_auc": 0.946561008841255, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7443931923015746, + "eval_f1_macro": 0.8478842115097998, + "eval_loss": 0.2388090342283249, + "eval_pr_auc": 0.7520269916576209, + "eval_precision": 0.7344632768361582, + "eval_precision_macro": 0.844145847858681, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7545952918413416, + "eval_recall_macro": 0.8517649604616258, + "eval_runtime": 0.2356, + "eval_samples_per_second": 691.824, + "eval_steps_per_second": 4.244, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4410 + }, + { + "epoch": 246.0, + "eval_accuracy": 0.9185478950579622, + "eval_auc": 0.9466103526109676, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7456335344553826, + "eval_f1_macro": 0.8485719582198821, + "eval_loss": 0.23877908289432526, + "eval_pr_auc": 0.7522268586665612, + "eval_precision": 0.7344385361276197, + "eval_precision_macro": 0.844360910951309, + "eval_pred_class_0": 16471, + "eval_pred_class_1": 3197, + "eval_predicted_binding_ratio": 0.16254830181004679, + "eval_recall": 0.7571751048049017, + "eval_recall_macro": 0.8529643255056077, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.409, + "eval_steps_per_second": 3.868, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4428 + }, + { + "epoch": 247.0, + "eval_accuracy": 0.9181919869839333, + "eval_auc": 0.9466863828928207, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.74464370734804, + "eval_f1_macro": 0.847968894691123, + "eval_loss": 0.23864901065826416, + "eval_pr_auc": 0.7525479720158522, + "eval_precision": 0.733125, + "eval_precision_macro": 0.84363925491863, + "eval_pred_class_0": 16468, + "eval_pred_class_1": 3200, + "eval_predicted_binding_ratio": 0.16270083384177345, + "eval_recall": 0.7565301515640116, + "eval_recall_macro": 0.8524909464888326, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.776, + "eval_steps_per_second": 3.87, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4446 + }, + { + "epoch": 248.0, + "eval_accuracy": 0.9184970510473867, + "eval_auc": 0.9467725739035849, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7454343338097507, + "eval_f1_macro": 0.8484579580910492, + "eval_loss": 0.23847386240959167, + "eval_pr_auc": 0.752960490937812, + "eval_precision": 0.7343554443053817, + "eval_precision_macro": 0.8442903982090288, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7568526281844566, + "eval_recall_macro": 0.8528030871953852, + "eval_runtime": 0.2626, + "eval_samples_per_second": 620.673, + "eval_steps_per_second": 3.808, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4464 + }, + { + "epoch": 249.0, + "eval_accuracy": 0.9183953630262355, + "eval_auc": 0.9468334214870647, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7452785272179019, + "eval_f1_macro": 0.8483468464756075, + "eval_loss": 0.238382488489151, + "eval_pr_auc": 0.7531576423642267, + "eval_precision": 0.73375, + "eval_precision_macro": 0.8440124787466602, + "eval_pred_class_0": 16468, + "eval_pred_class_1": 3200, + "eval_predicted_binding_ratio": 0.16270083384177345, + "eval_recall": 0.7571751048049017, + "eval_recall_macro": 0.8528737840678097, + "eval_runtime": 0.2634, + "eval_samples_per_second": 618.912, + "eval_steps_per_second": 3.797, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4482 + }, + { + "epoch": 250.0, + "grad_norm": 32703.09375, + "learning_rate": 5.870150616070439e-07, + "loss": 0.2045, + "step": 4500 + }, + { + "epoch": 250.0, + "eval_accuracy": 0.9189546471425666, + "eval_auc": 0.9469757203543168, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7464206172446707, + "eval_f1_macro": 0.849095331315225, + "eval_loss": 0.23810486495494843, + "eval_pr_auc": 0.7538503066163141, + "eval_precision": 0.7365777080062794, + "eval_precision_macro": 0.8453864697284325, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7565301515640116, + "eval_recall_macro": 0.8529436536778228, + "eval_runtime": 0.2683, + "eval_samples_per_second": 607.492, + "eval_steps_per_second": 3.727, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4500 + }, + { + "epoch": 251.0, + "eval_accuracy": 0.9192088671954444, + "eval_auc": 0.9470911964544428, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7465305471367044, + "eval_f1_macro": 0.8492382980338313, + "eval_loss": 0.23794293403625488, + "eval_pr_auc": 0.7543678751573928, + "eval_precision": 0.7386363636363636, + "eval_precision_macro": 0.8462575757575758, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.7545952918413416, + "eval_recall_macro": 0.8523082090884139, + "eval_runtime": 0.2575, + "eval_samples_per_second": 632.943, + "eval_steps_per_second": 3.883, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4518 + }, + { + "epoch": 252.0, + "eval_accuracy": 0.9186495830791133, + "eval_auc": 0.9471146615094285, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7459510955859003, + "eval_f1_macro": 0.8487610069611806, + "eval_loss": 0.2380078136920929, + "eval_pr_auc": 0.7543445385135205, + "eval_precision": 0.7347513293712856, + "eval_precision_macro": 0.8445476639570896, + "eval_pred_class_0": 16471, + "eval_pred_class_1": 3197, + "eval_predicted_binding_ratio": 0.16254830181004679, + "eval_recall": 0.7574975814253466, + "eval_recall_macro": 0.8531557442950962, + "eval_runtime": 0.262, + "eval_samples_per_second": 622.051, + "eval_steps_per_second": 3.816, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4536 + }, + { + "epoch": 253.0, + "eval_accuracy": 0.9187004270896888, + "eval_auc": 0.9472153636761378, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7461501825686617, + "eval_f1_macro": 0.8488749520465066, + "eval_loss": 0.23785638809204102, + "eval_pr_auc": 0.7549126832149435, + "eval_precision": 0.7348342714196373, + "eval_precision_macro": 0.8446181071730852, + "eval_pred_class_0": 16470, + "eval_pred_class_1": 3198, + "eval_predicted_binding_ratio": 0.16259914582062232, + "eval_recall": 0.7578200580457917, + "eval_recall_macro": 0.8533169826053187, + "eval_runtime": 0.2614, + "eval_samples_per_second": 623.46, + "eval_steps_per_second": 3.825, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4554 + }, + { + "epoch": 254.0, + "eval_accuracy": 0.918903803131991, + "eval_auc": 0.9473291852514412, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7465437788018433, + "eval_f1_macro": 0.849136671654349, + "eval_loss": 0.23767386376857758, + "eval_pr_auc": 0.7554825230801616, + "eval_precision": 0.7359022556390977, + "eval_precision_macro": 0.8451300547435596, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7574975814253466, + "eval_recall_macro": 0.8533066466914263, + "eval_runtime": 0.2597, + "eval_samples_per_second": 627.53, + "eval_steps_per_second": 3.85, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4572 + }, + { + "epoch": 255.0, + "eval_accuracy": 0.9196156192800488, + "eval_auc": 0.9474168847995438, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7481280866656046, + "eval_f1_macro": 0.8501522492676461, + "eval_loss": 0.23749451339244843, + "eval_pr_auc": 0.7559063738482461, + "eval_precision": 0.739294710327456, + "eval_precision_macro": 0.8468181046180088, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7571751048049017, + "eval_recall_macro": 0.8535981155701939, + "eval_runtime": 0.267, + "eval_samples_per_second": 610.581, + "eval_steps_per_second": 3.746, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4590 + }, + { + "epoch": 256.0, + "eval_accuracy": 0.9195647752694732, + "eval_auc": 0.9474761751831905, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7480089200382287, + "eval_f1_macro": 0.8500768176935048, + "eval_loss": 0.23738548159599304, + "eval_pr_auc": 0.7561572732622138, + "eval_precision": 0.7390620081838212, + "eval_precision_macro": 0.8467003692001515, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7571751048049017, + "eval_recall_macro": 0.853567935090928, + "eval_runtime": 0.2595, + "eval_samples_per_second": 628.111, + "eval_steps_per_second": 3.853, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4608 + }, + { + "epoch": 257.0, + "eval_accuracy": 0.919818995322351, + "eval_auc": 0.9475989409250355, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7486051331101546, + "eval_f1_macro": 0.8504541559450298, + "eval_loss": 0.23718814551830292, + "eval_pr_auc": 0.7567216824275462, + "eval_precision": 0.7402269861286255, + "eval_precision_macro": 0.8472897782243516, + "eval_pred_class_0": 16496, + "eval_pred_class_1": 3172, + "eval_predicted_binding_ratio": 0.16127720154565792, + "eval_recall": 0.7571751048049017, + "eval_recall_macro": 0.853718837487258, + "eval_runtime": 0.2144, + "eval_samples_per_second": 760.368, + "eval_steps_per_second": 4.665, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4626 + }, + { + "epoch": 258.0, + "eval_accuracy": 0.9193613992271711, + "eval_auc": 0.9475927315907009, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.748013981569749, + "eval_f1_macro": 0.8500072329009691, + "eval_loss": 0.23723167181015015, + "eval_pr_auc": 0.7565508642499409, + "eval_precision": 0.7372377074851237, + "eval_precision_macro": 0.845948140540741, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7591099645275717, + "eval_recall_macro": 0.8542335601596028, + "eval_runtime": 0.2406, + "eval_samples_per_second": 677.35, + "eval_steps_per_second": 4.156, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4644 + }, + { + "epoch": 259.0, + "eval_accuracy": 0.9197681513117755, + "eval_auc": 0.9476849762158163, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7486460656259956, + "eval_f1_macro": 0.8504558902151395, + "eval_loss": 0.23700466752052307, + "eval_pr_auc": 0.7570817989267699, + "eval_precision": 0.7396915328926661, + "eval_precision_macro": 0.8470757706910725, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7578200580457917, + "eval_recall_macro": 0.8539507726699049, + "eval_runtime": 0.2559, + "eval_samples_per_second": 636.906, + "eval_steps_per_second": 3.907, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4662 + }, + { + "epoch": 260.0, + "eval_accuracy": 0.9199715273540777, + "eval_auc": 0.9477206555569931, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7495225970719287, + "eval_f1_macro": 0.8509503339952407, + "eval_loss": 0.23693729937076569, + "eval_pr_auc": 0.7572519889982183, + "eval_precision": 0.7398680490103676, + "eval_precision_macro": 0.8473073942352414, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7594324411480168, + "eval_recall_macro": 0.8547267837417515, + "eval_runtime": 0.2431, + "eval_samples_per_second": 670.398, + "eval_steps_per_second": 4.113, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4680 + }, + { + "epoch": 261.0, + "eval_accuracy": 0.919818995322351, + "eval_auc": 0.9477750407611654, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7492447129909365, + "eval_f1_macro": 0.8507623994645729, + "eval_loss": 0.23685960471630096, + "eval_pr_auc": 0.7574807332819814, + "eval_precision": 0.739021329987453, + "eval_precision_macro": 0.8469075096539207, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7597549177684618, + "eval_recall_macro": 0.85476730013491, + "eval_runtime": 0.2608, + "eval_samples_per_second": 625.06, + "eval_steps_per_second": 3.835, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4698 + }, + { + "epoch": 262.0, + "eval_accuracy": 0.9200223713646533, + "eval_auc": 0.947870010485989, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7495621716287215, + "eval_f1_macro": 0.8509874086097018, + "eval_loss": 0.23665639758110046, + "eval_pr_auc": 0.7579557575566394, + "eval_precision": 0.740251572327044, + "eval_precision_macro": 0.8474729477355745, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.7591099645275717, + "eval_recall_macro": 0.854625906390061, + "eval_runtime": 0.26, + "eval_samples_per_second": 626.828, + "eval_steps_per_second": 3.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4716 + }, + { + "epoch": 263.0, + "eval_accuracy": 0.9201749033963799, + "eval_auc": 0.9479619047411422, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7501591343093571, + "eval_f1_macro": 0.8513291133243506, + "eval_loss": 0.23653987050056458, + "eval_pr_auc": 0.7584013256784117, + "eval_precision": 0.7404963870562362, + "eval_precision_macro": 0.8476822244653337, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7600773943889068, + "eval_recall_macro": 0.8551096213207285, + "eval_runtime": 0.2539, + "eval_samples_per_second": 641.941, + "eval_steps_per_second": 3.938, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4734 + }, + { + "epoch": 264.0, + "eval_accuracy": 0.9203274354281066, + "eval_auc": 0.9480515799865329, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7501195981502152, + "eval_f1_macro": 0.8513640482812168, + "eval_loss": 0.2363332211971283, + "eval_pr_auc": 0.7588834286830018, + "eval_precision": 0.7419558359621451, + "eval_precision_macro": 0.848278196802748, + "eval_pred_class_0": 16498, + "eval_pred_class_1": 3170, + "eval_predicted_binding_ratio": 0.16117551352450682, + "eval_recall": 0.7584650112866818, + "eval_recall_macro": 0.8545448736037441, + "eval_runtime": 0.253, + "eval_samples_per_second": 644.351, + "eval_steps_per_second": 3.953, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4752 + }, + { + "epoch": 265.0, + "eval_accuracy": 0.9203782794386821, + "eval_auc": 0.9481172938194913, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.750557502389296, + "eval_f1_macro": 0.8515931077800434, + "eval_loss": 0.23625436425209045, + "eval_pr_auc": 0.7591087523185005, + "eval_precision": 0.7415801070192005, + "eval_precision_macro": 0.8482019751638359, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7597549177684618, + "eval_recall_macro": 0.8550992854068361, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.903, + "eval_steps_per_second": 3.895, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4770 + }, + { + "epoch": 266.0, + "eval_accuracy": 0.9200223713646533, + "eval_auc": 0.9482093632596256, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7495621716287215, + "eval_f1_macro": 0.8509874086097018, + "eval_loss": 0.23614051938056946, + "eval_pr_auc": 0.7594934170637511, + "eval_precision": 0.740251572327044, + "eval_precision_macro": 0.8474729477355745, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.7591099645275717, + "eval_recall_macro": 0.854625906390061, + "eval_runtime": 0.2555, + "eval_samples_per_second": 638.024, + "eval_steps_per_second": 3.914, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4788 + }, + { + "epoch": 267.0, + "eval_accuracy": 0.9204291234492576, + "eval_auc": 0.9483264257570818, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7504385265507894, + "eval_f1_macro": 0.8515537559413557, + "eval_loss": 0.23595084249973297, + "eval_pr_auc": 0.7600448623712702, + "eval_precision": 0.7422712933753943, + "eval_precision_macro": 0.8484662322132155, + "eval_pred_class_0": 16498, + "eval_pred_class_1": 3170, + "eval_predicted_binding_ratio": 0.16117551352450682, + "eval_recall": 0.7587874879071267, + "eval_recall_macro": 0.8547362923932326, + "eval_runtime": 0.2589, + "eval_samples_per_second": 629.531, + "eval_steps_per_second": 3.862, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4806 + }, + { + "epoch": 268.0, + "eval_accuracy": 0.9203782794386821, + "eval_auc": 0.9483670102777331, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7509541984732825, + "eval_f1_macro": 0.8517842887791249, + "eval_loss": 0.23594258725643158, + "eval_pr_auc": 0.7601171664174119, + "eval_precision": 0.740822089739567, + "eval_precision_macro": 0.8479609508220922, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7613673008706868, + "eval_recall_macro": 0.8557545745616185, + "eval_runtime": 0.2463, + "eval_samples_per_second": 661.883, + "eval_steps_per_second": 4.061, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4824 + }, + { + "epoch": 269.0, + "eval_accuracy": 0.920276591417531, + "eval_auc": 0.9484603741402287, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7505567928730512, + "eval_f1_macro": 0.8515567625484772, + "eval_loss": 0.23575998842716217, + "eval_pr_auc": 0.7605631058573062, + "eval_precision": 0.7406593406593407, + "eval_precision_macro": 0.8478216317444613, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7607223476297968, + "eval_recall_macro": 0.8554320979411736, + "eval_runtime": 0.26, + "eval_samples_per_second": 626.928, + "eval_steps_per_second": 3.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4842 + }, + { + "epoch": 270.0, + "eval_accuracy": 0.9206833435021354, + "eval_auc": 0.9485513243429828, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.751434034416826, + "eval_f1_macro": 0.8521235507837306, + "eval_loss": 0.235606849193573, + "eval_pr_auc": 0.7610077759721279, + "eval_precision": 0.7426771653543307, + "eval_precision_macro": 0.8488138752255192, + "eval_pred_class_0": 16493, + "eval_pred_class_1": 3175, + "eval_predicted_binding_ratio": 0.16142973357738458, + "eval_recall": 0.7603998710093518, + "eval_recall_macro": 0.8555424839443451, + "eval_runtime": 0.2554, + "eval_samples_per_second": 638.303, + "eval_steps_per_second": 3.916, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4860 + }, + { + "epoch": 271.0, + "eval_accuracy": 0.9209884075655888, + "eval_auc": 0.948647082400222, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7519948930737312, + "eval_f1_macro": 0.8525018311755109, + "eval_loss": 0.23542079329490662, + "eval_pr_auc": 0.7614859215167992, + "eval_precision": 0.744391785150079, + "eval_precision_macro": 0.8496242389363071, + "eval_pred_class_0": 16503, + "eval_pred_class_1": 3165, + "eval_predicted_binding_ratio": 0.16092129347162903, + "eval_recall": 0.7597549177684618, + "eval_recall_macro": 0.8554614511580283, + "eval_runtime": 0.2456, + "eval_samples_per_second": 663.61, + "eval_steps_per_second": 4.071, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4878 + }, + { + "epoch": 272.0, + "eval_accuracy": 0.920734187512711, + "eval_auc": 0.9487102755159504, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7517120560598821, + "eval_f1_macro": 0.8522755458325244, + "eval_loss": 0.2353215366601944, + "eval_pr_auc": 0.761752604262035, + "eval_precision": 0.7426054122089364, + "eval_precision_macro": 0.8488345435817272, + "eval_pred_class_0": 16490, + "eval_pred_class_1": 3178, + "eval_predicted_binding_ratio": 0.16158226560911124, + "eval_recall": 0.7610448242502419, + "eval_recall_macro": 0.8558347800855242, + "eval_runtime": 0.2522, + "eval_samples_per_second": 646.279, + "eval_steps_per_second": 3.965, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4896 + }, + { + "epoch": 273.0, + "eval_accuracy": 0.920734187512711, + "eval_auc": 0.9487825490532058, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7517911160643209, + "eval_f1_macro": 0.8523136490925144, + "eval_loss": 0.23522616922855377, + "eval_pr_auc": 0.7620046741511783, + "eval_precision": 0.7424528301886792, + "eval_precision_macro": 0.8487858522607636, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.7613673008706868, + "eval_recall_macro": 0.8559658379164806, + "eval_runtime": 0.2581, + "eval_samples_per_second": 631.657, + "eval_steps_per_second": 3.875, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4914 + }, + { + "epoch": 274.0, + "eval_accuracy": 0.9208358755338621, + "eval_auc": 0.9488776355680169, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7518725099601593, + "eval_f1_macro": 0.8523888728682258, + "eval_loss": 0.2350645512342453, + "eval_pr_auc": 0.7623918274747735, + "eval_precision": 0.7432262129804663, + "eval_precision_macro": 0.8491200787225601, + "eval_pred_class_0": 16494, + "eval_pred_class_1": 3174, + "eval_predicted_binding_ratio": 0.16137888956680904, + "eval_recall": 0.7607223476297968, + "eval_recall_macro": 0.8557640832130997, + "eval_runtime": 0.2427, + "eval_samples_per_second": 671.591, + "eval_steps_per_second": 4.12, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4932 + }, + { + "epoch": 275.0, + "eval_accuracy": 0.9210900955867399, + "eval_auc": 0.9489531597599242, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7525510204081632, + "eval_f1_macro": 0.8528058755561261, + "eval_loss": 0.2349635362625122, + "eval_pr_auc": 0.7627186958629152, + "eval_precision": 0.7442447177546515, + "eval_precision_macro": 0.8496637300357182, + "eval_pred_class_0": 16497, + "eval_pred_class_1": 3171, + "eval_predicted_binding_ratio": 0.16122635753508235, + "eval_recall": 0.7610448242502419, + "eval_recall_macro": 0.8560460434403863, + "eval_runtime": 0.2606, + "eval_samples_per_second": 625.549, + "eval_steps_per_second": 3.838, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4950 + }, + { + "epoch": 276.0, + "eval_accuracy": 0.9207850315232866, + "eval_auc": 0.9489723619803666, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7524626628535113, + "eval_f1_macro": 0.8526552767085183, + "eval_loss": 0.2350020557641983, + "eval_pr_auc": 0.7627735453579831, + "eval_precision": 0.7416222987785781, + "eval_precision_macro": 0.8485653223786669, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.763624637213802, + "eval_recall_macro": 0.8569134232124421, + "eval_runtime": 0.2544, + "eval_samples_per_second": 640.627, + "eval_steps_per_second": 3.93, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4968 + }, + { + "epoch": 277.0, + "eval_accuracy": 0.9209884075655888, + "eval_auc": 0.9490740568619694, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7524689391525964, + "eval_f1_macro": 0.8527303253449472, + "eval_loss": 0.2348015159368515, + "eval_pr_auc": 0.7632371897507115, + "eval_precision": 0.7434686811457349, + "eval_precision_macro": 0.8493281796365992, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7616897774911319, + "eval_recall_macro": 0.8562477981437672, + "eval_runtime": 0.2476, + "eval_samples_per_second": 658.287, + "eval_steps_per_second": 4.039, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4986 + }, + { + "epoch": 277.77777777777777, + "grad_norm": 14799.8212890625, + "learning_rate": 4.904982238472025e-07, + "loss": 0.199, + "step": 5000 + }, + { + "epoch": 278.0, + "eval_accuracy": 0.9207850315232866, + "eval_auc": 0.9491519363186243, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7514358647096363, + "eval_f1_macro": 0.8521604145127957, + "eval_loss": 0.23470228910446167, + "eval_pr_auc": 0.7636185597213633, + "eval_precision": 0.7436059362172402, + "eval_precision_macro": 0.8491982774838095, + "eval_pred_class_0": 16501, + "eval_pred_class_1": 3167, + "eval_predicted_binding_ratio": 0.16102298149278016, + "eval_recall": 0.7594324411480168, + "eval_recall_macro": 0.8552096714100077, + "eval_runtime": 0.2561, + "eval_samples_per_second": 636.482, + "eval_steps_per_second": 3.905, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5004 + }, + { + "epoch": 279.0, + "eval_accuracy": 0.9209375635550132, + "eval_auc": 0.9492209202712323, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7521912350597609, + "eval_f1_macro": 0.8525784825369885, + "eval_loss": 0.2345963418483734, + "eval_pr_auc": 0.7638862490185815, + "eval_precision": 0.7435412728418399, + "eval_precision_macro": 0.8493079227068421, + "eval_pred_class_0": 16494, + "eval_pred_class_1": 3174, + "eval_predicted_binding_ratio": 0.16137888956680904, + "eval_recall": 0.7610448242502419, + "eval_recall_macro": 0.8559555020025882, + "eval_runtime": 0.273, + "eval_samples_per_second": 597.168, + "eval_steps_per_second": 3.664, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5022 + }, + { + "epoch": 280.0, + "eval_accuracy": 0.9210900955867399, + "eval_auc": 0.9492453877736104, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7532591414944356, + "eval_f1_macro": 0.8531471523002045, + "eval_loss": 0.23453067243099213, + "eval_pr_auc": 0.7639137465976396, + "eval_precision": 0.7428661022264033, + "eval_precision_macro": 0.8492229655497572, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7639471138342471, + "eval_recall_macro": 0.8572255639189947, + "eval_runtime": 0.2247, + "eval_samples_per_second": 725.266, + "eval_steps_per_second": 4.449, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5040 + }, + { + "epoch": 281.0, + "eval_accuracy": 0.9210900955867399, + "eval_auc": 0.9493002596027307, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7531806615776081, + "eval_f1_macro": 0.8531093334515976, + "eval_loss": 0.2344331294298172, + "eval_pr_auc": 0.7641447446055818, + "eval_precision": 0.7430185127078758, + "eval_precision_macro": 0.8492715280607518, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.763624637213802, + "eval_recall_macro": 0.8570945060880382, + "eval_runtime": 0.263, + "eval_samples_per_second": 619.693, + "eval_steps_per_second": 3.802, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5058 + }, + { + "epoch": 282.0, + "eval_accuracy": 0.9212426276184665, + "eval_auc": 0.9493485327975579, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7538534880025425, + "eval_f1_macro": 0.8534875889608693, + "eval_loss": 0.2344052791595459, + "eval_pr_auc": 0.764296468123137, + "eval_precision": 0.7431077694235589, + "eval_precision_macro": 0.849430796583593, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8577092788496623, + "eval_runtime": 0.2484, + "eval_samples_per_second": 656.097, + "eval_steps_per_second": 4.025, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5076 + }, + { + "epoch": 283.0, + "eval_accuracy": 0.9212426276184665, + "eval_auc": 0.9494190544850012, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7536969311496263, + "eval_f1_macro": 0.8534121476034391, + "eval_loss": 0.23425185680389404, + "eval_pr_auc": 0.7646652388104445, + "eval_precision": 0.7434127979924717, + "eval_precision_macro": 0.8495280009379834, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.764269590454692, + "eval_recall_macro": 0.8574471631877492, + "eval_runtime": 0.2547, + "eval_samples_per_second": 639.881, + "eval_steps_per_second": 3.926, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5094 + }, + { + "epoch": 284.0, + "eval_accuracy": 0.9213951596501933, + "eval_auc": 0.9495173137944721, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7539000318369946, + "eval_f1_macro": 0.853564041452472, + "eval_loss": 0.2340681403875351, + "eval_pr_auc": 0.7651886756989377, + "eval_precision": 0.744419993712669, + "eval_precision_macro": 0.8499803613859639, + "eval_pred_class_0": 16487, + "eval_pred_class_1": 3181, + "eval_predicted_binding_ratio": 0.1617347976408379, + "eval_recall": 0.763624637213802, + "eval_recall_macro": 0.8572755889636343, + "eval_runtime": 0.2477, + "eval_samples_per_second": 658.168, + "eval_steps_per_second": 4.038, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5112 + }, + { + "epoch": 285.0, + "eval_accuracy": 0.9213951596501933, + "eval_auc": 0.9495497619459952, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7540566337893733, + "eval_f1_macro": 0.8536395120535369, + "eval_loss": 0.234034925699234, + "eval_pr_auc": 0.7653794962608654, + "eval_precision": 0.7441130298273155, + "eval_precision_macro": 0.8498821534503319, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.764269590454692, + "eval_recall_macro": 0.8575377046255472, + "eval_runtime": 0.2024, + "eval_samples_per_second": 805.487, + "eval_steps_per_second": 4.942, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5130 + }, + { + "epoch": 286.0, + "eval_accuracy": 0.9215476916819199, + "eval_auc": 0.9496228140831674, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.754181934044926, + "eval_f1_macro": 0.8537539029854382, + "eval_loss": 0.23388919234275818, + "eval_pr_auc": 0.7657504791267543, + "eval_precision": 0.7452770780856424, + "eval_precision_macro": 0.8503853253634615, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.763302160593357, + "eval_recall_macro": 0.8572350725704758, + "eval_runtime": 0.1796, + "eval_samples_per_second": 907.381, + "eval_steps_per_second": 5.567, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5148 + }, + { + "epoch": 287.0, + "eval_accuracy": 0.9219544437665244, + "eval_auc": 0.9497348546111616, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7550662198819211, + "eval_f1_macro": 0.8543240621923138, + "eval_loss": 0.2337103933095932, + "eval_pr_auc": 0.7663000808208629, + "eval_precision": 0.7473152242577384, + "eval_precision_macro": 0.8513875842534602, + "eval_pred_class_0": 16502, + "eval_pred_class_1": 3166, + "eval_predicted_binding_ratio": 0.1609721374822046, + "eval_recall": 0.7629796839729119, + "eval_recall_macro": 0.8573454585736473, + "eval_runtime": 0.2659, + "eval_samples_per_second": 613.107, + "eval_steps_per_second": 3.761, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5166 + }, + { + "epoch": 288.0, + "eval_accuracy": 0.9218527557453732, + "eval_auc": 0.9498016974139991, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7548253309937789, + "eval_f1_macro": 0.8541719723587154, + "eval_loss": 0.23359474539756775, + "eval_pr_auc": 0.7666273574525592, + "eval_precision": 0.7468434343434344, + "eval_precision_macro": 0.8511489898989899, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.7629796839729119, + "eval_recall_macro": 0.8572850976151154, + "eval_runtime": 0.2686, + "eval_samples_per_second": 606.819, + "eval_steps_per_second": 3.723, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5184 + }, + { + "epoch": 289.0, + "eval_accuracy": 0.9216493797030709, + "eval_auc": 0.9498063690134986, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7546569017672345, + "eval_f1_macro": 0.8540188154275592, + "eval_loss": 0.23359988629817963, + "eval_pr_auc": 0.7666725529217715, + "eval_precision": 0.7452830188679245, + "eval_precision_macro": 0.8504738723645784, + "eval_pred_class_0": 16488, + "eval_pred_class_1": 3180, + "eval_predicted_binding_ratio": 0.16168395363026236, + "eval_recall": 0.764269590454692, + "eval_recall_macro": 0.8576886070218773, + "eval_runtime": 0.2712, + "eval_samples_per_second": 600.959, + "eval_steps_per_second": 3.687, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5202 + }, + { + "epoch": 290.0, + "eval_accuracy": 0.9213951596501933, + "eval_auc": 0.9498248412965191, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7543692405465523, + "eval_f1_macro": 0.8537901526260393, + "eval_loss": 0.23362942039966583, + "eval_pr_auc": 0.7666566917414745, + "eval_precision": 0.7435014093329158, + "eval_precision_macro": 0.8496869717377781, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7655594969364721, + "eval_recall_macro": 0.8580619359493733, + "eval_runtime": 0.2158, + "eval_samples_per_second": 755.492, + "eval_steps_per_second": 4.635, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5220 + }, + { + "epoch": 291.0, + "eval_accuracy": 0.9219544437665244, + "eval_auc": 0.9499087646350263, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7554564282300462, + "eval_f1_macro": 0.8545121458733945, + "eval_loss": 0.23345860838890076, + "eval_pr_auc": 0.767032300253484, + "eval_precision": 0.746536523929471, + "eval_precision_macro": 0.8511363192046093, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.764592067075137, + "eval_recall_macro": 0.8580007477284299, + "eval_runtime": 0.2582, + "eval_samples_per_second": 631.391, + "eval_steps_per_second": 3.874, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5238 + }, + { + "epoch": 292.0, + "eval_accuracy": 0.9226154159040065, + "eval_auc": 0.9500412726083274, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7567135549872123, + "eval_f1_macro": 0.8553519407342349, + "eval_loss": 0.23320625722408295, + "eval_pr_auc": 0.7678050059622479, + "eval_precision": 0.7502377179080824, + "eval_precision_macro": 0.8528939452496871, + "eval_pred_class_0": 16513, + "eval_pred_class_1": 3155, + "eval_predicted_binding_ratio": 0.1604128533658735, + "eval_recall": 0.763302160593357, + "eval_recall_macro": 0.857868862635062, + "eval_runtime": 0.2022, + "eval_samples_per_second": 806.124, + "eval_steps_per_second": 4.946, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5256 + }, + { + "epoch": 293.0, + "eval_accuracy": 0.9219035997559487, + "eval_auc": 0.9500337688516315, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7551801083838062, + "eval_f1_macro": 0.8543609694420392, + "eval_loss": 0.23323103785514832, + "eval_pr_auc": 0.7676369094541509, + "eval_precision": 0.7466120390797353, + "eval_precision_macro": 0.8511174775574487, + "eval_pred_class_0": 16495, + "eval_pred_class_1": 3173, + "eval_predicted_binding_ratio": 0.16132804555623348, + "eval_recall": 0.7639471138342471, + "eval_recall_macro": 0.8577084515872508, + "eval_runtime": 0.2457, + "eval_samples_per_second": 663.373, + "eval_steps_per_second": 4.07, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5274 + }, + { + "epoch": 294.0, + "eval_accuracy": 0.9219035997559487, + "eval_auc": 0.9500722803500048, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7554140127388536, + "eval_f1_macro": 0.8544737053045671, + "eval_loss": 0.23318050801753998, + "eval_pr_auc": 0.7677669872107012, + "eval_precision": 0.7461465869770368, + "eval_precision_macro": 0.8509676473001504, + "eval_pred_class_0": 16489, + "eval_pred_class_1": 3179, + "eval_predicted_binding_ratio": 0.1616331096196868, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8581016250801203, + "eval_runtime": 0.2622, + "eval_samples_per_second": 621.765, + "eval_steps_per_second": 3.815, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5292 + }, + { + "epoch": 295.0, + "eval_accuracy": 0.9223103518405532, + "eval_auc": 0.9501381693679445, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7562998405103668, + "eval_f1_macro": 0.8550446157127531, + "eval_loss": 0.23305083811283112, + "eval_pr_auc": 0.7681080957655504, + "eval_precision": 0.7481855474913222, + "eval_precision_macro": 0.8519702208636682, + "eval_pred_class_0": 16499, + "eval_pred_class_1": 3169, + "eval_predicted_binding_ratio": 0.16112466951393126, + "eval_recall": 0.764592067075137, + "eval_recall_macro": 0.8582120110832919, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.885, + "eval_steps_per_second": 3.87, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5310 + }, + { + "epoch": 296.0, + "eval_accuracy": 0.9220052877770999, + "eval_auc": 0.9501577511558462, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7556546670914304, + "eval_f1_macro": 0.8546256879531203, + "eval_loss": 0.23304298520088196, + "eval_pr_auc": 0.7681639014007426, + "eval_precision": 0.7466163046899591, + "eval_precision_macro": 0.8512051870912047, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8581619860386525, + "eval_runtime": 0.2233, + "eval_samples_per_second": 730.092, + "eval_steps_per_second": 4.479, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5328 + }, + { + "epoch": 297.0, + "eval_accuracy": 0.9223611958511287, + "eval_auc": 0.9502301512155882, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7564981661616967, + "eval_f1_macro": 0.8551582014839937, + "eval_loss": 0.23290005326271057, + "eval_pr_auc": 0.7685493782671796, + "eval_precision": 0.7482649842271294, + "eval_precision_macro": 0.8520389050120978, + "eval_pred_class_0": 16498, + "eval_pred_class_1": 3170, + "eval_predicted_binding_ratio": 0.16117551352450682, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8583732493935144, + "eval_runtime": 0.259, + "eval_samples_per_second": 629.312, + "eval_steps_per_second": 3.861, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5346 + }, + { + "epoch": 298.0, + "eval_accuracy": 0.9224628838722798, + "eval_auc": 0.9503260649928105, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.756739511883873, + "eval_f1_macro": 0.8553105125875349, + "eval_loss": 0.23275841772556305, + "eval_pr_auc": 0.768979970846171, + "eval_precision": 0.7487373737373737, + "eval_precision_macro": 0.8522777777777777, + "eval_pred_class_0": 16500, + "eval_pred_class_1": 3168, + "eval_predicted_binding_ratio": 0.1610738255033557, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8584336103520465, + "eval_runtime": 0.265, + "eval_samples_per_second": 615.056, + "eval_steps_per_second": 3.773, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5364 + }, + { + "epoch": 299.0, + "eval_accuracy": 0.9224120398617043, + "eval_auc": 0.9503799246420391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7565411614550096, + "eval_f1_macro": 0.8551969143430849, + "eval_loss": 0.23264609277248383, + "eval_pr_auc": 0.7692428666566218, + "eval_precision": 0.748658035996211, + "eval_precision_macro": 0.8522091464751675, + "eval_pred_class_0": 16501, + "eval_pred_class_1": 3167, + "eval_predicted_binding_ratio": 0.16102298149278016, + "eval_recall": 0.764592067075137, + "eval_recall_macro": 0.8582723720418239, + "eval_runtime": 0.2384, + "eval_samples_per_second": 683.765, + "eval_steps_per_second": 4.195, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5382 + }, + { + "epoch": 300.0, + "eval_accuracy": 0.9223611958511287, + "eval_auc": 0.9503983969250598, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7565758010521282, + "eval_f1_macro": 0.8551956221484214, + "eval_loss": 0.23263320326805115, + "eval_pr_auc": 0.7693119480202146, + "eval_precision": 0.748108448928121, + "eval_precision_macro": 0.8519882690809373, + "eval_pred_class_0": 16496, + "eval_pred_class_1": 3172, + "eval_predicted_binding_ratio": 0.16127720154565792, + "eval_recall": 0.7652370203160271, + "eval_recall_macro": 0.8585043072244709, + "eval_runtime": 0.264, + "eval_samples_per_second": 617.523, + "eval_steps_per_second": 3.788, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5400 + }, + { + "epoch": 301.0, + "eval_accuracy": 0.9225137278828553, + "eval_auc": 0.9504205091626904, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7572475310608474, + "eval_f1_macro": 0.855573369257207, + "eval_loss": 0.2325783669948578, + "eval_pr_auc": 0.7694562109808358, + "eval_precision": 0.7481901164620711, + "eval_precision_macro": 0.8521436908185075, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7665269267978072, + "eval_recall_macro": 0.859119079986095, + "eval_runtime": 0.2413, + "eval_samples_per_second": 675.443, + "eval_steps_per_second": 4.144, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5418 + }, + { + "epoch": 302.0, + "eval_accuracy": 0.9223611958511287, + "eval_auc": 0.9504706120673215, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.756808408982322, + "eval_f1_macro": 0.8553077347570655, + "eval_loss": 0.23251411318778992, + "eval_pr_auc": 0.7697272239104135, + "eval_precision": 0.7476400251730648, + "eval_precision_macro": 0.8518369925744038, + "eval_pred_class_0": 16490, + "eval_pred_class_1": 3178, + "eval_predicted_binding_ratio": 0.16158226560911124, + "eval_recall": 0.7662044501773622, + "eval_recall_macro": 0.8588974807173404, + "eval_runtime": 0.2708, + "eval_samples_per_second": 602.022, + "eval_steps_per_second": 3.693, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5436 + }, + { + "epoch": 303.0, + "eval_accuracy": 0.9223611958511287, + "eval_auc": 0.9504959749596038, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7571178622554477, + "eval_f1_macro": 0.8554568705510044, + "eval_loss": 0.23248492181301117, + "eval_pr_auc": 0.7698215821647963, + "eval_precision": 0.7470182046453233, + "eval_precision_macro": 0.8516367567335341, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8594217120411665, + "eval_runtime": 0.2271, + "eval_samples_per_second": 717.802, + "eval_steps_per_second": 4.404, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5454 + }, + { + "epoch": 304.0, + "eval_accuracy": 0.9228187919463087, + "eval_auc": 0.9505587106478812, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7579719387755102, + "eval_f1_macro": 0.8560304891070873, + "eval_loss": 0.23235370218753815, + "eval_pr_auc": 0.7701546218803492, + "eval_precision": 0.749605802585935, + "eval_precision_macro": 0.8528595176474563, + "eval_pred_class_0": 16497, + "eval_pred_class_1": 3171, + "eval_predicted_binding_ratio": 0.16122635753508235, + "eval_recall": 0.7665269267978072, + "eval_recall_macro": 0.8593001628616911, + "eval_runtime": 0.1849, + "eval_samples_per_second": 881.772, + "eval_steps_per_second": 5.41, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5472 + }, + { + "epoch": 305.0, + "eval_accuracy": 0.9231238560097621, + "eval_auc": 0.9506673837312365, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7583120204603581, + "eval_f1_macro": 0.8563023222011585, + "eval_loss": 0.23212042450904846, + "eval_pr_auc": 0.7706813722507476, + "eval_precision": 0.7518225039619651, + "eval_precision_macro": 0.8538377341465491, + "eval_pred_class_0": 16513, + "eval_pred_class_1": 3155, + "eval_predicted_binding_ratio": 0.1604128533658735, + "eval_recall": 0.7649145436955821, + "eval_recall_macro": 0.8588259565825047, + "eval_runtime": 0.1758, + "eval_samples_per_second": 927.397, + "eval_steps_per_second": 5.69, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5490 + }, + { + "epoch": 305.55555555555554, + "grad_norm": 15827.6396484375, + "learning_rate": 3.943376017723057e-07, + "loss": 0.1954, + "step": 5500 + }, + { + "epoch": 306.0, + "eval_accuracy": 0.9233272320520642, + "eval_auc": 0.9506862453142154, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7591823698498882, + "eval_f1_macro": 0.8567938214370079, + "eval_loss": 0.23211389780044556, + "eval_pr_auc": 0.7707434518060764, + "eval_precision": 0.7519772223979754, + "eval_precision_macro": 0.8540585209342515, + "eval_pred_class_0": 16507, + "eval_pred_class_1": 3161, + "eval_predicted_binding_ratio": 0.1607179174293268, + "eval_recall": 0.7665269267978072, + "eval_recall_macro": 0.8596019676543512, + "eval_runtime": 0.2349, + "eval_samples_per_second": 693.884, + "eval_steps_per_second": 4.257, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5508 + }, + { + "epoch": 307.0, + "eval_accuracy": 0.9230730119991865, + "eval_auc": 0.9506972041080411, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7588075880758808, + "eval_f1_macro": 0.8565232326853711, + "eval_loss": 0.23213696479797363, + "eval_pr_auc": 0.7706724583082463, + "eval_precision": 0.7503152585119798, + "eval_precision_macro": 0.8533038465207814, + "eval_pred_class_0": 16496, + "eval_pred_class_1": 3172, + "eval_predicted_binding_ratio": 0.16127720154565792, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8598442387508907, + "eval_runtime": 0.2681, + "eval_samples_per_second": 608.072, + "eval_steps_per_second": 3.731, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5526 + }, + { + "epoch": 308.0, + "eval_accuracy": 0.9229204799674599, + "eval_auc": 0.9507381389986547, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.758521822236381, + "eval_f1_macro": 0.8563315143004762, + "eval_loss": 0.23207640647888184, + "eval_pr_auc": 0.7708824410889222, + "eval_precision": 0.7494491658797607, + "eval_precision_macro": 0.8528944938003498, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7678168332795873, + "eval_recall_macro": 0.8598847551440492, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.193, + "eval_steps_per_second": 3.946, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5544 + }, + { + "epoch": 309.0, + "eval_accuracy": 0.9233272320520642, + "eval_auc": 0.9508327778185136, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7593360995850622, + "eval_f1_macro": 0.8568679288369823, + "eval_loss": 0.2318853884935379, + "eval_pr_auc": 0.7713979747932131, + "eval_precision": 0.7516587677725118, + "eval_precision_macro": 0.8539545732457663, + "eval_pred_class_0": 16503, + "eval_pred_class_1": 3165, + "eval_predicted_binding_ratio": 0.16092129347162903, + "eval_recall": 0.7671718800386972, + "eval_recall_macro": 0.8598640833162641, + "eval_runtime": 0.3164, + "eval_samples_per_second": 515.146, + "eval_steps_per_second": 3.16, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5562 + }, + { + "epoch": 310.0, + "eval_accuracy": 0.9231238560097621, + "eval_auc": 0.9508632794702453, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7587747287811104, + "eval_f1_macro": 0.8565253830188363, + "eval_loss": 0.23183651268482208, + "eval_pr_auc": 0.7715329415149417, + "eval_precision": 0.7508683296495106, + "eval_precision_macro": 0.8535264016588866, + "eval_pred_class_0": 16501, + "eval_pred_class_1": 3167, + "eval_predicted_binding_ratio": 0.16102298149278016, + "eval_recall": 0.7668494034182521, + "eval_recall_macro": 0.8596123035682436, + "eval_runtime": 0.2643, + "eval_samples_per_second": 616.635, + "eval_steps_per_second": 3.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5580 + }, + { + "epoch": 311.0, + "eval_accuracy": 0.9230730119991865, + "eval_auc": 0.9509030269959862, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7588844621513944, + "eval_f1_macro": 0.8565602855810055, + "eval_loss": 0.23182560503482819, + "eval_pr_auc": 0.7716106695707178, + "eval_precision": 0.7501575299306869, + "eval_precision_macro": 0.8532526463767658, + "eval_pred_class_0": 16494, + "eval_pred_class_1": 3174, + "eval_predicted_binding_ratio": 0.16137888956680904, + "eval_recall": 0.7678168332795873, + "eval_recall_macro": 0.8599752965818471, + "eval_runtime": 0.2638, + "eval_samples_per_second": 617.932, + "eval_steps_per_second": 3.791, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5598 + }, + { + "epoch": 312.0, + "eval_accuracy": 0.9231238560097621, + "eval_auc": 0.9509431638216853, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7591589678241478, + "eval_f1_macro": 0.8567105868221108, + "eval_loss": 0.23177149891853333, + "eval_pr_auc": 0.7718084005522707, + "eval_precision": 0.7500786905886057, + "eval_precision_macro": 0.853269895291271, + "eval_pred_class_0": 16491, + "eval_pred_class_1": 3177, + "eval_predicted_binding_ratio": 0.1615314215985357, + "eval_recall": 0.7684617865204773, + "eval_recall_macro": 0.8602675927230261, + "eval_runtime": 0.2364, + "eval_samples_per_second": 689.439, + "eval_steps_per_second": 4.23, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5616 + }, + { + "epoch": 313.0, + "eval_accuracy": 0.9230730119991865, + "eval_auc": 0.9509867264870173, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7590380633858895, + "eval_f1_macro": 0.856634317411552, + "eval_loss": 0.23171813786029816, + "eval_pr_auc": 0.771954368377823, + "eval_precision": 0.749842668344871, + "eval_precision_macro": 0.8531505640086999, + "eval_pred_class_0": 16490, + "eval_pred_class_1": 3178, + "eval_predicted_binding_ratio": 0.16158226560911124, + "eval_recall": 0.7684617865204773, + "eval_recall_macro": 0.8602374122437602, + "eval_runtime": 0.2592, + "eval_samples_per_second": 628.85, + "eval_steps_per_second": 3.858, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5634 + }, + { + "epoch": 314.0, + "eval_accuracy": 0.9228696359568843, + "eval_auc": 0.9509922740114228, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7584010192705845, + "eval_f1_macro": 0.8562552937959844, + "eval_loss": 0.23173367977142334, + "eval_pr_auc": 0.7720376545527768, + "eval_precision": 0.7492133417243549, + "eval_precision_macro": 0.8527752578846153, + "eval_pred_class_0": 16490, + "eval_pred_class_1": 3178, + "eval_predicted_binding_ratio": 0.16158226560911124, + "eval_recall": 0.7678168332795873, + "eval_recall_macro": 0.8598545746647831, + "eval_runtime": 0.2664, + "eval_samples_per_second": 611.795, + "eval_steps_per_second": 3.753, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5652 + }, + { + "epoch": 315.0, + "eval_accuracy": 0.9233780760626398, + "eval_auc": 0.9510762654774227, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7595340673368438, + "eval_f1_macro": 0.8569813431425517, + "eval_loss": 0.23158977925777435, + "eval_pr_auc": 0.7725350282702966, + "eval_precision": 0.7517372078332281, + "eval_precision_macro": 0.8540227670483556, + "eval_pred_class_0": 16502, + "eval_pred_class_1": 3166, + "eval_predicted_binding_ratio": 0.1609721374822046, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8600253216264866, + "eval_runtime": 0.2525, + "eval_samples_per_second": 645.435, + "eval_steps_per_second": 3.96, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5670 + }, + { + "epoch": 316.0, + "eval_accuracy": 0.9236322961155176, + "eval_auc": 0.9511527434542277, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7601405301820504, + "eval_f1_macro": 0.8573636072933594, + "eval_loss": 0.23149563372135162, + "eval_pr_auc": 0.7728791764414321, + "eval_precision": 0.7529262891490035, + "eval_precision_macro": 0.8546239248495366, + "eval_pred_class_0": 16507, + "eval_pred_class_1": 3161, + "eval_predicted_binding_ratio": 0.1607179174293268, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8601762240228168, + "eval_runtime": 0.2078, + "eval_samples_per_second": 784.391, + "eval_steps_per_second": 4.812, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5688 + }, + { + "epoch": 317.0, + "eval_accuracy": 0.923581452104942, + "eval_auc": 0.9511918583675363, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7598657932577089, + "eval_f1_macro": 0.8572131820235396, + "eval_loss": 0.23143813014030457, + "eval_pr_auc": 0.7730966214358813, + "eval_precision": 0.7530082330588981, + "eval_precision_macro": 0.8546082958147307, + "eval_pred_class_0": 16510, + "eval_pred_class_1": 3158, + "eval_predicted_binding_ratio": 0.16056538539760015, + "eval_recall": 0.7668494034182521, + "eval_recall_macro": 0.8598839278816377, + "eval_runtime": 0.2098, + "eval_samples_per_second": 777.022, + "eval_steps_per_second": 4.767, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5706 + }, + { + "epoch": 318.0, + "eval_accuracy": 0.923479764083791, + "eval_auc": 0.9512240340090886, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7597765363128491, + "eval_f1_macro": 0.8571341935895835, + "eval_loss": 0.23142649233341217, + "eval_pr_auc": 0.7731516186784236, + "eval_precision": 0.7522123893805309, + "eval_precision_macro": 0.8542630051604545, + "eval_pred_class_0": 16504, + "eval_pred_class_1": 3164, + "eval_predicted_binding_ratio": 0.1608704494610535, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8600856825850187, + "eval_runtime": 0.2091, + "eval_samples_per_second": 779.633, + "eval_steps_per_second": 4.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5724 + }, + { + "epoch": 319.0, + "eval_accuracy": 0.9236322961155176, + "eval_auc": 0.9512825457928188, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7600638977635783, + "eval_f1_macro": 0.8573266640831436, + "eval_loss": 0.2313271462917328, + "eval_pr_auc": 0.7734563478045352, + "eval_precision": 0.7530864197530864, + "eval_precision_macro": 0.8546763493762101, + "eval_pred_class_0": 16509, + "eval_pred_class_1": 3159, + "eval_predicted_binding_ratio": 0.16061622940817571, + "eval_recall": 0.7671718800386972, + "eval_recall_macro": 0.8600451661918602, + "eval_runtime": 0.2154, + "eval_samples_per_second": 756.779, + "eval_steps_per_second": 4.643, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5742 + }, + { + "epoch": 320.0, + "eval_accuracy": 0.9237848281472443, + "eval_auc": 0.9513326194999532, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7605048729829046, + "eval_f1_macro": 0.8575931868618003, + "eval_loss": 0.23122872412204742, + "eval_pr_auc": 0.7736938128704516, + "eval_precision": 0.7536415452818239, + "eval_precision_macro": 0.8549855212781016, + "eval_pred_class_0": 16510, + "eval_pred_class_1": 3158, + "eval_predicted_binding_ratio": 0.16056538539760015, + "eval_recall": 0.7674943566591422, + "eval_recall_macro": 0.8602667654606148, + "eval_runtime": 0.235, + "eval_samples_per_second": 693.713, + "eval_steps_per_second": 4.256, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5760 + }, + { + "epoch": 321.0, + "eval_accuracy": 0.9236322961155176, + "eval_auc": 0.9513477438033324, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7602171136653896, + "eval_f1_macro": 0.8574005258699469, + "eval_loss": 0.2312333732843399, + "eval_pr_auc": 0.773709338696213, + "eval_precision": 0.7527663610496365, + "eval_precision_macro": 0.8545716082739852, + "eval_pred_class_0": 16505, + "eval_pred_class_1": 3163, + "eval_predicted_binding_ratio": 0.16081960545047794, + "eval_recall": 0.7678168332795873, + "eval_recall_macro": 0.8603072818537733, + "eval_runtime": 0.2276, + "eval_samples_per_second": 716.105, + "eval_steps_per_second": 4.393, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5778 + }, + { + "epoch": 322.0, + "eval_accuracy": 0.9235306080943665, + "eval_auc": 0.9513562889374167, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7602040816326531, + "eval_f1_macro": 0.8573582711574831, + "eval_loss": 0.23121465742588043, + "eval_pr_auc": 0.7737540605936648, + "eval_precision": 0.7518133081046988, + "eval_precision_macro": 0.854175430193466, + "eval_pred_class_0": 16497, + "eval_pred_class_1": 3171, + "eval_predicted_binding_ratio": 0.16122635753508235, + "eval_recall": 0.7687842631409223, + "eval_recall_macro": 0.8606400943881107, + "eval_runtime": 0.2488, + "eval_samples_per_second": 655.047, + "eval_steps_per_second": 4.019, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5796 + }, + { + "epoch": 323.0, + "eval_accuracy": 0.9236831401260931, + "eval_auc": 0.9514191998106756, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7604914632200415, + "eval_f1_macro": 0.8575507604890313, + "eval_loss": 0.23111233115196228, + "eval_pr_auc": 0.7739781495758574, + "eval_precision": 0.7526847757422616, + "eval_precision_macro": 0.8545874490758332, + "eval_pred_class_0": 16502, + "eval_pred_class_1": 3166, + "eval_predicted_binding_ratio": 0.1609721374822046, + "eval_recall": 0.7684617865204773, + "eval_recall_macro": 0.8605995779949522, + "eval_runtime": 0.2355, + "eval_samples_per_second": 692.067, + "eval_steps_per_second": 4.246, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5814 + }, + { + "epoch": 324.0, + "eval_accuracy": 0.9236831401260931, + "eval_auc": 0.9514517842171841, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7604914632200415, + "eval_f1_macro": 0.8575507604890313, + "eval_loss": 0.23104801774024963, + "eval_pr_auc": 0.7741130008089699, + "eval_precision": 0.7526847757422616, + "eval_precision_macro": 0.8545874490758332, + "eval_pred_class_0": 16502, + "eval_pred_class_1": 3166, + "eval_predicted_binding_ratio": 0.1609721374822046, + "eval_recall": 0.7684617865204773, + "eval_recall_macro": 0.8605995779949522, + "eval_runtime": 0.2425, + "eval_samples_per_second": 672.295, + "eval_steps_per_second": 4.125, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5832 + }, + { + "epoch": 325.0, + "eval_accuracy": 0.9237339841366687, + "eval_auc": 0.951504651151519, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7606128311522502, + "eval_f1_macro": 0.857627250169412, + "eval_loss": 0.23095941543579102, + "eval_pr_auc": 0.7744096919390852, + "eval_precision": 0.7529225908372827, + "eval_precision_macro": 0.8547076748648027, + "eval_pred_class_0": 16503, + "eval_pred_class_1": 3165, + "eval_predicted_binding_ratio": 0.16092129347162903, + "eval_recall": 0.7684617865204773, + "eval_recall_macro": 0.8606297584742182, + "eval_runtime": 0.2175, + "eval_samples_per_second": 749.374, + "eval_steps_per_second": 4.597, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5850 + }, + { + "epoch": 326.0, + "eval_accuracy": 0.9237848281472443, + "eval_auc": 0.951546208922066, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7606578317100431, + "eval_f1_macro": 0.8576669257120046, + "eval_loss": 0.23088191449642181, + "eval_pr_auc": 0.774641356281408, + "eval_precision": 0.7533206831119544, + "eval_precision_macro": 0.8548803827531177, + "eval_pred_class_0": 16506, + "eval_pred_class_1": 3162, + "eval_predicted_binding_ratio": 0.16076876143990237, + "eval_recall": 0.7681393099000322, + "eval_recall_macro": 0.8605288811225278, + "eval_runtime": 0.2627, + "eval_samples_per_second": 620.56, + "eval_steps_per_second": 3.807, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5868 + }, + { + "epoch": 327.0, + "eval_accuracy": 0.9237848281472443, + "eval_auc": 0.9515728175742149, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7608105951811074, + "eval_f1_macro": 0.8577405662711912, + "eval_loss": 0.23083868622779846, + "eval_pr_auc": 0.7747608129205691, + "eval_precision": 0.7530006317119393, + "eval_precision_macro": 0.8547756764183257, + "eval_pred_class_0": 16502, + "eval_pred_class_1": 3166, + "eval_predicted_binding_ratio": 0.1609721374822046, + "eval_recall": 0.7687842631409223, + "eval_recall_macro": 0.8607909967844407, + "eval_runtime": 0.2492, + "eval_samples_per_second": 654.065, + "eval_steps_per_second": 4.013, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5886 + }, + { + "epoch": 328.0, + "eval_accuracy": 0.9238356721578198, + "eval_auc": 0.9516116113150578, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7611607142857143, + "eval_f1_macro": 0.8579273206076528, + "eval_loss": 0.23078228533267975, + "eval_pr_auc": 0.7749744562806883, + "eval_precision": 0.7527593818984547, + "eval_precision_macro": 0.8547393927131844, + "eval_pred_class_0": 16497, + "eval_pred_class_1": 3171, + "eval_predicted_binding_ratio": 0.16122635753508235, + "eval_recall": 0.7697516930022573, + "eval_recall_macro": 0.8612143507565763, + "eval_runtime": 0.2121, + "eval_samples_per_second": 768.541, + "eval_steps_per_second": 4.715, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5904 + }, + { + "epoch": 329.0, + "eval_accuracy": 0.9238865161683953, + "eval_auc": 0.9516614027797223, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7614342629482072, + "eval_f1_macro": 0.8580771629311073, + "eval_loss": 0.2307167798280716, + "eval_pr_auc": 0.7752067172424865, + "eval_precision": 0.7526780088216761, + "eval_precision_macro": 0.8547553982510223, + "eval_pred_class_0": 16494, + "eval_pred_class_1": 3174, + "eval_predicted_binding_ratio": 0.16137888956680904, + "eval_recall": 0.7703966462431474, + "eval_recall_macro": 0.8615066468977552, + "eval_runtime": 0.2538, + "eval_samples_per_second": 642.134, + "eval_steps_per_second": 3.939, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5922 + }, + { + "epoch": 330.0, + "eval_accuracy": 0.9236322961155176, + "eval_auc": 0.9516455582714203, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7612841703750794, + "eval_f1_macro": 0.857914812460267, + "eval_loss": 0.23076769709587097, + "eval_pr_auc": 0.775112377066796, + "eval_precision": 0.750548417424005, + "eval_precision_macro": 0.8538504058352652, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7723315059658175, + "eval_recall_macro": 0.8621420914871643, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.508, + "eval_steps_per_second": 3.868, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5940 + }, + { + "epoch": 331.0, + "eval_accuracy": 0.9240390482001221, + "eval_auc": 0.9516963035209824, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7621776504297995, + "eval_f1_macro": 0.8584894423868002, + "eval_loss": 0.23067235946655273, + "eval_pr_auc": 0.7753820734835624, + "eval_precision": 0.7525935240490412, + "eval_precision_macro": 0.8548556265844769, + "eval_pred_class_0": 16487, + "eval_pred_class_1": 3181, + "eval_predicted_binding_ratio": 0.1617347976408379, + "eval_recall": 0.7720090293453724, + "eval_recall_macro": 0.8622524774903357, + "eval_runtime": 0.1824, + "eval_samples_per_second": 893.499, + "eval_steps_per_second": 5.482, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5958 + }, + { + "epoch": 332.0, + "eval_accuracy": 0.9240390482001221, + "eval_auc": 0.9517492385828104, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7619502868068834, + "eval_f1_macro": 0.8583798620967267, + "eval_loss": 0.23056790232658386, + "eval_pr_auc": 0.775607049366595, + "eval_precision": 0.7530708661417322, + "eval_precision_macro": 0.8550111500417023, + "eval_pred_class_0": 16493, + "eval_pred_class_1": 3175, + "eval_predicted_binding_ratio": 0.16142973357738458, + "eval_recall": 0.7710415994840374, + "eval_recall_macro": 0.8618593039974662, + "eval_runtime": 0.2665, + "eval_samples_per_second": 611.679, + "eval_steps_per_second": 3.753, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5976 + }, + { + "epoch": 333.0, + "eval_accuracy": 0.9240898922106976, + "eval_auc": 0.9517777256072577, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7621475227019276, + "eval_f1_macro": 0.8584929210351648, + "eval_loss": 0.23053352534770966, + "eval_pr_auc": 0.7757600766000483, + "eval_precision": 0.7531486146095718, + "eval_precision_macro": 0.855079036870636, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7713640761044824, + "eval_recall_macro": 0.8620205423076888, + "eval_runtime": 0.2613, + "eval_samples_per_second": 623.883, + "eval_steps_per_second": 3.828, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5994 + }, + { + "epoch": 333.3333333333333, + "grad_norm": 16736.6328125, + "learning_rate": 3.021381973636964e-07, + "loss": 0.1913, + "step": 6000 + }, + { + "epoch": 334.0, + "eval_accuracy": 0.9237848281472443, + "eval_auc": 0.9517933365355851, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7616473207187152, + "eval_f1_macro": 0.8581438407085573, + "eval_loss": 0.23052088916301727, + "eval_pr_auc": 0.7758187649274527, + "eval_precision": 0.751254705144291, + "eval_precision_macro": 0.8542074496595242, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7723315059658175, + "eval_recall_macro": 0.8622326329249622, + "eval_runtime": 0.2573, + "eval_samples_per_second": 633.473, + "eval_steps_per_second": 3.886, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6012 + }, + { + "epoch": 335.0, + "eval_accuracy": 0.9238356721578198, + "eval_auc": 0.9518365877609504, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7619198982835347, + "eval_f1_macro": 0.8582932017746205, + "eval_loss": 0.23045583069324493, + "eval_pr_auc": 0.7760158372270667, + "eval_precision": 0.7511751801942964, + "eval_precision_macro": 0.8542244778801185, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8625249290661412, + "eval_runtime": 0.219, + "eval_samples_per_second": 744.186, + "eval_steps_per_second": 4.566, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6030 + }, + { + "epoch": 336.0, + "eval_accuracy": 0.9241915802318487, + "eval_auc": 0.9518931822423861, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7624661462482077, + "eval_f1_macro": 0.8586824817571539, + "eval_loss": 0.23035065829753876, + "eval_pr_auc": 0.7762652864261658, + "eval_precision": 0.753463476070529, + "eval_precision_macro": 0.855266785330923, + "eval_pred_class_0": 16492, + "eval_pred_class_1": 3176, + "eval_predicted_binding_ratio": 0.16148057758796014, + "eval_recall": 0.7716865527249275, + "eval_recall_macro": 0.8622119610971773, + "eval_runtime": 0.2363, + "eval_samples_per_second": 689.881, + "eval_steps_per_second": 4.232, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6048 + }, + { + "epoch": 337.0, + "eval_accuracy": 0.9240390482001221, + "eval_auc": 0.9519021264089276, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7623289850461342, + "eval_f1_macro": 0.8585623745200415, + "eval_loss": 0.2303379327058792, + "eval_pr_auc": 0.7763353590359, + "eval_precision": 0.752276295133438, + "eval_precision_macro": 0.8547524774823897, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7726539825862625, + "eval_recall_macro": 0.8625145931522488, + "eval_runtime": 0.2467, + "eval_samples_per_second": 660.828, + "eval_steps_per_second": 4.054, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6066 + }, + { + "epoch": 338.0, + "eval_accuracy": 0.9236831401260931, + "eval_auc": 0.9519062432559864, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7616325234238527, + "eval_f1_macro": 0.8581006831532533, + "eval_loss": 0.23036180436611176, + "eval_pr_auc": 0.7763420780879606, + "eval_precision": 0.7503128911138923, + "eval_precision_macro": 0.8538172032062905, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7732989358271525, + "eval_recall_macro": 0.8625654454592997, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.319, + "eval_steps_per_second": 3.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6084 + }, + { + "epoch": 339.0, + "eval_accuracy": 0.9237339841366687, + "eval_auc": 0.9519363653402588, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7618291521117815, + "eval_f1_macro": 0.8582134440261069, + "eval_loss": 0.23031854629516602, + "eval_pr_auc": 0.776475073481046, + "eval_precision": 0.7503909915545824, + "eval_precision_macro": 0.8538853142461151, + "eval_pred_class_0": 16471, + "eval_pred_class_1": 3197, + "eval_predicted_binding_ratio": 0.16254830181004679, + "eval_recall": 0.7736214124475975, + "eval_recall_macro": 0.8627266837695222, + "eval_runtime": 0.2533, + "eval_samples_per_second": 643.603, + "eval_steps_per_second": 3.948, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6102 + }, + { + "epoch": 340.0, + "eval_accuracy": 0.9236831401260931, + "eval_auc": 0.9519665750170216, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7616325234238527, + "eval_f1_macro": 0.8581006831532533, + "eval_loss": 0.23026354610919952, + "eval_pr_auc": 0.7766276763039114, + "eval_precision": 0.7503128911138923, + "eval_precision_macro": 0.8538172032062905, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7732989358271525, + "eval_recall_macro": 0.8625654454592997, + "eval_runtime": 0.2621, + "eval_samples_per_second": 621.893, + "eval_steps_per_second": 3.815, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6120 + }, + { + "epoch": 341.0, + "eval_accuracy": 0.9240898922106976, + "eval_auc": 0.9520129601070512, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7625258469858438, + "eval_f1_macro": 0.8586752506435165, + "eval_loss": 0.230192169547081, + "eval_pr_auc": 0.7768138361852162, + "eval_precision": 0.7523540489642184, + "eval_precision_macro": 0.8548203930053466, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8626758314624713, + "eval_runtime": 0.2426, + "eval_samples_per_second": 671.971, + "eval_steps_per_second": 4.123, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6138 + }, + { + "epoch": 342.0, + "eval_accuracy": 0.9239882041895465, + "eval_auc": 0.9520488340982072, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7622833518842423, + "eval_f1_macro": 0.8585223761569667, + "eval_loss": 0.2301386296749115, + "eval_pr_auc": 0.7769598633905366, + "eval_precision": 0.7518820577164367, + "eval_precision_macro": 0.8545818055572474, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8626154705039393, + "eval_runtime": 0.2473, + "eval_samples_per_second": 659.185, + "eval_steps_per_second": 4.044, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6156 + }, + { + "epoch": 343.0, + "eval_accuracy": 0.9241407362212731, + "eval_auc": 0.952100825107636, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7626471524021635, + "eval_f1_macro": 0.8587517153841377, + "eval_loss": 0.23005619645118713, + "eval_pr_auc": 0.7772456933395511, + "eval_precision": 0.7525902668759812, + "eval_precision_macro": 0.8549397976374689, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8627060119417373, + "eval_runtime": 0.2129, + "eval_samples_per_second": 765.55, + "eval_steps_per_second": 4.697, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6174 + }, + { + "epoch": 344.0, + "eval_accuracy": 0.9240898922106976, + "eval_auc": 0.9521250006350455, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7625258469858438, + "eval_f1_macro": 0.8586752506435165, + "eval_loss": 0.23000310361385345, + "eval_pr_auc": 0.7773917675410515, + "eval_precision": 0.7523540489642184, + "eval_precision_macro": 0.8548203930053466, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8626758314624713, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.276, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6192 + }, + { + "epoch": 345.0, + "eval_accuracy": 0.9241915802318487, + "eval_auc": 0.9521700231752211, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7627684964200477, + "eval_f1_macro": 0.8588281984687149, + "eval_loss": 0.22992061078548431, + "eval_pr_auc": 0.777567865132197, + "eval_precision": 0.7528266331658291, + "eval_precision_macro": 0.8550592763014295, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8627361924210033, + "eval_runtime": 0.2684, + "eval_samples_per_second": 607.371, + "eval_steps_per_second": 3.726, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6210 + }, + { + "epoch": 346.0, + "eval_accuracy": 0.9240898922106976, + "eval_auc": 0.9521951914175242, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7624502784407319, + "eval_f1_macro": 0.8586388332084449, + "eval_loss": 0.22986458241939545, + "eval_pr_auc": 0.7777181268262345, + "eval_precision": 0.7525125628140703, + "eval_precision_macro": 0.8548719086819685, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7726539825862625, + "eval_recall_macro": 0.8625447736315148, + "eval_runtime": 0.2315, + "eval_samples_per_second": 704.221, + "eval_steps_per_second": 4.32, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6228 + }, + { + "epoch": 347.0, + "eval_accuracy": 0.9240898922106976, + "eval_auc": 0.9522022182817713, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7625258469858438, + "eval_f1_macro": 0.8586752506435165, + "eval_loss": 0.22987791895866394, + "eval_pr_auc": 0.7777283636078421, + "eval_precision": 0.7523540489642184, + "eval_precision_macro": 0.8548203930053466, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7729764592067075, + "eval_recall_macro": 0.8626758314624713, + "eval_runtime": 0.2021, + "eval_samples_per_second": 806.414, + "eval_steps_per_second": 4.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6246 + }, + { + "epoch": 348.0, + "eval_accuracy": 0.9243441122635754, + "eval_auc": 0.9522300142987927, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7634340222575516, + "eval_f1_macro": 0.8592029398342167, + "eval_loss": 0.229818195104599, + "eval_pr_auc": 0.7778254023800715, + "eval_precision": 0.7529005957980558, + "eval_precision_macro": 0.8552111450378106, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7742663656884876, + "eval_recall_macro": 0.8633509651826273, + "eval_runtime": 0.2422, + "eval_samples_per_second": 672.887, + "eval_steps_per_second": 4.128, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6264 + }, + { + "epoch": 349.0, + "eval_accuracy": 0.9243949562741509, + "eval_auc": 0.9522554939810626, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763780778395552, + "eval_f1_macro": 0.8593880436271214, + "eval_loss": 0.22978660464286804, + "eval_pr_auc": 0.7779049389173774, + "eval_precision": 0.7526612398246713, + "eval_precision_macro": 0.8551760733541227, + "eval_pred_class_0": 16474, + "eval_pred_class_1": 3194, + "eval_predicted_binding_ratio": 0.16239576977832013, + "eval_recall": 0.7752337955498226, + "eval_recall_macro": 0.8637743191547629, + "eval_runtime": 0.237, + "eval_samples_per_second": 687.696, + "eval_steps_per_second": 4.219, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6282 + }, + { + "epoch": 350.0, + "eval_accuracy": 0.9244458002847264, + "eval_auc": 0.9522950273918264, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7638270820089001, + "eval_f1_macro": 0.859428369717681, + "eval_loss": 0.2297380119562149, + "eval_pr_auc": 0.7780796039517833, + "eval_precision": 0.7530554685051708, + "eval_precision_macro": 0.855346694014678, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7749113189293776, + "eval_recall_macro": 0.8636734418030723, + "eval_runtime": 0.251, + "eval_samples_per_second": 649.348, + "eval_steps_per_second": 3.984, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6300 + }, + { + "epoch": 351.0, + "eval_accuracy": 0.9242932682529998, + "eval_auc": 0.9523093341652933, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7636883034438978, + "eval_f1_macro": 0.8593074482256571, + "eval_loss": 0.22972844541072845, + "eval_pr_auc": 0.7781517759374512, + "eval_precision": 0.751875, + "eval_precision_macro": 0.8548359697595336, + "eval_pred_class_0": 16468, + "eval_pred_class_1": 3200, + "eval_predicted_binding_ratio": 0.16270083384177345, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8639760738581439, + "eval_runtime": 0.2607, + "eval_samples_per_second": 625.342, + "eval_steps_per_second": 3.836, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6318 + }, + { + "epoch": 352.0, + "eval_accuracy": 0.9242424242424242, + "eval_auc": 0.952317528929415, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7635671215487146, + "eval_f1_macro": 0.8592310391299909, + "eval_loss": 0.22972537577152252, + "eval_pr_auc": 0.7781868067856106, + "eval_precision": 0.7516401124648547, + "eval_precision_macro": 0.8547172445484534, + "eval_pred_class_0": 16467, + "eval_pred_class_1": 3201, + "eval_predicted_binding_ratio": 0.16275167785234898, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8639458933788778, + "eval_runtime": 0.234, + "eval_samples_per_second": 696.7, + "eval_steps_per_second": 4.274, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6336 + }, + { + "epoch": 353.0, + "eval_accuracy": 0.9243441122635754, + "eval_auc": 0.9523571596651685, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7638095238095238, + "eval_f1_macro": 0.8593838755989138, + "eval_loss": 0.2296588122844696, + "eval_pr_auc": 0.7784014772150735, + "eval_precision": 0.7521100343857455, + "eval_precision_macro": 0.8549547682402951, + "eval_pred_class_0": 16469, + "eval_pred_class_1": 3199, + "eval_predicted_binding_ratio": 0.16264998983119788, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8640062543374099, + "eval_runtime": 0.2433, + "eval_samples_per_second": 669.953, + "eval_steps_per_second": 4.11, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6354 + }, + { + "epoch": 354.0, + "eval_accuracy": 0.9243441122635754, + "eval_auc": 0.952381568772553, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7638095238095238, + "eval_f1_macro": 0.8593838755989138, + "eval_loss": 0.22961482405662537, + "eval_pr_auc": 0.7785102930543456, + "eval_precision": 0.7521100343857455, + "eval_precision_macro": 0.8549547682402951, + "eval_pred_class_0": 16469, + "eval_pred_class_1": 3199, + "eval_predicted_binding_ratio": 0.16264998983119788, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8640062543374099, + "eval_runtime": 0.2671, + "eval_samples_per_second": 610.236, + "eval_steps_per_second": 3.744, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6372 + }, + { + "epoch": 355.0, + "eval_accuracy": 0.9242932682529998, + "eval_auc": 0.9523906199965831, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763838223632038, + "eval_f1_macro": 0.8593796791618457, + "eval_loss": 0.2295987904071808, + "eval_pr_auc": 0.7785825605072106, + "eval_precision": 0.7515605493133583, + "eval_precision_macro": 0.8547343562893321, + "eval_pred_class_0": 16464, + "eval_pred_class_1": 3204, + "eval_predicted_binding_ratio": 0.16290420988407567, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.8642381895200568, + "eval_runtime": 0.2598, + "eval_samples_per_second": 627.418, + "eval_steps_per_second": 3.849, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6390 + }, + { + "epoch": 356.0, + "eval_accuracy": 0.9243949562741509, + "eval_auc": 0.9524596428791869, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7636305833730727, + "eval_f1_macro": 0.8593156699585895, + "eval_loss": 0.229468435049057, + "eval_pr_auc": 0.7789122838326235, + "eval_precision": 0.7529780564263323, + "eval_precision_macro": 0.8552789299002641, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7745888423089327, + "eval_recall_macro": 0.8635122034928499, + "eval_runtime": 0.2641, + "eval_samples_per_second": 617.191, + "eval_steps_per_second": 3.786, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6408 + }, + { + "epoch": 357.0, + "eval_accuracy": 0.9244458002847264, + "eval_auc": 0.9524861542063461, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763751987281399, + "eval_f1_macro": 0.8593921831946546, + "eval_loss": 0.2294115126132965, + "eval_pr_auc": 0.7790142584142796, + "eval_precision": 0.7532141737221699, + "eval_precision_macro": 0.8553982756468123, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7745888423089327, + "eval_recall_macro": 0.8635423839721159, + "eval_runtime": 0.2557, + "eval_samples_per_second": 637.426, + "eval_steps_per_second": 3.911, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6426 + }, + { + "epoch": 358.0, + "eval_accuracy": 0.9244458002847264, + "eval_auc": 0.9524829619466881, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7639771283354511, + "eval_f1_macro": 0.8595006707052558, + "eval_loss": 0.2294154018163681, + "eval_pr_auc": 0.7789936192429844, + "eval_precision": 0.7527386541471048, + "eval_precision_macro": 0.8552438490185533, + "eval_pred_class_0": 16473, + "eval_pred_class_1": 3195, + "eval_predicted_binding_ratio": 0.16244661378889566, + "eval_recall": 0.7755562721702677, + "eval_recall_macro": 0.8639355574649854, + "eval_runtime": 0.2445, + "eval_samples_per_second": 666.738, + "eval_steps_per_second": 4.09, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6444 + }, + { + "epoch": 359.0, + "eval_accuracy": 0.9243949562741509, + "eval_auc": 0.9525149624032593, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763780778395552, + "eval_f1_macro": 0.8593880436271214, + "eval_loss": 0.2293538749217987, + "eval_pr_auc": 0.7791661873069065, + "eval_precision": 0.7526612398246713, + "eval_precision_macro": 0.8551760733541227, + "eval_pred_class_0": 16474, + "eval_pred_class_1": 3194, + "eval_predicted_binding_ratio": 0.16239576977832013, + "eval_recall": 0.7752337955498226, + "eval_recall_macro": 0.8637743191547629, + "eval_runtime": 0.2637, + "eval_samples_per_second": 618.132, + "eval_steps_per_second": 3.792, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6462 + }, + { + "epoch": 360.0, + "eval_accuracy": 0.9243949562741509, + "eval_auc": 0.9525288798767679, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763780778395552, + "eval_f1_macro": 0.8593880436271214, + "eval_loss": 0.22932648658752441, + "eval_pr_auc": 0.7792072068588576, + "eval_precision": 0.7526612398246713, + "eval_precision_macro": 0.8551760733541227, + "eval_pred_class_0": 16474, + "eval_pred_class_1": 3194, + "eval_predicted_binding_ratio": 0.16239576977832013, + "eval_recall": 0.7752337955498226, + "eval_recall_macro": 0.8637743191547629, + "eval_runtime": 0.2714, + "eval_samples_per_second": 600.631, + "eval_steps_per_second": 3.685, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6480 + }, + { + "epoch": 361.0, + "eval_accuracy": 0.9244458002847264, + "eval_auc": 0.9525697758373857, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.763751987281399, + "eval_f1_macro": 0.8593921831946546, + "eval_loss": 0.22925521433353424, + "eval_pr_auc": 0.77936321808712, + "eval_precision": 0.7532141737221699, + "eval_precision_macro": 0.8553982756468123, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7745888423089327, + "eval_recall_macro": 0.8635423839721159, + "eval_runtime": 0.249, + "eval_samples_per_second": 654.66, + "eval_steps_per_second": 4.016, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6498 + }, + { + "epoch": 361.1111111111111, + "grad_norm": 21180.3203125, + "learning_rate": 2.1735650901333336e-07, + "loss": 0.1893, + "step": 6500 + }, + { + "epoch": 362.0, + "eval_accuracy": 0.9244458002847264, + "eval_auc": 0.9526172898972942, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7636016544702513, + "eval_f1_macro": 0.8593197379764267, + "eval_loss": 0.22917793691158295, + "eval_pr_auc": 0.7795955328857882, + "eval_precision": 0.7535321821036107, + "eval_precision_macro": 0.8555017581027062, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7739438890680426, + "eval_recall_macro": 0.8632802683102028, + "eval_runtime": 0.208, + "eval_samples_per_second": 783.548, + "eval_steps_per_second": 4.807, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6516 + }, + { + "epoch": 363.0, + "eval_accuracy": 0.924496644295302, + "eval_auc": 0.9526382926300437, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7636479388826993, + "eval_f1_macro": 0.8593600478608577, + "eval_loss": 0.22912409901618958, + "eval_pr_auc": 0.7796869947527124, + "eval_precision": 0.7539283469516027, + "eval_precision_macro": 0.8556733812884909, + "eval_pred_class_0": 16486, + "eval_pred_class_1": 3182, + "eval_predicted_binding_ratio": 0.16178564165141346, + "eval_recall": 0.7736214124475975, + "eval_recall_macro": 0.8631793909585124, + "eval_runtime": 0.2657, + "eval_samples_per_second": 613.513, + "eval_steps_per_second": 3.764, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6534 + }, + { + "epoch": 364.0, + "eval_accuracy": 0.924496644295302, + "eval_auc": 0.952662049659998, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7637231503579952, + "eval_f1_macro": 0.859396294249525, + "eval_loss": 0.22910362482070923, + "eval_pr_auc": 0.779788701256993, + "eval_precision": 0.7537688442211056, + "eval_precision_macro": 0.8556213791598126, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7739438890680426, + "eval_recall_macro": 0.8633104487894689, + "eval_runtime": 0.2607, + "eval_samples_per_second": 625.175, + "eval_steps_per_second": 3.835, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6552 + }, + { + "epoch": 365.0, + "eval_accuracy": 0.9245474883058775, + "eval_auc": 0.9526903420344663, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7638446849140674, + "eval_f1_macro": 0.8594728689002142, + "eval_loss": 0.2290574461221695, + "eval_pr_auc": 0.7799354479263911, + "eval_precision": 0.7540056550424128, + "eval_precision_macro": 0.8557410744123195, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7739438890680426, + "eval_recall_macro": 0.8633406292687349, + "eval_runtime": 0.2049, + "eval_samples_per_second": 795.329, + "eval_steps_per_second": 4.879, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6570 + }, + { + "epoch": 366.0, + "eval_accuracy": 0.9245474883058775, + "eval_auc": 0.9527021767531981, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7640699523052464, + "eval_f1_macro": 0.8595814265550925, + "eval_loss": 0.22903695702552795, + "eval_pr_auc": 0.7799769457420497, + "eval_precision": 0.753527751646284, + "eval_precision_macro": 0.8555854062558139, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7749113189293776, + "eval_recall_macro": 0.8637338027616044, + "eval_runtime": 0.2118, + "eval_samples_per_second": 769.509, + "eval_steps_per_second": 4.721, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6588 + }, + { + "epoch": 367.0, + "eval_accuracy": 0.9247000203376042, + "eval_auc": 0.9527121622971282, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.764659145081837, + "eval_f1_macro": 0.8599193797618125, + "eval_loss": 0.22902432084083557, + "eval_pr_auc": 0.7800307850119022, + "eval_precision": 0.7537593984962406, + "eval_precision_macro": 0.8557884149558164, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.864217517692272, + "eval_runtime": 0.2519, + "eval_samples_per_second": 647.072, + "eval_steps_per_second": 3.97, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6606 + }, + { + "epoch": 368.0, + "eval_accuracy": 0.9248017083587553, + "eval_auc": 0.9527291941703031, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7649769585253456, + "eval_f1_macro": 0.8601085500794873, + "eval_loss": 0.22899393737316132, + "eval_pr_auc": 0.7800749822284204, + "eval_precision": 0.7540726817042607, + "eval_precision_macro": 0.85597540373147, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8644089364817604, + "eval_runtime": 0.2331, + "eval_samples_per_second": 699.217, + "eval_steps_per_second": 4.29, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6624 + }, + { + "epoch": 369.0, + "eval_accuracy": 0.9248525523693308, + "eval_auc": 0.9527588782921224, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7650238473767885, + "eval_f1_macro": 0.8601491566364061, + "eval_loss": 0.22894835472106934, + "eval_pr_auc": 0.780237083741907, + "eval_precision": 0.7544684854186265, + "eval_precision_macro": 0.856146798082819, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.86430805913007, + "eval_runtime": 0.2655, + "eval_samples_per_second": 613.902, + "eval_steps_per_second": 3.766, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6642 + }, + { + "epoch": 370.0, + "eval_accuracy": 0.9248525523693308, + "eval_auc": 0.9527729125556185, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7650238473767885, + "eval_f1_macro": 0.8601491566364061, + "eval_loss": 0.22892294824123383, + "eval_pr_auc": 0.7803195480022762, + "eval_precision": 0.7544684854186265, + "eval_precision_macro": 0.856146798082819, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.86430805913007, + "eval_runtime": 0.2596, + "eval_samples_per_second": 627.873, + "eval_steps_per_second": 3.852, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6660 + }, + { + "epoch": 371.0, + "eval_accuracy": 0.9248525523693308, + "eval_auc": 0.9527883872289603, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7650985378258105, + "eval_f1_macro": 0.8601851483463878, + "eval_loss": 0.22889479994773865, + "eval_pr_auc": 0.7804143746656889, + "eval_precision": 0.7543089940457537, + "eval_precision_macro": 0.8560948381043845, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8644391169610264, + "eval_runtime": 0.2368, + "eval_samples_per_second": 688.489, + "eval_steps_per_second": 4.224, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6678 + }, + { + "epoch": 372.0, + "eval_accuracy": 0.924954240390482, + "eval_auc": 0.9528159885960027, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7653418124006359, + "eval_f1_macro": 0.860338399996844, + "eval_loss": 0.22885586321353912, + "eval_pr_auc": 0.7805625189044384, + "eval_precision": 0.7547820633427407, + "eval_precision_macro": 0.8563339286918206, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8644994779195585, + "eval_runtime": 0.1849, + "eval_samples_per_second": 881.352, + "eval_steps_per_second": 5.407, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6696 + }, + { + "epoch": 373.0, + "eval_accuracy": 0.9250559284116331, + "eval_auc": 0.9528519015171544, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7655852417302799, + "eval_f1_macro": 0.8604917251982311, + "eval_loss": 0.22878196835517883, + "eval_pr_auc": 0.7807742707975688, + "eval_precision": 0.7552557263884531, + "eval_precision_macro": 0.8565733155332836, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8645598388780904, + "eval_runtime": 0.2727, + "eval_samples_per_second": 597.724, + "eval_steps_per_second": 3.667, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6714 + }, + { + "epoch": 374.0, + "eval_accuracy": 0.9251067724222086, + "eval_auc": 0.9528673372605004, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7657070144743121, + "eval_f1_macro": 0.8605684154038177, + "eval_loss": 0.22877708077430725, + "eval_pr_auc": 0.7808321396342274, + "eval_precision": 0.7554927809165097, + "eval_precision_macro": 0.85669312022406, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8645900193573565, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.842, + "eval_steps_per_second": 3.87, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6732 + }, + { + "epoch": 375.0, + "eval_accuracy": 0.9251067724222086, + "eval_auc": 0.9528682131854067, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7659303988558716, + "eval_f1_macro": 0.8606760610325117, + "eval_loss": 0.2287902534008026, + "eval_pr_auc": 0.7808487230478494, + "eval_precision": 0.7550125313283208, + "eval_precision_macro": 0.8565363700584309, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8649831928502261, + "eval_runtime": 0.1856, + "eval_samples_per_second": 878.251, + "eval_steps_per_second": 5.388, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6750 + }, + { + "epoch": 376.0, + "eval_accuracy": 0.9250050844010576, + "eval_auc": 0.9528953279275011, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7655380702591003, + "eval_f1_macro": 0.8604509839871686, + "eval_loss": 0.228745236992836, + "eval_pr_auc": 0.7809833435589818, + "eval_precision": 0.754858934169279, + "eval_precision_macro": 0.8564014297014619, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.864660716229781, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.614, + "eval_steps_per_second": 3.881, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6768 + }, + { + "epoch": 377.0, + "eval_accuracy": 0.9248017083587553, + "eval_auc": 0.9529040482465667, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7651262505955216, + "eval_f1_macro": 0.8601804865980422, + "eval_loss": 0.2287396788597107, + "eval_pr_auc": 0.781048839864553, + "eval_precision": 0.7537546933667084, + "eval_precision_macro": 0.8558720042841312, + "eval_pred_class_0": 16472, + "eval_pred_class_1": 3196, + "eval_predicted_binding_ratio": 0.16249745779947122, + "eval_recall": 0.7768461786520477, + "eval_recall_macro": 0.8646710521436733, + "eval_runtime": 0.2563, + "eval_samples_per_second": 635.891, + "eval_steps_per_second": 3.901, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6786 + }, + { + "epoch": 378.0, + "eval_accuracy": 0.9248525523693308, + "eval_auc": 0.9529144620204507, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7653968253968254, + "eval_f1_macro": 0.8603288764349426, + "eval_loss": 0.22873102128505707, + "eval_pr_auc": 0.7810643100928254, + "eval_precision": 0.7536730228196311, + "eval_precision_macro": 0.8558880628094148, + "eval_pred_class_0": 16469, + "eval_pred_class_1": 3199, + "eval_predicted_binding_ratio": 0.16264998983119788, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8649633482848524, + "eval_runtime": 0.1843, + "eval_samples_per_second": 884.276, + "eval_steps_per_second": 5.425, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6804 + }, + { + "epoch": 379.0, + "eval_accuracy": 0.924954240390482, + "eval_auc": 0.9529275424990492, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7657142857142857, + "eval_f1_macro": 0.8605178766021483, + "eval_loss": 0.22870197892189026, + "eval_pr_auc": 0.7811376412515475, + "eval_precision": 0.7539856205064083, + "eval_precision_macro": 0.8560747217232387, + "eval_pred_class_0": 16469, + "eval_pred_class_1": 3199, + "eval_predicted_binding_ratio": 0.16264998983119788, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8651547670743409, + "eval_runtime": 0.2724, + "eval_samples_per_second": 598.424, + "eval_steps_per_second": 3.671, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6822 + }, + { + "epoch": 380.0, + "eval_accuracy": 0.9251067724222086, + "eval_auc": 0.9529545793811519, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7660047656870532, + "eval_f1_macro": 0.8607118952674847, + "eval_loss": 0.22864677011966705, + "eval_pr_auc": 0.7812048860219004, + "eval_precision": 0.7548528490920476, + "eval_precision_macro": 0.8564843339790698, + "eval_pred_class_0": 16474, + "eval_pred_class_1": 3194, + "eval_predicted_binding_ratio": 0.16239576977832013, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8651142506811824, + "eval_runtime": 0.1856, + "eval_samples_per_second": 878.062, + "eval_steps_per_second": 5.387, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6840 + }, + { + "epoch": 381.0, + "eval_accuracy": 0.9250559284116331, + "eval_auc": 0.9529760687388493, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7658087067047982, + "eval_f1_macro": 0.8605994081311654, + "eval_loss": 0.2286224663257599, + "eval_pr_auc": 0.7813066136655398, + "eval_precision": 0.7547760726589414, + "eval_precision_macro": 0.8564168678924449, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.86495301237096, + "eval_runtime": 0.2522, + "eval_samples_per_second": 646.403, + "eval_steps_per_second": 3.966, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6858 + }, + { + "epoch": 382.0, + "eval_accuracy": 0.9251067724222086, + "eval_auc": 0.9530162639595422, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7656324582338903, + "eval_f1_macro": 0.8605324858111449, + "eval_loss": 0.22855480015277863, + "eval_pr_auc": 0.7815011919374028, + "eval_precision": 0.7556532663316583, + "eval_precision_macro": 0.856745584876579, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8644589615264, + "eval_runtime": 0.267, + "eval_samples_per_second": 610.397, + "eval_steps_per_second": 3.745, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6876 + }, + { + "epoch": 383.0, + "eval_accuracy": 0.9251576164327843, + "eval_auc": 0.9530368384623376, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7658288259624563, + "eval_f1_macro": 0.8606451240251011, + "eval_loss": 0.22851014137268066, + "eval_pr_auc": 0.7815937686262128, + "eval_precision": 0.7557299843014129, + "eval_precision_macro": 0.8568129991882603, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8646201998366225, + "eval_runtime": 0.2682, + "eval_samples_per_second": 607.766, + "eval_steps_per_second": 3.729, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6894 + }, + { + "epoch": 384.0, + "eval_accuracy": 0.9251576164327843, + "eval_auc": 0.9530639240069352, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7658288259624563, + "eval_f1_macro": 0.8606451240251011, + "eval_loss": 0.22846660017967224, + "eval_pr_auc": 0.7817366980630457, + "eval_precision": 0.7557299843014129, + "eval_precision_macro": 0.8568129991882603, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8646201998366225, + "eval_runtime": 0.2509, + "eval_samples_per_second": 649.734, + "eval_steps_per_second": 3.986, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6912 + }, + { + "epoch": 385.0, + "eval_accuracy": 0.9251067724222086, + "eval_auc": 0.9530865131370146, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7656324582338903, + "eval_f1_macro": 0.8605324858111449, + "eval_loss": 0.22843268513679504, + "eval_pr_auc": 0.7818405401720232, + "eval_precision": 0.7556532663316583, + "eval_precision_macro": 0.856745584876579, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8644589615264, + "eval_runtime": 0.2627, + "eval_samples_per_second": 620.559, + "eval_steps_per_second": 3.807, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6930 + }, + { + "epoch": 386.0, + "eval_accuracy": 0.9251576164327843, + "eval_auc": 0.9530828926474026, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7659033078880407, + "eval_f1_macro": 0.8606810172942987, + "eval_loss": 0.22846029698848724, + "eval_pr_auc": 0.781765942321457, + "eval_precision": 0.7555695010982115, + "eval_precision_macro": 0.8567605408530922, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.864751257667579, + "eval_runtime": 0.2616, + "eval_samples_per_second": 623.151, + "eval_steps_per_second": 3.823, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6948 + }, + { + "epoch": 387.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.9530829315773984, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7665184243964421, + "eval_f1_macro": 0.8610134494863566, + "eval_loss": 0.22847168147563934, + "eval_pr_auc": 0.7818060776753192, + "eval_precision": 0.7552425665101722, + "eval_precision_macro": 0.8567386267869261, + "eval_pred_class_0": 16473, + "eval_pred_class_1": 3195, + "eval_predicted_binding_ratio": 0.16244661378889566, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8654669077808935, + "eval_runtime": 0.2089, + "eval_samples_per_second": 780.429, + "eval_steps_per_second": 4.788, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6966 + }, + { + "epoch": 388.0, + "eval_accuracy": 0.9254118364856619, + "eval_auc": 0.953101403860419, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7668838391863976, + "eval_f1_macro": 0.861243571985536, + "eval_loss": 0.2284410148859024, + "eval_pr_auc": 0.7818375459402719, + "eval_precision": 0.7559523809523809, + "eval_precision_macro": 0.8570973363853918, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8655574492186915, + "eval_runtime": 0.2357, + "eval_samples_per_second": 691.702, + "eval_steps_per_second": 4.244, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6984 + }, + { + "epoch": 388.8888888888889, + "grad_norm": 17393.9921875, + "learning_rate": 1.4317094954644378e-07, + "loss": 0.1876, + "step": 7000 + }, + { + "epoch": 389.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9531112726143616, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671537484116899, + "eval_f1_macro": 0.8613916441816318, + "eval_loss": 0.22843530774116516, + "eval_pr_auc": 0.7818710932290109, + "eval_precision": 0.755868544600939, + "eval_precision_macro": 0.8571123212290193, + "eval_pred_class_0": 16473, + "eval_pred_class_1": 3195, + "eval_predicted_binding_ratio": 0.16244661378889566, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8658497453598706, + "eval_runtime": 0.2628, + "eval_samples_per_second": 620.309, + "eval_steps_per_second": 3.806, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7002 + }, + { + "epoch": 390.0, + "eval_accuracy": 0.9253101484645109, + "eval_auc": 0.9531541734697645, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7664175544601686, + "eval_f1_macro": 0.8609828565716282, + "eval_loss": 0.2283545583486557, + "eval_pr_auc": 0.7820972392670548, + "eval_precision": 0.7559598494353826, + "eval_precision_macro": 0.8570151188924486, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8651039147672901, + "eval_runtime": 0.2594, + "eval_samples_per_second": 628.253, + "eval_steps_per_second": 3.854, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7020 + }, + { + "epoch": 391.0, + "eval_accuracy": 0.9253609924750864, + "eval_auc": 0.9531671274258764, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7665394402035624, + "eval_f1_macro": 0.8610596014864338, + "eval_loss": 0.22832486033439636, + "eval_pr_auc": 0.7821584295330316, + "eval_precision": 0.7561970505177282, + "eval_precision_macro": 0.8571349914927091, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.865134095246556, + "eval_runtime": 0.2425, + "eval_samples_per_second": 672.118, + "eval_steps_per_second": 4.123, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7038 + }, + { + "epoch": 392.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.953186076601346, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7661469933184856, + "eval_f1_macro": 0.8608344648891975, + "eval_loss": 0.22829268872737885, + "eval_pr_auc": 0.7822636305739935, + "eval_precision": 0.756043956043956, + "eval_precision_macro": 0.8570003193433394, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.864811618626111, + "eval_runtime": 0.241, + "eval_samples_per_second": 676.357, + "eval_steps_per_second": 4.149, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7056 + }, + { + "epoch": 393.0, + "eval_accuracy": 0.9252084604433598, + "eval_auc": 0.9532213958400613, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.765950676213206, + "eval_f1_macro": 0.860721851071415, + "eval_loss": 0.2282164841890335, + "eval_pr_auc": 0.782458419213003, + "eval_precision": 0.7559673366834171, + "eval_precision_macro": 0.8569329524960401, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8646503803158885, + "eval_runtime": 0.21, + "eval_samples_per_second": 776.053, + "eval_steps_per_second": 4.761, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7074 + }, + { + "epoch": 394.0, + "eval_accuracy": 0.9251576164327843, + "eval_auc": 0.9532409095004704, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7657542966263526, + "eval_f1_macro": 0.8606092068875439, + "eval_loss": 0.22819304466247559, + "eval_pr_auc": 0.7825471573946872, + "eval_precision": 0.7558906691800189, + "eval_precision_macro": 0.8568655651025967, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.864489142005666, + "eval_runtime": 0.1952, + "eval_samples_per_second": 835.24, + "eval_steps_per_second": 5.124, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7092 + }, + { + "epoch": 395.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.9532573574237078, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7659980897803247, + "eval_f1_macro": 0.8607627043564902, + "eval_loss": 0.2281719297170639, + "eval_pr_auc": 0.7826366824043385, + "eval_precision": 0.7563659226658284, + "eval_precision_macro": 0.8571057489837905, + "eval_pred_class_0": 16487, + "eval_pred_class_1": 3181, + "eval_predicted_binding_ratio": 0.1617347976408379, + "eval_recall": 0.7758787487907127, + "eval_recall_macro": 0.8645495029641981, + "eval_runtime": 0.2003, + "eval_samples_per_second": 813.649, + "eval_steps_per_second": 4.992, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7110 + }, + { + "epoch": 396.0, + "eval_accuracy": 0.9252084604433598, + "eval_auc": 0.9532628173556228, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.765950676213206, + "eval_f1_macro": 0.860721851071415, + "eval_loss": 0.22817298769950867, + "eval_pr_auc": 0.7826246984855115, + "eval_precision": 0.7559673366834171, + "eval_precision_macro": 0.8569329524960401, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8646503803158885, + "eval_runtime": 0.2265, + "eval_samples_per_second": 719.545, + "eval_steps_per_second": 4.414, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7128 + }, + { + "epoch": 397.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.9532649585053934, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7662213740458015, + "eval_f1_macro": 0.8608703093903662, + "eval_loss": 0.22816696763038635, + "eval_pr_auc": 0.7826996340764835, + "eval_precision": 0.7558832758079699, + "eval_precision_macro": 0.8569477661729006, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7768461786520477, + "eval_recall_macro": 0.8649426764570676, + "eval_runtime": 0.2363, + "eval_samples_per_second": 689.822, + "eval_steps_per_second": 4.232, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7146 + }, + { + "epoch": 398.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.9532878980054353, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7661469933184856, + "eval_f1_macro": 0.8608344648891975, + "eval_loss": 0.22812943160533905, + "eval_pr_auc": 0.7828570635819191, + "eval_precision": 0.756043956043956, + "eval_precision_macro": 0.8570003193433394, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.864811618626111, + "eval_runtime": 0.2572, + "eval_samples_per_second": 633.707, + "eval_steps_per_second": 3.888, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7164 + }, + { + "epoch": 399.0, + "eval_accuracy": 0.9252593044539353, + "eval_auc": 0.9533086671582098, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7660725652450668, + "eval_f1_macro": 0.860798596552099, + "eval_loss": 0.2281065434217453, + "eval_pr_auc": 0.7829274286728394, + "eval_precision": 0.7562048382029531, + "eval_precision_macro": 0.8570529802176428, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8646805607951544, + "eval_runtime": 0.2483, + "eval_samples_per_second": 656.497, + "eval_steps_per_second": 4.028, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7182 + }, + { + "epoch": 400.0, + "eval_accuracy": 0.9253101484645109, + "eval_auc": 0.953327003186245, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7661944930765557, + "eval_f1_macro": 0.8608753604764983, + "eval_loss": 0.22807644307613373, + "eval_pr_auc": 0.7830440116061308, + "eval_precision": 0.7564424890006285, + "eval_precision_macro": 0.8571730824234005, + "eval_pred_class_0": 16486, + "eval_pred_class_1": 3182, + "eval_predicted_binding_ratio": 0.16178564165141346, + "eval_recall": 0.7762012254111577, + "eval_recall_macro": 0.8647107412744205, + "eval_runtime": 0.221, + "eval_samples_per_second": 737.532, + "eval_steps_per_second": 4.525, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7200 + }, + { + "epoch": 401.0, + "eval_accuracy": 0.9253101484645109, + "eval_auc": 0.9533311297658027, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7664175544601686, + "eval_f1_macro": 0.8609828565716282, + "eval_loss": 0.22808308899402618, + "eval_pr_auc": 0.7830491617637381, + "eval_precision": 0.7559598494353826, + "eval_precision_macro": 0.8570151188924486, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8651039147672901, + "eval_runtime": 0.2636, + "eval_samples_per_second": 618.406, + "eval_steps_per_second": 3.794, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7218 + }, + { + "epoch": 402.0, + "eval_accuracy": 0.9253609924750864, + "eval_auc": 0.9533512371086481, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.766390833863781, + "eval_f1_macro": 0.8609879862166538, + "eval_loss": 0.2280474752187729, + "eval_pr_auc": 0.7831121530747555, + "eval_precision": 0.7565190072258875, + "eval_precision_macro": 0.857240395332689, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7765237020316027, + "eval_recall_macro": 0.864871979584643, + "eval_runtime": 0.2425, + "eval_samples_per_second": 672.294, + "eval_steps_per_second": 4.125, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7236 + }, + { + "epoch": 403.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9533569987480307, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.766783328030544, + "eval_f1_macro": 0.86121314661739, + "eval_loss": 0.22804181277751923, + "eval_pr_auc": 0.7831234273165448, + "eval_precision": 0.7566718995290423, + "eval_precision_macro": 0.8573749596534976, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8651944562050881, + "eval_runtime": 0.256, + "eval_samples_per_second": 636.798, + "eval_steps_per_second": 3.907, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7254 + }, + { + "epoch": 404.0, + "eval_accuracy": 0.9255135245068131, + "eval_auc": 0.9533826536152816, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7669053301511536, + "eval_f1_macro": 0.861289946852225, + "eval_loss": 0.2279965728521347, + "eval_pr_auc": 0.78327266335354, + "eval_precision": 0.7569095477386935, + "eval_precision_macro": 0.8574950553544232, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8652246366843541, + "eval_runtime": 0.2614, + "eval_samples_per_second": 623.619, + "eval_steps_per_second": 3.826, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7272 + }, + { + "epoch": 405.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9533846682425657, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.766783328030544, + "eval_f1_macro": 0.86121314661739, + "eval_loss": 0.22799374163150787, + "eval_pr_auc": 0.7832776890714148, + "eval_precision": 0.7566718995290423, + "eval_precision_macro": 0.8573749596534976, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8651944562050881, + "eval_runtime": 0.2631, + "eval_samples_per_second": 619.577, + "eval_steps_per_second": 3.801, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7290 + }, + { + "epoch": 406.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9533931647141554, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.766783328030544, + "eval_f1_macro": 0.86121314661739, + "eval_loss": 0.2279902696609497, + "eval_pr_auc": 0.7833151045367318, + "eval_precision": 0.7566718995290423, + "eval_precision_macro": 0.8573749596534976, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8651944562050881, + "eval_runtime": 0.2498, + "eval_samples_per_second": 652.534, + "eval_steps_per_second": 4.003, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7308 + }, + { + "epoch": 407.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9533809990904591, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7673235855054037, + "eval_f1_macro": 0.8615095109466251, + "eval_loss": 0.22801372408866882, + "eval_pr_auc": 0.7832695956978404, + "eval_precision": 0.7565026637417738, + "eval_precision_macro": 0.8574040902613707, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8657790484874461, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.354, + "eval_steps_per_second": 3.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7326 + }, + { + "epoch": 408.0, + "eval_accuracy": 0.9254118364856619, + "eval_auc": 0.9533805708605049, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671058898237816, + "eval_f1_macro": 0.8613505657612415, + "eval_loss": 0.2280135303735733, + "eval_pr_auc": 0.7832983031509244, + "eval_precision": 0.7554721701063164, + "eval_precision_macro": 0.8569406995036744, + "eval_pred_class_0": 16470, + "eval_pred_class_1": 3198, + "eval_predicted_binding_ratio": 0.16259914582062232, + "eval_recall": 0.7791035149951628, + "eval_recall_macro": 0.865950622711561, + "eval_runtime": 0.2668, + "eval_samples_per_second": 610.9, + "eval_steps_per_second": 3.748, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7344 + }, + { + "epoch": 409.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9533953253289238, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7672276913305811, + "eval_f1_macro": 0.8614272726281818, + "eval_loss": 0.22799338400363922, + "eval_pr_auc": 0.7833637706556547, + "eval_precision": 0.7557084766969033, + "eval_precision_macro": 0.857060115344384, + "eval_pred_class_0": 16471, + "eval_pred_class_1": 3197, + "eval_predicted_binding_ratio": 0.16254830181004679, + "eval_recall": 0.7791035149951628, + "eval_recall_macro": 0.865980803190827, + "eval_runtime": 0.2998, + "eval_samples_per_second": 543.699, + "eval_steps_per_second": 3.336, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7362 + }, + { + "epoch": 410.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9534269948805303, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7672496025437202, + "eval_f1_macro": 0.8614738601594714, + "eval_loss": 0.22793784737586975, + "eval_pr_auc": 0.783492627588611, + "eval_precision": 0.7566635308874256, + "eval_precision_macro": 0.8574567123458305, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8656479906564896, + "eval_runtime": 0.2396, + "eval_samples_per_second": 680.309, + "eval_steps_per_second": 4.174, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7380 + }, + { + "epoch": 411.0, + "eval_accuracy": 0.9255135245068131, + "eval_auc": 0.9534421775789035, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7670535856256957, + "eval_f1_macro": 0.8613613920200376, + "eval_loss": 0.22790838778018951, + "eval_pr_auc": 0.7835790311478947, + "eval_precision": 0.7565872020075283, + "eval_precision_macro": 0.8573894747901719, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.865486752346267, + "eval_runtime": 0.2625, + "eval_samples_per_second": 620.852, + "eval_steps_per_second": 3.809, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7398 + }, + { + "epoch": 412.0, + "eval_accuracy": 0.9255135245068131, + "eval_auc": 0.9534503918080233, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7670535856256957, + "eval_f1_macro": 0.8613613920200376, + "eval_loss": 0.22789432108402252, + "eval_pr_auc": 0.7836065049249683, + "eval_precision": 0.7565872020075283, + "eval_precision_macro": 0.8573894747901719, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.865486752346267, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.102, + "eval_steps_per_second": 3.829, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7416 + }, + { + "epoch": 413.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9534621486667634, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7669316375198728, + "eval_f1_macro": 0.8612846167990333, + "eval_loss": 0.2278737723827362, + "eval_pr_auc": 0.7836530325514856, + "eval_precision": 0.7563499529633114, + "eval_precision_macro": 0.857269581736829, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8654565718670011, + "eval_runtime": 0.2015, + "eval_samples_per_second": 809.132, + "eval_steps_per_second": 4.964, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7434 + }, + { + "epoch": 414.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9534808739947569, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7668575063613231, + "eval_f1_macro": 0.8612488935825013, + "eval_loss": 0.22784681618213654, + "eval_pr_auc": 0.7837271248134856, + "eval_precision": 0.7565108252274867, + "eval_precision_macro": 0.8573222168125176, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653255140360445, + "eval_runtime": 0.2677, + "eval_samples_per_second": 608.938, + "eval_steps_per_second": 3.736, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7452 + }, + { + "epoch": 415.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535030056973854, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.2278076857328415, + "eval_pr_auc": 0.783829043372685, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2388, + "eval_samples_per_second": 682.596, + "eval_steps_per_second": 4.188, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7470 + }, + { + "epoch": 416.0, + "eval_accuracy": 0.9255135245068131, + "eval_auc": 0.9535103634665969, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7669053301511536, + "eval_f1_macro": 0.861289946852225, + "eval_loss": 0.22778868675231934, + "eval_pr_auc": 0.7838714010488458, + "eval_precision": 0.7569095477386935, + "eval_precision_macro": 0.8574950553544232, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7771686552724928, + "eval_recall_macro": 0.8652246366843541, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.742, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7488 + }, + { + "epoch": 416.6666666666667, + "grad_norm": 16683.39453125, + "learning_rate": 8.236268949930852e-08, + "loss": 0.186, + "step": 7500 + }, + { + "epoch": 417.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535206409854957, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.22777557373046875, + "eval_pr_auc": 0.7839160806088992, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.595, + "eval_steps_per_second": 3.936, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7506 + }, + { + "epoch": 418.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.953516261360965, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.22778432071208954, + "eval_pr_auc": 0.7838825807566469, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2552, + "eval_samples_per_second": 638.836, + "eval_steps_per_second": 3.919, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7524 + }, + { + "epoch": 419.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.953526694599847, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.22777114808559418, + "eval_pr_auc": 0.783922204343384, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2628, + "eval_samples_per_second": 620.269, + "eval_steps_per_second": 3.805, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7542 + }, + { + "epoch": 420.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535399113334307, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.22775039076805115, + "eval_pr_auc": 0.7839806097795337, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2677, + "eval_samples_per_second": 608.883, + "eval_steps_per_second": 3.735, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7560 + }, + { + "epoch": 421.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535487289774859, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7671014953865734, + "eval_f1_macro": 0.8614024874814863, + "eval_loss": 0.22773513197898865, + "eval_pr_auc": 0.7840321361391863, + "eval_precision": 0.7569858712715856, + "eval_precision_macro": 0.8575622798085769, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7774911318929377, + "eval_recall_macro": 0.8653858749945765, + "eval_runtime": 0.2732, + "eval_samples_per_second": 596.635, + "eval_steps_per_second": 3.66, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7578 + }, + { + "epoch": 422.0, + "eval_accuracy": 0.9254118364856619, + "eval_auc": 0.9535455951128217, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7668097281831188, + "eval_f1_macro": 0.8612078600062212, + "eval_loss": 0.22774243354797363, + "eval_pr_auc": 0.7840612126983214, + "eval_precision": 0.7561128526645768, + "eval_precision_macro": 0.8571497629022605, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.865426391387735, + "eval_runtime": 0.1892, + "eval_samples_per_second": 861.347, + "eval_steps_per_second": 5.284, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7596 + }, + { + "epoch": 423.0, + "eval_accuracy": 0.9254118364856619, + "eval_auc": 0.9535476389376027, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7668097281831188, + "eval_f1_macro": 0.8612078600062212, + "eval_loss": 0.22774267196655273, + "eval_pr_auc": 0.7840628987467491, + "eval_precision": 0.7561128526645768, + "eval_precision_macro": 0.8571497629022605, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.865426391387735, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.36, + "eval_steps_per_second": 3.935, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7614 + }, + { + "epoch": 424.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9535528458245448, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7670057215511761, + "eval_f1_macro": 0.8613203162894483, + "eval_loss": 0.22773417830467224, + "eval_pr_auc": 0.7840834648119666, + "eval_precision": 0.756189282356628, + "eval_precision_macro": 0.8572170542389439, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8655876296979576, + "eval_runtime": 0.2571, + "eval_samples_per_second": 633.927, + "eval_steps_per_second": 3.889, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7632 + }, + { + "epoch": 425.0, + "eval_accuracy": 0.9254626804962376, + "eval_auc": 0.9535637559558758, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7669316375198728, + "eval_f1_macro": 0.8612846167990333, + "eval_loss": 0.22771182656288147, + "eval_pr_auc": 0.7841366739811415, + "eval_precision": 0.7563499529633114, + "eval_precision_macro": 0.857269581736829, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8654565718670011, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.859, + "eval_steps_per_second": 3.895, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7650 + }, + { + "epoch": 426.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535682523703939, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7673235855054037, + "eval_f1_macro": 0.8615095109466251, + "eval_loss": 0.22770953178405762, + "eval_pr_auc": 0.7841596739317596, + "eval_precision": 0.7565026637417738, + "eval_precision_macro": 0.8574040902613707, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8657790484874461, + "eval_runtime": 0.2634, + "eval_samples_per_second": 618.83, + "eval_steps_per_second": 3.797, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7668 + }, + { + "epoch": 427.0, + "eval_accuracy": 0.9255643685173887, + "eval_auc": 0.9535677852104438, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7674714104193139, + "eval_f1_macro": 0.8615807415292696, + "eval_loss": 0.22770845890045166, + "eval_pr_auc": 0.7841620948509175, + "eval_precision": 0.7561815336463223, + "eval_precision_macro": 0.8572991684500658, + "eval_pred_class_0": 16473, + "eval_pred_class_1": 3195, + "eval_predicted_binding_ratio": 0.16244661378889566, + "eval_recall": 0.7791035149951628, + "eval_recall_macro": 0.866041164149359, + "eval_runtime": 0.2571, + "eval_samples_per_second": 633.948, + "eval_steps_per_second": 3.889, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7686 + }, + { + "epoch": 428.0, + "eval_accuracy": 0.9256152125279642, + "eval_auc": 0.9535825980738566, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.767445557145128, + "eval_f1_macro": 0.8615862980157476, + "eval_loss": 0.22767424583435059, + "eval_pr_auc": 0.7842346625186999, + "eval_precision": 0.7567398119122257, + "eval_precision_macro": 0.8575239295026598, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8658092289667121, + "eval_runtime": 0.254, + "eval_samples_per_second": 641.669, + "eval_steps_per_second": 3.937, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7704 + }, + { + "epoch": 429.0, + "eval_accuracy": 0.9256152125279642, + "eval_auc": 0.9535961651774029, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.767445557145128, + "eval_f1_macro": 0.8615862980157476, + "eval_loss": 0.22765418887138367, + "eval_pr_auc": 0.7843006481928805, + "eval_precision": 0.7567398119122257, + "eval_precision_macro": 0.8575239295026598, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8658092289667121, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.28, + "eval_steps_per_second": 3.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7722 + }, + { + "epoch": 430.0, + "eval_accuracy": 0.9256660565385397, + "eval_auc": 0.9536132165155758, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7674936386768448, + "eval_f1_macro": 0.8616274777746364, + "eval_loss": 0.22762420773506165, + "eval_pr_auc": 0.7844000578756268, + "eval_precision": 0.7571383746470034, + "eval_precision_macro": 0.8576966674521347, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657083516150216, + "eval_runtime": 0.2691, + "eval_samples_per_second": 605.732, + "eval_steps_per_second": 3.716, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7740 + }, + { + "epoch": 431.0, + "eval_accuracy": 0.9256660565385397, + "eval_auc": 0.9536164866352254, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7674936386768448, + "eval_f1_macro": 0.8616274777746364, + "eval_loss": 0.22761479020118713, + "eval_pr_auc": 0.7844231309752159, + "eval_precision": 0.7571383746470034, + "eval_precision_macro": 0.8576966674521347, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657083516150216, + "eval_runtime": 0.2572, + "eval_samples_per_second": 633.783, + "eval_steps_per_second": 3.888, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7758 + }, + { + "epoch": 432.0, + "eval_accuracy": 0.9256660565385397, + "eval_auc": 0.9536227348995558, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7674936386768448, + "eval_f1_macro": 0.8616274777746364, + "eval_loss": 0.22760987281799316, + "eval_pr_auc": 0.7844561314285999, + "eval_precision": 0.7571383746470034, + "eval_precision_macro": 0.8576966674521347, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657083516150216, + "eval_runtime": 0.2144, + "eval_samples_per_second": 760.142, + "eval_steps_per_second": 4.663, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7776 + }, + { + "epoch": 433.0, + "eval_accuracy": 0.9256660565385397, + "eval_auc": 0.9536278736590051, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7674936386768448, + "eval_f1_macro": 0.8616274777746364, + "eval_loss": 0.22760248184204102, + "eval_pr_auc": 0.7844848515258783, + "eval_precision": 0.7571383746470034, + "eval_precision_macro": 0.8576966674521347, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657083516150216, + "eval_runtime": 0.2745, + "eval_samples_per_second": 593.759, + "eval_steps_per_second": 3.643, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7794 + }, + { + "epoch": 434.0, + "eval_accuracy": 0.9257169005491153, + "eval_auc": 0.9536346669432771, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676896167912227, + "eval_f1_macro": 0.861739927468447, + "eval_loss": 0.22759221494197845, + "eval_pr_auc": 0.7845111553142384, + "eval_precision": 0.7572145545796738, + "eval_precision_macro": 0.8577638306878952, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8658695899252441, + "eval_runtime": 0.216, + "eval_samples_per_second": 754.683, + "eval_steps_per_second": 4.63, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7812 + }, + { + "epoch": 435.0, + "eval_accuracy": 0.9257169005491153, + "eval_auc": 0.9536369248830353, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676896167912227, + "eval_f1_macro": 0.861739927468447, + "eval_loss": 0.2275882065296173, + "eval_pr_auc": 0.7845220230482824, + "eval_precision": 0.7572145545796738, + "eval_precision_macro": 0.8577638306878952, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8658695899252441, + "eval_runtime": 0.262, + "eval_samples_per_second": 622.083, + "eval_steps_per_second": 3.816, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7830 + }, + { + "epoch": 436.0, + "eval_accuracy": 0.9257677445596909, + "eval_auc": 0.9536390660328059, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7678855325914149, + "eval_f1_macro": 0.8618523468803471, + "eval_loss": 0.2275806963443756, + "eval_pr_auc": 0.7845372505398349, + "eval_precision": 0.7572906867356538, + "eval_precision_macro": 0.8578309735638339, + "eval_pred_class_0": 16479, + "eval_pred_class_1": 3189, + "eval_predicted_binding_ratio": 0.16214154972544234, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660308282354665, + "eval_runtime": 0.2506, + "eval_samples_per_second": 650.493, + "eval_steps_per_second": 3.991, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7848 + }, + { + "epoch": 437.0, + "eval_accuracy": 0.9257169005491153, + "eval_auc": 0.9536480004668485, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676896167912227, + "eval_f1_macro": 0.861739927468447, + "eval_loss": 0.2275666743516922, + "eval_pr_auc": 0.7845807540266186, + "eval_precision": 0.7572145545796738, + "eval_precision_macro": 0.8577638306878952, + "eval_pred_class_0": 16480, + "eval_pred_class_1": 3188, + "eval_predicted_binding_ratio": 0.16209070571486678, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8658695899252441, + "eval_runtime": 0.1853, + "eval_samples_per_second": 879.496, + "eval_steps_per_second": 5.396, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7866 + }, + { + "epoch": 438.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9536552025160767, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7679338317162399, + "eval_f1_macro": 0.8618936307360285, + "eval_loss": 0.22755169868469238, + "eval_pr_auc": 0.7846223392518402, + "eval_precision": 0.7576898932831136, + "eval_precision_macro": 0.8580040292771594, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8659299508837761, + "eval_runtime": 0.2108, + "eval_samples_per_second": 773.198, + "eval_steps_per_second": 4.744, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7884 + }, + { + "epoch": 439.0, + "eval_accuracy": 0.9257677445596909, + "eval_auc": 0.95366627809989, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676639083386378, + "eval_f1_macro": 0.8617455448748739, + "eval_loss": 0.2275334894657135, + "eval_pr_auc": 0.784667391259121, + "eval_precision": 0.7577756833176249, + "eval_precision_macro": 0.8579900557928737, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8656376547425971, + "eval_runtime": 0.259, + "eval_samples_per_second": 629.311, + "eval_steps_per_second": 3.861, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7902 + }, + { + "epoch": 440.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9536672708147835, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7679338317162399, + "eval_f1_macro": 0.8618936307360285, + "eval_loss": 0.22753211855888367, + "eval_pr_auc": 0.7846795397266301, + "eval_precision": 0.7576898932831136, + "eval_precision_macro": 0.8580040292771594, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8659299508837761, + "eval_runtime": 0.2424, + "eval_samples_per_second": 672.466, + "eval_steps_per_second": 4.126, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7920 + }, + { + "epoch": 441.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9536694119645541, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7679338317162399, + "eval_f1_macro": 0.8618936307360285, + "eval_loss": 0.22753164172172546, + "eval_pr_auc": 0.7846730527925044, + "eval_precision": 0.7576898932831136, + "eval_precision_macro": 0.8580040292771594, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8659299508837761, + "eval_runtime": 0.2647, + "eval_samples_per_second": 615.862, + "eval_steps_per_second": 3.778, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7938 + }, + { + "epoch": 442.0, + "eval_accuracy": 0.9257677445596909, + "eval_auc": 0.9536803123633861, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7677378300986318, + "eval_f1_macro": 0.8617811692096791, + "eval_loss": 0.22751472890377045, + "eval_pr_auc": 0.7847227034562287, + "eval_precision": 0.7576138147566719, + "eval_precision_macro": 0.8579369201187351, + "eval_pred_class_0": 16483, + "eval_pred_class_1": 3185, + "eval_predicted_binding_ratio": 0.16193817368314012, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657687125735536, + "eval_runtime": 0.2667, + "eval_samples_per_second": 611.172, + "eval_steps_per_second": 3.75, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7956 + }, + { + "epoch": 443.0, + "eval_accuracy": 0.9257677445596909, + "eval_auc": 0.9536880691650549, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676639083386378, + "eval_f1_macro": 0.8617455448748739, + "eval_loss": 0.22749866545200348, + "eval_pr_auc": 0.7847641543874231, + "eval_precision": 0.7577756833176249, + "eval_precision_macro": 0.8579900557928737, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8656376547425971, + "eval_runtime": 0.2405, + "eval_samples_per_second": 677.868, + "eval_steps_per_second": 4.159, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7974 + }, + { + "epoch": 444.0, + "eval_accuracy": 0.9257677445596909, + "eval_auc": 0.9536937626769448, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7676639083386378, + "eval_f1_macro": 0.8617455448748739, + "eval_loss": 0.2274913638830185, + "eval_pr_auc": 0.7847876605630844, + "eval_precision": 0.7577756833176249, + "eval_precision_macro": 0.8579900557928737, + "eval_pred_class_0": 16485, + "eval_pred_class_1": 3183, + "eval_predicted_binding_ratio": 0.16183648566198902, + "eval_recall": 0.7778136085133828, + "eval_recall_macro": 0.8656376547425971, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.896, + "eval_steps_per_second": 3.895, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7992 + }, + { + "epoch": 444.44444444444446, + "grad_norm": 19008.333984375, + "learning_rate": 3.72113927636733e-08, + "loss": 0.1854, + "step": 8000 + }, + { + "epoch": 445.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.953696935471605, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.767859984089101, + "eval_f1_macro": 0.861858042633035, + "eval_loss": 0.22748790681362152, + "eval_pr_auc": 0.7848011703648987, + "eval_precision": 0.7578517587939698, + "eval_precision_macro": 0.8580571582128063, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657988930528197, + "eval_runtime": 0.2651, + "eval_samples_per_second": 614.861, + "eval_steps_per_second": 3.772, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8010 + }, + { + "epoch": 446.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9536995243163275, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.767859984089101, + "eval_f1_macro": 0.861858042633035, + "eval_loss": 0.22748683393001556, + "eval_pr_auc": 0.7848132010793348, + "eval_precision": 0.7578517587939698, + "eval_precision_macro": 0.8580571582128063, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657988930528197, + "eval_runtime": 0.2471, + "eval_samples_per_second": 659.648, + "eval_steps_per_second": 4.047, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8028 + }, + { + "epoch": 447.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9537016265361022, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.767859984089101, + "eval_f1_macro": 0.861858042633035, + "eval_loss": 0.22748340666294098, + "eval_pr_auc": 0.7848243277439235, + "eval_precision": 0.7578517587939698, + "eval_precision_macro": 0.8580571582128063, + "eval_pred_class_0": 16484, + "eval_pred_class_1": 3184, + "eval_predicted_binding_ratio": 0.16188732967256458, + "eval_recall": 0.7781360851338278, + "eval_recall_macro": 0.8657988930528197, + "eval_runtime": 0.2047, + "eval_samples_per_second": 796.254, + "eval_steps_per_second": 4.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8046 + }, + { + "epoch": 448.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9537030474809498, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7679338317162399, + "eval_f1_macro": 0.8618936307360285, + "eval_loss": 0.2274865061044693, + "eval_pr_auc": 0.7848313481583303, + "eval_precision": 0.7576898932831136, + "eval_precision_macro": 0.8580040292771594, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8659299508837761, + "eval_runtime": 0.2613, + "eval_samples_per_second": 623.69, + "eval_steps_per_second": 3.826, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8064 + }, + { + "epoch": 449.0, + "eval_accuracy": 0.9258185885702664, + "eval_auc": 0.9537053443507039, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7679338317162399, + "eval_f1_macro": 0.8618936307360285, + "eval_loss": 0.22748111188411713, + "eval_pr_auc": 0.7848426067264029, + "eval_precision": 0.7576898932831136, + "eval_precision_macro": 0.8580040292771594, + "eval_pred_class_0": 16482, + "eval_pred_class_1": 3186, + "eval_predicted_binding_ratio": 0.16198901769371568, + "eval_recall": 0.7784585617542729, + "eval_recall_macro": 0.8659299508837761, + "eval_runtime": 0.2031, + "eval_samples_per_second": 802.733, + "eval_steps_per_second": 4.925, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8082 + }, + { + "epoch": 450.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537081765079003, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7681297709923665, + "eval_f1_macro": 0.8620060619667715, + "eval_loss": 0.22747540473937988, + "eval_pr_auc": 0.7848573104950377, + "eval_precision": 0.7577659240665202, + "eval_precision_macro": 0.8580711180917517, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660911891939986, + "eval_runtime": 0.2266, + "eval_samples_per_second": 719.273, + "eval_steps_per_second": 4.413, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8100 + }, + { + "epoch": 451.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537093638727732, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7681297709923665, + "eval_f1_macro": 0.8620060619667715, + "eval_loss": 0.22747638821601868, + "eval_pr_auc": 0.7848599055730325, + "eval_precision": 0.7577659240665202, + "eval_precision_macro": 0.8580711180917517, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660911891939986, + "eval_runtime": 0.2522, + "eval_samples_per_second": 646.395, + "eval_steps_per_second": 3.966, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8118 + }, + { + "epoch": 452.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.953713694834809, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7681297709923665, + "eval_f1_macro": 0.8620060619667715, + "eval_loss": 0.22746768593788147, + "eval_pr_auc": 0.7848753398216984, + "eval_precision": 0.7577659240665202, + "eval_precision_macro": 0.8580711180917517, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660911891939986, + "eval_runtime": 0.2493, + "eval_samples_per_second": 653.726, + "eval_steps_per_second": 4.011, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8136 + }, + { + "epoch": 453.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537144345047297, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7681297709923665, + "eval_f1_macro": 0.8620060619667715, + "eval_loss": 0.22746726870536804, + "eval_pr_auc": 0.7848749928109487, + "eval_precision": 0.7577659240665202, + "eval_precision_macro": 0.8580711180917517, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660911891939986, + "eval_runtime": 0.2607, + "eval_samples_per_second": 625.248, + "eval_steps_per_second": 3.836, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8154 + }, + { + "epoch": 454.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537162252845378, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7681297709923665, + "eval_f1_macro": 0.8620060619667715, + "eval_loss": 0.22746579349040985, + "eval_pr_auc": 0.7848822771158814, + "eval_precision": 0.7577659240665202, + "eval_precision_macro": 0.8580711180917517, + "eval_pred_class_0": 16481, + "eval_pred_class_1": 3187, + "eval_predicted_binding_ratio": 0.16203986170429124, + "eval_recall": 0.7787810383747178, + "eval_recall_macro": 0.8660911891939986, + "eval_runtime": 0.2463, + "eval_samples_per_second": 661.74, + "eval_steps_per_second": 4.06, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8172 + }, + { + "epoch": 455.0, + "eval_accuracy": 0.9260219646125687, + "eval_auc": 0.9537124685399404, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7687172150691464, + "eval_f1_macro": 0.8623431740348002, + "eval_loss": 0.22747564315795898, + "eval_pr_auc": 0.7848568499525364, + "eval_precision": 0.7579937304075235, + "eval_precision_macro": 0.8582722627034582, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665749041246662, + "eval_runtime": 0.2569, + "eval_samples_per_second": 634.446, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8190 + }, + { + "epoch": 456.0, + "eval_accuracy": 0.9260219646125687, + "eval_auc": 0.9537148724671828, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7687172150691464, + "eval_f1_macro": 0.8623431740348002, + "eval_loss": 0.22747227549552917, + "eval_pr_auc": 0.7848699531009984, + "eval_precision": 0.7579937304075235, + "eval_precision_macro": 0.8582722627034582, + "eval_pred_class_0": 16478, + "eval_pred_class_1": 3190, + "eval_predicted_binding_ratio": 0.1621923937360179, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665749041246662, + "eval_runtime": 0.2171, + "eval_samples_per_second": 750.821, + "eval_steps_per_second": 4.606, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8208 + }, + { + "epoch": 457.0, + "eval_accuracy": 0.925971120601993, + "eval_auc": 0.9537148043396901, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.768595041322314, + "eval_f1_macro": 0.8622662895753321, + "eval_loss": 0.22747254371643066, + "eval_pr_auc": 0.784871702941944, + "eval_precision": 0.7577561892823567, + "eval_precision_macro": 0.858152234351077, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665447236454001, + "eval_runtime": 0.2395, + "eval_samples_per_second": 680.611, + "eval_steps_per_second": 4.176, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8226 + }, + { + "epoch": 458.0, + "eval_accuracy": 0.925971120601993, + "eval_auc": 0.9537153006971368, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.768595041322314, + "eval_f1_macro": 0.8622662895753321, + "eval_loss": 0.22747036814689636, + "eval_pr_auc": 0.7848700532008381, + "eval_precision": 0.7577561892823567, + "eval_precision_macro": 0.858152234351077, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665447236454001, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.347, + "eval_steps_per_second": 3.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8244 + }, + { + "epoch": 459.0, + "eval_accuracy": 0.925971120601993, + "eval_auc": 0.9537154856146172, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.768595041322314, + "eval_f1_macro": 0.8622662895753321, + "eval_loss": 0.22747208178043365, + "eval_pr_auc": 0.7848674676035483, + "eval_precision": 0.7577561892823567, + "eval_precision_macro": 0.858152234351077, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665447236454001, + "eval_runtime": 0.2162, + "eval_samples_per_second": 753.78, + "eval_steps_per_second": 4.624, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8262 + }, + { + "epoch": 460.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537164783295108, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22747138142585754, + "eval_pr_auc": 0.7848698987106607, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2554, + "eval_samples_per_second": 638.29, + "eval_steps_per_second": 3.916, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8280 + }, + { + "epoch": 461.0, + "eval_accuracy": 0.925971120601993, + "eval_auc": 0.9537188335942584, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.768595041322314, + "eval_f1_macro": 0.8622662895753321, + "eval_loss": 0.2274673730134964, + "eval_pr_auc": 0.7848777447449568, + "eval_precision": 0.7577561892823567, + "eval_precision_macro": 0.858152234351077, + "eval_pred_class_0": 16477, + "eval_pred_class_1": 3191, + "eval_predicted_binding_ratio": 0.16224323774659344, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665447236454001, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.971, + "eval_steps_per_second": 3.896, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8298 + }, + { + "epoch": 462.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537204881190811, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22746412456035614, + "eval_pr_auc": 0.7848871874390749, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2183, + "eval_samples_per_second": 746.701, + "eval_steps_per_second": 4.581, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8316 + }, + { + "epoch": 463.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537223956888766, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274623066186905, + "eval_pr_auc": 0.784896188141376, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2673, + "eval_samples_per_second": 609.736, + "eval_steps_per_second": 3.741, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8334 + }, + { + "epoch": 464.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537243227236701, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274596393108368, + "eval_pr_auc": 0.7849067717237345, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2647, + "eval_samples_per_second": 615.711, + "eval_steps_per_second": 3.777, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8352 + }, + { + "epoch": 465.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537252959735659, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22745810449123383, + "eval_pr_auc": 0.7849091375774293, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2588, + "eval_samples_per_second": 629.717, + "eval_steps_per_second": 3.863, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8370 + }, + { + "epoch": 466.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537274565883342, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274550497531891, + "eval_pr_auc": 0.7849133909574995, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2576, + "eval_samples_per_second": 632.839, + "eval_steps_per_second": 3.882, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8388 + }, + { + "epoch": 467.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537294225531237, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274521142244339, + "eval_pr_auc": 0.7849233363238722, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2621, + "eval_samples_per_second": 621.858, + "eval_steps_per_second": 3.815, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8406 + }, + { + "epoch": 468.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537300065030612, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.227451354265213, + "eval_pr_auc": 0.7849296985834374, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.266, + "eval_samples_per_second": 612.691, + "eval_steps_per_second": 3.759, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8424 + }, + { + "epoch": 469.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.953731164670437, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22744858264923096, + "eval_pr_auc": 0.7849332304496031, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.1831, + "eval_samples_per_second": 890.399, + "eval_steps_per_second": 5.463, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8442 + }, + { + "epoch": 470.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537301622230444, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22745059430599213, + "eval_pr_auc": 0.7849302221742441, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.282, + "eval_samples_per_second": 577.963, + "eval_steps_per_second": 3.546, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8460 + }, + { + "epoch": 471.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.95373253695279, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274470180273056, + "eval_pr_auc": 0.7849404295185345, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2547, + "eval_samples_per_second": 639.882, + "eval_steps_per_second": 3.926, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8478 + }, + { + "epoch": 472.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537337243176629, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22744475305080414, + "eval_pr_auc": 0.7849440080112278, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2516, + "eval_samples_per_second": 647.737, + "eval_steps_per_second": 3.974, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8496 + }, + { + "epoch": 472.22222222222223, + "grad_norm": 16415.080078125, + "learning_rate": 9.409753403698373e-09, + "loss": 0.185, + "step": 8500 + }, + { + "epoch": 473.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537344250575877, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22744259238243103, + "eval_pr_auc": 0.7849494179086262, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2539, + "eval_samples_per_second": 642.078, + "eval_steps_per_second": 3.939, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8514 + }, + { + "epoch": 474.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537351647275085, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22744259238243103, + "eval_pr_auc": 0.7849503915002546, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.1794, + "eval_samples_per_second": 908.686, + "eval_steps_per_second": 5.575, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8532 + }, + { + "epoch": 475.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537357876074417, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22744180262088776, + "eval_pr_auc": 0.7849530625634429, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.1819, + "eval_samples_per_second": 895.935, + "eval_steps_per_second": 5.497, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8550 + }, + { + "epoch": 476.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537367997873333, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274399697780609, + "eval_pr_auc": 0.7849568926584508, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2575, + "eval_samples_per_second": 633.091, + "eval_steps_per_second": 3.884, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8568 + }, + { + "epoch": 477.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537370139023102, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.2274392694234848, + "eval_pr_auc": 0.784958754067368, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2669, + "eval_samples_per_second": 610.802, + "eval_steps_per_second": 3.747, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8586 + }, + { + "epoch": 478.0, + "eval_accuracy": 0.9259202765914175, + "eval_auc": 0.9537376562472413, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7684729064039408, + "eval_f1_macro": 0.86218942357391, + "eval_loss": 0.22743819653987885, + "eval_pr_auc": 0.7849582401594454, + "eval_precision": 0.7575187969924813, + "eval_precision_macro": 0.85803228026366, + "eval_pred_class_0": 16476, + "eval_pred_class_1": 3192, + "eval_predicted_binding_ratio": 0.162294081757169, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8665145431661341, + "eval_runtime": 0.2266, + "eval_samples_per_second": 719.444, + "eval_steps_per_second": 4.414, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8604 + }, + { + "epoch": 479.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537377925022268, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743773460388184, + "eval_pr_auc": 0.7849574280187238, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2575, + "eval_samples_per_second": 632.921, + "eval_steps_per_second": 3.883, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8622 + }, + { + "epoch": 480.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537384932421518, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743669152259827, + "eval_pr_auc": 0.7849621773766102, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2648, + "eval_samples_per_second": 615.492, + "eval_steps_per_second": 3.776, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8640 + }, + { + "epoch": 481.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537382012671831, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743773460388184, + "eval_pr_auc": 0.7849637957606732, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2656, + "eval_samples_per_second": 613.647, + "eval_steps_per_second": 3.765, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8658 + }, + { + "epoch": 482.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537380066172038, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.227437824010849, + "eval_pr_auc": 0.7849623241007161, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.1901, + "eval_samples_per_second": 857.378, + "eval_steps_per_second": 5.26, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8676 + }, + { + "epoch": 483.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537382791271747, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743763029575348, + "eval_pr_auc": 0.7849636397572854, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2674, + "eval_samples_per_second": 609.618, + "eval_steps_per_second": 3.74, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8694 + }, + { + "epoch": 484.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537387365546257, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743697464466095, + "eval_pr_auc": 0.7849651242159179, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2636, + "eval_samples_per_second": 618.398, + "eval_steps_per_second": 3.794, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8712 + }, + { + "epoch": 485.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537391745170787, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.2274360954761505, + "eval_pr_auc": 0.7849672499360805, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.1892, + "eval_samples_per_second": 861.673, + "eval_steps_per_second": 5.286, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8730 + }, + { + "epoch": 486.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.953739330237062, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743603587150574, + "eval_pr_auc": 0.784968007514094, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2358, + "eval_samples_per_second": 691.342, + "eval_steps_per_second": 4.241, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8748 + }, + { + "epoch": 487.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.95373934970206, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743603587150574, + "eval_pr_auc": 0.7849684807581643, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2101, + "eval_samples_per_second": 775.889, + "eval_steps_per_second": 4.76, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8766 + }, + { + "epoch": 488.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537393886320558, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.2274361550807953, + "eval_pr_auc": 0.7849697577570401, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.25, + "eval_samples_per_second": 652.0, + "eval_steps_per_second": 4.0, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8784 + }, + { + "epoch": 489.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537394859570454, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743597626686096, + "eval_pr_auc": 0.784969480914725, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.1914, + "eval_samples_per_second": 851.633, + "eval_steps_per_second": 5.225, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8802 + }, + { + "epoch": 490.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537395540845381, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.2274358570575714, + "eval_pr_auc": 0.784970228255774, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2687, + "eval_samples_per_second": 606.685, + "eval_steps_per_second": 3.722, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8820 + }, + { + "epoch": 491.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537395443520391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743581235408783, + "eval_pr_auc": 0.7849691356929127, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.272, + "eval_samples_per_second": 599.26, + "eval_steps_per_second": 3.676, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8838 + }, + { + "epoch": 492.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537396222120308, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743573784828186, + "eval_pr_auc": 0.7849709563979171, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2531, + "eval_samples_per_second": 644.069, + "eval_steps_per_second": 3.951, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8856 + }, + { + "epoch": 493.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.953739583282035, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743569314479828, + "eval_pr_auc": 0.7849695249246844, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.242, + "eval_samples_per_second": 673.593, + "eval_steps_per_second": 4.132, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8874 + }, + { + "epoch": 494.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537395443520391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743569314479828, + "eval_pr_auc": 0.7849692821960631, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2668, + "eval_samples_per_second": 610.934, + "eval_steps_per_second": 3.748, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8892 + }, + { + "epoch": 495.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537395248870412, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743569314479828, + "eval_pr_auc": 0.7849694151476083, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2644, + "eval_samples_per_second": 616.534, + "eval_steps_per_second": 3.782, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8910 + }, + { + "epoch": 496.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537396027470327, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743569314479828, + "eval_pr_auc": 0.7849695246002066, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2501, + "eval_samples_per_second": 651.692, + "eval_steps_per_second": 3.998, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8928 + }, + { + "epoch": 497.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.953739563817037, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743569314479828, + "eval_pr_auc": 0.7849694727996652, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2737, + "eval_samples_per_second": 595.465, + "eval_steps_per_second": 3.653, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8946 + }, + { + "epoch": 498.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537395832820348, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743569314479828, + "eval_pr_auc": 0.7849694859174267, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2635, + "eval_samples_per_second": 618.524, + "eval_steps_per_second": 3.795, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8964 + }, + { + "epoch": 499.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537396027470328, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.2274356633424759, + "eval_pr_auc": 0.784969582259249, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2533, + "eval_samples_per_second": 643.57, + "eval_steps_per_second": 3.948, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8982 + }, + { + "epoch": 500.0, + "grad_norm": 39285.96484375, + "learning_rate": 3.760708838784765e-14, + "loss": 0.1855, + "step": 9000 + }, + { + "epoch": 500.0, + "eval_accuracy": 0.925869432580842, + "eval_auc": 0.9537395832820349, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7683508102955195, + "eval_f1_macro": 0.8621125760211936, + "eval_loss": 0.22743569314479828, + "eval_pr_auc": 0.7849695246002065, + "eval_precision": 0.7572815533980582, + "eval_precision_macro": 0.8579124003712597, + "eval_pred_class_0": 16475, + "eval_pred_class_1": 3193, + "eval_predicted_binding_ratio": 0.16234492576774456, + "eval_recall": 0.7797484682360529, + "eval_recall_macro": 0.8664843626868681, + "eval_runtime": 0.2662, + "eval_samples_per_second": 612.248, + "eval_steps_per_second": 3.756, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9000 + } + ], + "logging_steps": 500, + "max_steps": 9000, + "num_input_tokens_seen": 0, + "num_train_epochs": 500, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 6765506177901744.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/training_args.bin b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8903358100d3be09ad49078090c6e572b3ddef68 --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:499ba8a39afec206dd7194e2d216bf0be2633330bfcda3d90a12ddcbc04cdaca +size 5368 diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/logs/events.out.tfevents.1772419140.amax.1813661.0 b/finetune_glome_nano_site_dice_0_10_1_9_tiny/logs/events.out.tfevents.1772419140.amax.1813661.0 new file mode 100644 index 0000000000000000000000000000000000000000..49f9d9e748c3e16965c5268987de8652f871cd9f --- /dev/null +++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/logs/events.out.tfevents.1772419140.amax.1813661.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31b4f0499f3c83af64510e723eab0e1b174cbd078260871728cbc3136654212c +size 582065 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/config.json b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a424582b16776120730808f810ad9081375d580e --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/config.json @@ -0,0 +1,52 @@ +{ + "architectures": [ + "GloMeModelForTokenClassification" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "bos_token_id": 28, + "cdr_weight": 0.0, + "class_weights": [ + 0.1, + 0.9 + ], + "classifier_activation": "gelu", + "classifier_bias": false, + "classifier_dropout": 0.1, + "classifier_pooling": "cls", + "cls_token_id": 28, + "compress_block_size": 16, + "compress_block_sliding_stride": 16, + "decoder_bias": true, + "dice_weight": 0.1, + "embedding_dropout": 0.1, + "eos_token_id": 29, + "hidden_activation": "gelu", + "hidden_size": 320, + "inner_rank": 32, + "intermediate_size": 1280, + "kv_heads": 10, + "mask_token_id": 31, + "mlp_bias": false, + "mlp_dropout": 0.1, + "model_size": "tiny", + "model_type": "glome", + "norm_bias": false, + "norm_eps": 1e-05, + "num_attention_heads": 20, + "num_hidden_layers": 6, + "num_selected_blocks": 8, + "num_slots": 64, + "pad_token_id": 30, + "reference_compile": null, + "selection_block_size": 16, + "sep_token_id": 29, + "sliding_window_size": 0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.52.3", + "unk_token_id": 27, + "use_glome": true, + "use_nsa": true, + "vocab_size": 36 +} diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/model.safetensors b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8561eaa6ed751ddb203be5951d3ff87e5118e6b8 --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7a6c9e7bebd86d828e33c602f601585df15ab364132986ff74a2c37ee1a20b5 +size 61385376 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/optimizer.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b57b2fafe15e5b63edf036a2f9fd89ccaf3a622 --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cedfd25f8179cb06ee2cdef74c23b5465b2feb98a5d90c0c18310779a53873fa +size 122881658 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/rng_state.pth b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7a6cb59a2c728074b73428ae75c539651c4ea361 --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b5e0b21ceddfc06d6dfc155153f6b1cb2682aebf418b0515e022b082bfcbcf5 +size 14244 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/scaler.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363 +size 988 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/scheduler.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e18d4033fb579341903417eb4d0a9c2d8e62330 --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:231ac688ea3b75d272ed7ef26f577919491d334410210be20bdcc895a1659ccb +size 1064 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/trainer_state.json b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7f724e75ea9d17a6e15191f645e9cf4ee63d5836 --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/trainer_state.json @@ -0,0 +1,12041 @@ +{ + "best_global_step": 12818, + "best_metric": 0.6982375574473259, + "best_model_checkpoint": "./results/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818", + "epoch": 493.0, + "eval_steps": 500, + "global_step": 12818, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy": 0.3402481187716087, + "eval_auc": 0.3906724936824889, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25902238465052535, + "eval_f1_macro": 0.3322238022959372, + "eval_loss": 1.061540961265564, + "eval_pr_auc": 0.12123677424188789, + "eval_precision": 0.15737977933523004, + "eval_precision_macro": 0.49946219326282143, + "eval_pred_class_0": 5257, + "eval_pred_class_1": 14411, + "eval_predicted_binding_ratio": 0.7327130364043116, + "eval_recall": 0.7313769751693002, + "eval_recall_macro": 0.49920692785748166, + "eval_runtime": 0.3106, + "eval_samples_per_second": 524.711, + "eval_steps_per_second": 3.219, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 26 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.34141753101484645, + "eval_auc": 0.39093619574173194, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25910884859577876, + "eval_f1_macro": 0.33318779271412513, + "eval_loss": 1.0595855712890625, + "eval_pr_auc": 0.12129083172780017, + "eval_precision": 0.15748852732582394, + "eval_precision_macro": 0.4996674570038125, + "eval_pred_class_0": 5286, + "eval_pred_class_1": 14382, + "eval_predicted_binding_ratio": 0.7312385600976204, + "eval_recall": 0.7304095453079652, + "eval_recall_macro": 0.4995079053877304, + "eval_runtime": 0.2611, + "eval_samples_per_second": 624.188, + "eval_steps_per_second": 3.829, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 52 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.3436038234695953, + "eval_auc": 0.3913807276315981, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2594079853143644, + "eval_f1_macro": 0.3350089597864736, + "eval_loss": 1.0562976598739624, + "eval_pr_auc": 0.1213805792649038, + "eval_precision": 0.15776986951364175, + "eval_precision_macro": 0.5001890381857135, + "eval_pred_class_0": 5337, + "eval_pred_class_1": 14331, + "eval_predicted_binding_ratio": 0.7286455155582673, + "eval_recall": 0.7291196388261851, + "eval_recall_macro": 0.5002814346723429, + "eval_runtime": 0.2651, + "eval_samples_per_second": 614.877, + "eval_steps_per_second": 3.772, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.345688427903193, + "eval_auc": 0.39204411422551294, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25976416450963474, + "eval_f1_macro": 0.3367519287310599, + "eval_loss": 1.0516862869262695, + "eval_pr_auc": 0.1215177922821225, + "eval_precision": 0.15807896947633715, + "eval_precision_macro": 0.5007519661646174, + "eval_pred_class_0": 5384, + "eval_pred_class_1": 14284, + "eval_predicted_binding_ratio": 0.7262558470612162, + "eval_recall": 0.72815220896485, + "eval_recall_macro": 0.5011256608293798, + "eval_runtime": 0.2689, + "eval_samples_per_second": 606.218, + "eval_steps_per_second": 3.719, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 104 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.3489424445800285, + "eval_auc": 0.39286881698964193, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25986937171261776, + "eval_f1_macro": 0.33937421387990774, + "eval_loss": 1.0457645654678345, + "eval_pr_auc": 0.12168361829310792, + "eval_precision": 0.15830985915492957, + "eval_precision_macro": 0.5011556611063601, + "eval_pred_class_0": 5468, + "eval_pred_class_1": 14200, + "eval_predicted_binding_ratio": 0.7219849501728697, + "eval_recall": 0.7249274427603999, + "eval_recall_macro": 0.5017466331928395, + "eval_runtime": 0.2596, + "eval_samples_per_second": 627.89, + "eval_steps_per_second": 3.852, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 130 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.3526540573520439, + "eval_auc": 0.3938679358675814, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2595952547103978, + "eval_f1_macro": 0.34226375201644554, + "eval_loss": 1.0385552644729614, + "eval_pr_auc": 0.12189495582289459, + "eval_precision": 0.15835402625044342, + "eval_precision_macro": 0.5012118238196412, + "eval_pred_class_0": 5573, + "eval_pred_class_1": 14095, + "eval_predicted_binding_ratio": 0.7166463290624364, + "eval_recall": 0.7197678168332796, + "eval_recall_macro": 0.5018528828839544, + "eval_runtime": 0.2682, + "eval_samples_per_second": 607.675, + "eval_steps_per_second": 3.728, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 156 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.3575859263778727, + "eval_auc": 0.39509779283079605, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25933524825605253, + "eval_f1_macro": 0.34607916966826957, + "eval_loss": 1.0300335884094238, + "eval_pr_auc": 0.12215992714628282, + "eval_precision": 0.1584754262788365, + "eval_precision_macro": 0.5013918287261083, + "eval_pred_class_0": 5710, + "eval_pred_class_1": 13958, + "eval_predicted_binding_ratio": 0.7096806996135855, + "eval_recall": 0.7133182844243793, + "eval_recall_macro": 0.5021592327536275, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.797, + "eval_steps_per_second": 3.956, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 182 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.36261948342485256, + "eval_auc": 0.39656283563130934, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2583126257247663, + "eval_f1_macro": 0.3497589695442054, + "eval_loss": 1.0202081203460693, + "eval_pr_auc": 0.12247236024679278, + "eval_precision": 0.1581769436997319, + "eval_precision_macro": 0.5008542806107318, + "eval_pred_class_0": 5867, + "eval_pred_class_1": 13801, + "eval_predicted_binding_ratio": 0.7016981899532235, + "eval_recall": 0.7039664624314738, + "eval_recall_macro": 0.5013464231032241, + "eval_runtime": 0.2497, + "eval_samples_per_second": 652.667, + "eval_steps_per_second": 4.004, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 208 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.3690258287573724, + "eval_auc": 0.39822865015280895, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25848470363288717, + "eval_f1_macro": 0.3546848296925498, + "eval_loss": 1.0091010332107544, + "eval_pr_auc": 0.12282975659183427, + "eval_precision": 0.15863586358635864, + "eval_precision_macro": 0.5015788301853557, + "eval_pred_class_0": 6033, + "eval_pred_class_1": 13635, + "eval_predicted_binding_ratio": 0.6932580841976815, + "eval_recall": 0.6975169300225733, + "eval_recall_macro": 0.5025280068716114, + "eval_runtime": 0.2623, + "eval_samples_per_second": 621.417, + "eval_steps_per_second": 3.812, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 234 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.37553386211104334, + "eval_auc": 0.4001638991754374, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25788519637462237, + "eval_f1_macro": 0.35943500580602444, + "eval_loss": 0.9966734647750854, + "eval_pr_auc": 0.12325069957928089, + "eval_precision": 0.1586735073239646, + "eval_precision_macro": 0.5015911353953799, + "eval_pred_class_0": 6219, + "eval_pred_class_1": 13449, + "eval_predicted_binding_ratio": 0.6838010982306284, + "eval_recall": 0.6881651080296678, + "eval_recall_macro": 0.5025904311199223, + "eval_runtime": 0.2668, + "eval_samples_per_second": 610.918, + "eval_steps_per_second": 3.748, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 260 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.38382143583485867, + "eval_auc": 0.4023744221985687, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25855001529519733, + "eval_f1_macro": 0.3657153538700335, + "eval_loss": 0.9828852415084839, + "eval_pr_auc": 0.12373084625745168, + "eval_precision": 0.15954394442766537, + "eval_precision_macro": 0.5028728439448414, + "eval_pred_class_0": 6424, + "eval_pred_class_1": 13244, + "eval_predicted_binding_ratio": 0.6733780760626398, + "eval_recall": 0.6813930990003225, + "eval_recall_macro": 0.5047576347901956, + "eval_runtime": 0.2598, + "eval_samples_per_second": 627.475, + "eval_steps_per_second": 3.85, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 286 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.3912955053894651, + "eval_auc": 0.40482715792324586, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2574122317330356, + "eval_f1_macro": 0.370844480646392, + "eval_loss": 0.9678097367286682, + "eval_pr_auc": 0.12427357405982056, + "eval_precision": 0.15935796021810922, + "eval_precision_macro": 0.5025013059703454, + "eval_pred_class_0": 6647, + "eval_pred_class_1": 13021, + "eval_predicted_binding_ratio": 0.6620398617042912, + "eval_recall": 0.6691389874234118, + "eval_recall_macro": 0.5042139676659523, + "eval_runtime": 0.2363, + "eval_samples_per_second": 689.751, + "eval_steps_per_second": 4.232, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 312 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.4013117754728493, + "eval_auc": 0.40764224431659535, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2572383775941462, + "eval_f1_macro": 0.3779059068484294, + "eval_loss": 0.9513856172561646, + "eval_pr_auc": 0.12488748600523823, + "eval_precision": 0.15989648682559598, + "eval_precision_macro": 0.5031697587395765, + "eval_pred_class_0": 6916, + "eval_pred_class_1": 12752, + "eval_predicted_binding_ratio": 0.6483628228594671, + "eval_recall": 0.6575298290873912, + "eval_recall_macro": 0.5054414401669225, + "eval_runtime": 0.2536, + "eval_samples_per_second": 642.633, + "eval_steps_per_second": 3.943, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 338 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.4099552572706935, + "eval_auc": 0.4108023769954713, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25432114630855235, + "eval_f1_macro": 0.38308115532732967, + "eval_loss": 0.9335527420043945, + "eval_pr_auc": 0.12558462856716973, + "eval_precision": 0.15880276039159044, + "eval_precision_macro": 0.5015495900209409, + "eval_pred_class_0": 7206, + "eval_pred_class_1": 12462, + "eval_predicted_binding_ratio": 0.6336180597925565, + "eval_recall": 0.6381812318606901, + "eval_recall_macro": 0.5027086517847544, + "eval_runtime": 0.2682, + "eval_samples_per_second": 607.718, + "eval_steps_per_second": 3.728, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 364 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.41844620703681107, + "eval_auc": 0.4144857969457745, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2516356974613975, + "eval_f1_macro": 0.3880413644466475, + "eval_loss": 0.9142351150512695, + "eval_pr_auc": 0.1264136402678906, + "eval_precision": 0.15784289583846342, + "eval_precision_macro": 0.5002307331563727, + "eval_pred_class_0": 7485, + "eval_pred_class_1": 12183, + "eval_predicted_binding_ratio": 0.6194325808419768, + "eval_recall": 0.6201225411157691, + "eval_recall_macro": 0.5004095532886144, + "eval_runtime": 0.2527, + "eval_samples_per_second": 645.136, + "eval_steps_per_second": 3.958, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 390 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.42897091722595077, + "eval_auc": 0.41858189431685716, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24961582147390926, + "eval_f1_macro": 0.3943717007980979, + "eval_loss": 0.8934236168861389, + "eval_pr_auc": 0.12736412734017702, + "eval_precision": 0.15742457441429294, + "eval_precision_macro": 0.4996940867457263, + "eval_pred_class_0": 7802, + "eval_pred_class_1": 11866, + "eval_predicted_binding_ratio": 0.6033150294895261, + "eval_recall": 0.6023863269912931, + "eval_recall_macro": 0.4994487317940711, + "eval_runtime": 0.2416, + "eval_samples_per_second": 674.578, + "eval_steps_per_second": 4.139, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 416 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.4378177750660972, + "eval_auc": 0.42318268015385996, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24520445081575534, + "eval_f1_macro": 0.3986584493314002, + "eval_loss": 0.8710600733757019, + "eval_pr_auc": 0.12844830521974454, + "eval_precision": 0.15552476619328023, + "eval_precision_macro": 0.4974052402394973, + "eval_pred_class_0": 8120, + "eval_pred_class_1": 11548, + "eval_predicted_binding_ratio": 0.5871466341264999, + "eval_recall": 0.5791680103192518, + "eval_recall_macro": 0.4952639713574891, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.27, + "eval_steps_per_second": 3.891, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 442 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.4492576774455969, + "eval_auc": 0.4283753771124365, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.23911211014329867, + "eval_f1_macro": 0.4037791626413705, + "eval_loss": 0.8472632765769958, + "eval_pr_auc": 0.12969206942947384, + "eval_precision": 0.15285136955545578, + "eval_precision_macro": 0.4944498263457579, + "eval_pred_class_0": 8533, + "eval_pred_class_1": 11135, + "eval_predicted_binding_ratio": 0.5661480577587961, + "eval_recall": 0.5488552079974202, + "eval_recall_macro": 0.4897351430824307, + "eval_runtime": 0.281, + "eval_samples_per_second": 580.087, + "eval_steps_per_second": 3.559, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 468 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.46339231238560097, + "eval_auc": 0.4341240430739382, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.23588184187662903, + "eval_f1_macro": 0.41119432949496704, + "eval_loss": 0.8218646049499512, + "eval_pr_auc": 0.13110819406948146, + "eval_precision": 0.15208663990290355, + "eval_precision_macro": 0.4938729504080779, + "eval_pred_class_0": 8957, + "eval_pred_class_1": 10711, + "eval_predicted_binding_ratio": 0.544590197274761, + "eval_recall": 0.5253144147049339, + "eval_recall_macro": 0.4885580946585574, + "eval_runtime": 0.2447, + "eval_samples_per_second": 666.13, + "eval_steps_per_second": 4.087, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 494 + }, + { + "epoch": 19.23076923076923, + "grad_norm": 232728.109375, + "learning_rate": 3.8384615384615384e-07, + "loss": 0.99, + "step": 500 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.4798657718120805, + "eval_auc": 0.44078220133048324, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2309427153811457, + "eval_f1_macro": 0.4189975157915178, + "eval_loss": 0.7945711016654968, + "eval_pr_auc": 0.1327664236209388, + "eval_precision": 0.15057347318890305, + "eval_precision_macro": 0.49263119629657465, + "eval_pred_class_0": 9467, + "eval_pred_class_1": 10201, + "eval_predicted_binding_ratio": 0.5186597518812284, + "eval_recall": 0.49532408900354724, + "eval_recall_macro": 0.4861481916617905, + "eval_runtime": 0.249, + "eval_samples_per_second": 654.734, + "eval_steps_per_second": 4.017, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 520 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.49964409192597115, + "eval_auc": 0.4482004774880778, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22407947646455886, + "eval_f1_macro": 0.42742637388305044, + "eval_loss": 0.765658974647522, + "eval_pr_auc": 0.1347110745909903, + "eval_precision": 0.14829889375913172, + "eval_precision_macro": 0.4908656872127009, + "eval_pred_class_0": 10086, + "eval_pred_class_1": 9582, + "eval_predicted_binding_ratio": 0.48718730933496035, + "eval_recall": 0.4582392776523702, + "eval_recall_macro": 0.48281674753627146, + "eval_runtime": 0.2494, + "eval_samples_per_second": 653.647, + "eval_steps_per_second": 4.01, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 546 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.5294895261338214, + "eval_auc": 0.4563512312496838, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22025615099427032, + "eval_f1_macro": 0.44167751484473966, + "eval_loss": 0.7351489067077637, + "eval_pr_auc": 0.13684095333600696, + "eval_precision": 0.14908178396258698, + "eval_precision_macro": 0.49225486317659667, + "eval_pred_class_0": 10901, + "eval_pred_class_1": 8767, + "eval_predicted_binding_ratio": 0.4457494407158837, + "eval_recall": 0.4214769429216382, + "eval_recall_macro": 0.48559209613637894, + "eval_runtime": 0.2523, + "eval_samples_per_second": 645.993, + "eval_steps_per_second": 3.963, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 572 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.564317673378076, + "eval_auc": 0.46539531162556536, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21679919568595193, + "eval_f1_macro": 0.45751035677940843, + "eval_loss": 0.7033414244651794, + "eval_pr_auc": 0.1392772958743257, + "eval_precision": 0.15127551020408164, + "eval_precision_macro": 0.49468577674559844, + "eval_pred_class_0": 11828, + "eval_pred_class_1": 7840, + "eval_predicted_binding_ratio": 0.3986170429123449, + "eval_recall": 0.38245727184779105, + "eval_recall_macro": 0.49040772688786005, + "eval_runtime": 0.2665, + "eval_samples_per_second": 611.699, + "eval_steps_per_second": 3.753, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 598 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.6033150294895261, + "eval_auc": 0.4754341993823483, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21223747980613894, + "eval_f1_macro": 0.4735759293567254, + "eval_loss": 0.6706362962722778, + "eval_pr_auc": 0.14231930535250045, + "eval_precision": 0.1544906658826988, + "eval_precision_macro": 0.4975718001003078, + "eval_pred_class_0": 12865, + "eval_pred_class_1": 6803, + "eval_predicted_binding_ratio": 0.34589180394549524, + "eval_recall": 0.3389229280877136, + "eval_recall_macro": 0.49586334730576304, + "eval_runtime": 0.2619, + "eval_samples_per_second": 622.356, + "eval_steps_per_second": 3.818, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 624 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.6474984746796827, + "eval_auc": 0.4864891211002582, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2075665790376043, + "eval_f1_macro": 0.4904508280155492, + "eval_loss": 0.6374054551124573, + "eval_pr_auc": 0.14557281943245967, + "eval_precision": 0.16076487252124647, + "eval_precision_macro": 0.5021727358326632, + "eval_pred_class_0": 14020, + "eval_pred_class_1": 5648, + "eval_predicted_binding_ratio": 0.28716697173073014, + "eval_recall": 0.2928087713640761, + "eval_recall_macro": 0.5033489139611471, + "eval_runtime": 0.2341, + "eval_samples_per_second": 696.138, + "eval_steps_per_second": 4.271, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 650 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.6925462680496237, + "eval_auc": 0.49869307137754393, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.20360858685631503, + "eval_f1_macro": 0.5065549471155847, + "eval_loss": 0.6044979691505432, + "eval_pr_auc": 0.14935675594952297, + "eval_precision": 0.17208370436331255, + "eval_precision_macro": 0.5093417994668434, + "eval_pred_class_0": 15176, + "eval_pred_class_1": 4492, + "eval_predicted_binding_ratio": 0.22839129550538947, + "eval_recall": 0.2492744276039987, + "eval_recall_macro": 0.5123960114117054, + "eval_runtime": 0.2543, + "eval_samples_per_second": 640.94, + "eval_steps_per_second": 3.932, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 676 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.7326113483831604, + "eval_auc": 0.5121322314924708, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.1977116704805492, + "eval_f1_macro": 0.5186416868006297, + "eval_loss": 0.5727357268333435, + "eval_pr_auc": 0.15383837227298106, + "eval_precision": 0.18760856977417487, + "eval_precision_macro": 0.518159780138105, + "eval_pred_class_0": 16214, + "eval_pred_class_1": 3454, + "eval_predicted_binding_ratio": 0.1756152125279642, + "eval_recall": 0.20896485004837148, + "eval_recall_macro": 0.5197960002037596, + "eval_runtime": 0.2506, + "eval_samples_per_second": 650.309, + "eval_steps_per_second": 3.99, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 702 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.7681513117754728, + "eval_auc": 0.5270188672472933, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.18947742623533595, + "eval_f1_macro": 0.5271029967130403, + "eval_loss": 0.5426873564720154, + "eval_pr_auc": 0.1589999639181036, + "eval_precision": 0.21108910891089108, + "eval_precision_macro": 0.5306451786169109, + "eval_pred_class_0": 17143, + "eval_pred_class_1": 2525, + "eval_predicted_binding_ratio": 0.12838112670327437, + "eval_recall": 0.17188003869719445, + "eval_recall_macro": 0.5258205046507038, + "eval_runtime": 0.2527, + "eval_samples_per_second": 644.973, + "eval_steps_per_second": 3.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 728 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.7986068741102298, + "eval_auc": 0.5437619187831915, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.18076525336091004, + "eval_f1_macro": 0.5329784934669249, + "eval_loss": 0.5149086117744446, + "eval_pr_auc": 0.16564494894795073, + "eval_precision": 0.2520184544405998, + "eval_precision_macro": 0.5517368953367268, + "eval_pred_class_0": 17934, + "eval_pred_class_1": 1734, + "eval_predicted_binding_ratio": 0.08816351433801098, + "eval_recall": 0.14092228313447275, + "eval_recall_macro": 0.5313170599592205, + "eval_runtime": 0.258, + "eval_samples_per_second": 631.786, + "eval_steps_per_second": 3.876, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 754 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.8241814114297336, + "eval_auc": 0.5629712926123112, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.17548879351454458, + "eval_f1_macro": 0.5385440097559633, + "eval_loss": 0.4897482395172119, + "eval_pr_auc": 0.17432371223202417, + "eval_precision": 0.3366880146386093, + "eval_precision_macro": 0.5947773855158054, + "eval_pred_class_0": 18575, + "eval_pred_class_1": 1093, + "eval_predicted_binding_ratio": 0.05557250355908074, + "eval_recall": 0.11867139632376653, + "eval_recall_macro": 0.5374548506940254, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.589, + "eval_steps_per_second": 3.899, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 780 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.8372991661582265, + "eval_auc": 0.5843234707368185, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.16883116883116883, + "eval_f1_macro": 0.5393273806169032, + "eval_loss": 0.46770623326301575, + "eval_pr_auc": 0.18537280435724188, + "eval_precision": 0.43391188251001334, + "eval_precision_macro": 0.6435905413924347, + "eval_pred_class_0": 18919, + "eval_pred_class_1": 749, + "eval_predicted_binding_ratio": 0.038082163921090095, + "eval_recall": 0.10480490164463076, + "eval_recall_macro": 0.5396059276135269, + "eval_runtime": 0.262, + "eval_samples_per_second": 622.108, + "eval_steps_per_second": 3.817, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 806 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.8421293471629042, + "eval_auc": 0.6080693861773249, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.16012983500135244, + "eval_f1_macro": 0.5365030891665479, + "eval_loss": 0.44841739535331726, + "eval_pr_auc": 0.1997259509611161, + "eval_precision": 0.4966442953020134, + "eval_precision_macro": 0.6747850251677853, + "eval_pred_class_0": 19072, + "eval_pred_class_1": 596, + "eval_predicted_binding_ratio": 0.030303030303030304, + "eval_recall": 0.09545307965172525, + "eval_recall_macro": 0.5386723960460594, + "eval_runtime": 0.2596, + "eval_samples_per_second": 627.915, + "eval_steps_per_second": 3.852, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 832 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.8450782997762863, + "eval_auc": 0.6341019717128032, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.16497670594683475, + "eval_f1_macro": 0.5397977373430758, + "eval_loss": 0.4312308728694916, + "eval_pr_auc": 0.2181951536640109, + "eval_precision": 0.5492700729927007, + "eval_precision_macro": 0.7014132791741746, + "eval_pred_class_0": 19120, + "eval_pred_class_1": 548, + "eval_predicted_binding_ratio": 0.0278625177954037, + "eval_recall": 0.09706546275395034, + "eval_recall_macro": 0.5410781529982704, + "eval_runtime": 0.2519, + "eval_samples_per_second": 647.04, + "eval_steps_per_second": 3.97, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 858 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.8478238763473663, + "eval_auc": 0.6614343616815009, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.17932547299149987, + "eval_f1_macro": 0.5477310488609045, + "eval_loss": 0.41584137082099915, + "eval_pr_auc": 0.24110190314317137, + "eval_precision": 0.5989010989010989, + "eval_precision_macro": 0.7269162957114008, + "eval_pred_class_0": 19122, + "eval_pred_class_1": 546, + "eval_predicted_binding_ratio": 0.027760829774252593, + "eval_recall": 0.1054498548855208, + "eval_recall_macro": 0.546115402483504, + "eval_runtime": 0.2547, + "eval_samples_per_second": 640.016, + "eval_steps_per_second": 3.926, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 884 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.8509253609924751, + "eval_auc": 0.6891114086357669, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.20585048754062837, + "eval_f1_macro": 0.5617963020129356, + "eval_loss": 0.4015716016292572, + "eval_pr_auc": 0.2683830744239665, + "eval_precision": 0.6429780033840947, + "eval_precision_macro": 0.7501727569994856, + "eval_pred_class_0": 19077, + "eval_pred_class_1": 591, + "eval_predicted_binding_ratio": 0.030048810250152533, + "eval_recall": 0.12254111576910674, + "eval_recall_macro": 0.5549024767594251, + "eval_runtime": 0.2497, + "eval_samples_per_second": 652.872, + "eval_steps_per_second": 4.005, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 910 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.8537217815741306, + "eval_auc": 0.7168296727231165, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2366675510745556, + "eval_f1_macro": 0.5778889812054533, + "eval_loss": 0.3880736827850342, + "eval_pr_auc": 0.2994175694348318, + "eval_precision": 0.6676646706586826, + "eval_precision_macro": 0.7639639142767097, + "eval_pred_class_0": 19000, + "eval_pred_class_1": 668, + "eval_predicted_binding_ratio": 0.0339637990644702, + "eval_recall": 0.1438245727184779, + "eval_recall_macro": 0.5652122199621845, + "eval_runtime": 0.2693, + "eval_samples_per_second": 605.227, + "eval_steps_per_second": 3.713, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 936 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.8565690461663616, + "eval_auc": 0.743181046261935, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2768520892078954, + "eval_f1_macro": 0.59862076733571, + "eval_loss": 0.37574923038482666, + "eval_pr_auc": 0.33279138215623166, + "eval_precision": 0.675, + "eval_precision_macro": 0.7696337714649142, + "eval_pred_class_0": 18868, + "eval_pred_class_1": 800, + "eval_predicted_binding_ratio": 0.04067520846044336, + "eval_recall": 0.17413737504030957, + "eval_recall_macro": 0.5792217629109919, + "eval_runtime": 0.2774, + "eval_samples_per_second": 587.664, + "eval_steps_per_second": 3.605, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 962 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.8596705308114704, + "eval_auc": 0.7678773986205973, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.3205317577548006, + "eval_f1_macro": 0.6211435791665652, + "eval_loss": 0.36427780985832214, + "eval_pr_auc": 0.3671211589285648, + "eval_precision": 0.6774193548387096, + "eval_precision_macro": 0.7732261685723991, + "eval_pred_class_0": 18707, + "eval_pred_class_1": 961, + "eval_predicted_binding_ratio": 0.048861094163107584, + "eval_recall": 0.20993227990970656, + "eval_recall_macro": 0.5956101913823899, + "eval_runtime": 0.2611, + "eval_samples_per_second": 624.252, + "eval_steps_per_second": 3.83, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 988 + }, + { + "epoch": 38.46153846153846, + "grad_norm": 35024.03515625, + "learning_rate": 7.684615384615384e-07, + "loss": 0.5725, + "step": 1000 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.8642464917632703, + "eval_auc": 0.7904617013805764, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.3723554301833568, + "eval_f1_macro": 0.6481240123381296, + "eval_loss": 0.35397008061408997, + "eval_pr_auc": 0.40223746916130343, + "eval_precision": 0.6869037294015612, + "eval_precision_macro": 0.7810970172797707, + "eval_pred_class_0": 18515, + "eval_pred_class_1": 1153, + "eval_predicted_binding_ratio": 0.058623144193613995, + "eval_recall": 0.25540148339245405, + "eval_recall_macro": 0.6168055886811972, + "eval_runtime": 0.2572, + "eval_samples_per_second": 633.77, + "eval_steps_per_second": 3.888, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1014 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.867246288387228, + "eval_auc": 0.8102996097248453, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.4125984251968504, + "eval_f1_macro": 0.6688826868467987, + "eval_loss": 0.3446972072124481, + "eval_pr_auc": 0.43559149314237056, + "eval_precision": 0.6822916666666666, + "eval_precision_macro": 0.7815518582187295, + "eval_pred_class_0": 18324, + "eval_pred_class_1": 1344, + "eval_predicted_binding_ratio": 0.06833435021354485, + "eval_recall": 0.29571106094808125, + "eval_recall_macro": 0.634968465827454, + "eval_runtime": 0.2532, + "eval_samples_per_second": 643.748, + "eval_steps_per_second": 3.949, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1040 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.8715171852755745, + "eval_auc": 0.8272611461298317, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.4583065380493033, + "eval_f1_macro": 0.6927107089600444, + "eval_loss": 0.33654505014419556, + "eval_pr_auc": 0.4645782536288223, + "eval_precision": 0.6835038363171355, + "eval_precision_macro": 0.7856317237263981, + "eval_pred_class_0": 18104, + "eval_pred_class_1": 1564, + "eval_predicted_binding_ratio": 0.07952003254016676, + "eval_recall": 0.344727507255724, + "eval_recall_macro": 0.6574244163911867, + "eval_runtime": 0.2602, + "eval_samples_per_second": 626.484, + "eval_steps_per_second": 3.843, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1066 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.8743136058572301, + "eval_auc": 0.8416796876148132, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.4898885678910442, + "eval_f1_macro": 0.7091078096051335, + "eval_loss": 0.329649955034256, + "eval_pr_auc": 0.49067495219464874, + "eval_precision": 0.6802292263610316, + "eval_precision_macro": 0.7867195342316791, + "eval_pred_class_0": 17923, + "eval_pred_class_1": 1745, + "eval_predicted_binding_ratio": 0.08872279845434208, + "eval_recall": 0.38277974846823604, + "eval_recall_macro": 0.674549166803684, + "eval_runtime": 0.2641, + "eval_samples_per_second": 617.276, + "eval_steps_per_second": 3.787, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1092 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.8758897701850722, + "eval_auc": 0.8534597097025247, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5176842521240862, + "eval_f1_macro": 0.7232330815689724, + "eval_loss": 0.32387641072273254, + "eval_pr_auc": 0.5115876936649595, + "eval_precision": 0.6683673469387755, + "eval_precision_macro": 0.7836133097919538, + "eval_pred_class_0": 17708, + "eval_pred_class_1": 1960, + "eval_predicted_binding_ratio": 0.09965426072808622, + "eval_recall": 0.42244437278297325, + "eval_recall_macro": 0.6916048748685797, + "eval_runtime": 0.2631, + "eval_samples_per_second": 619.429, + "eval_steps_per_second": 3.8, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1118 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.878991254830181, + "eval_auc": 0.863260959032272, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5468392993145469, + "eval_f1_macro": 0.7385059071387897, + "eval_loss": 0.3189404308795929, + "eval_pr_auc": 0.5291286431025274, + "eval_precision": 0.6675964667596467, + "eval_precision_macro": 0.7862729722049646, + "eval_pred_class_0": 17517, + "eval_pred_class_1": 2151, + "eval_predicted_binding_ratio": 0.10936546674801709, + "eval_recall": 0.4630764269590455, + "eval_recall_macro": 0.7099591708043251, + "eval_runtime": 0.2583, + "eval_samples_per_second": 630.993, + "eval_steps_per_second": 3.871, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1144 + }, + { + "epoch": 45.0, + "eval_accuracy": 0.8797030709782387, + "eval_auc": 0.8710211865407248, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5603864734299517, + "eval_f1_macro": 0.7453519808982827, + "eval_loss": 0.3149340748786926, + "eval_pr_auc": 0.5420378923897758, + "eval_precision": 0.6611135466900482, + "eval_precision_macro": 0.7847466853482449, + "eval_pred_class_0": 17387, + "eval_pred_class_1": 2281, + "eval_predicted_binding_ratio": 0.11597518812283913, + "eval_recall": 0.48629474363108677, + "eval_recall_macro": 0.719817861342917, + "eval_runtime": 0.2505, + "eval_samples_per_second": 650.753, + "eval_steps_per_second": 3.992, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1170 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.8811775472849298, + "eval_auc": 0.8772876506442417, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5748590140076405, + "eval_f1_macro": 0.7528983447354317, + "eval_loss": 0.3115498721599579, + "eval_pr_auc": 0.5526462799402374, + "eval_precision": 0.659432387312187, + "eval_precision_macro": 0.7856853923591968, + "eval_pred_class_0": 17272, + "eval_pred_class_1": 2396, + "eval_predicted_binding_ratio": 0.12182224933902787, + "eval_recall": 0.509513060303128, + "eval_recall_macro": 0.7301292590704993, + "eval_runtime": 0.247, + "eval_samples_per_second": 659.808, + "eval_steps_per_second": 4.048, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1196 + }, + { + "epoch": 47.0, + "eval_accuracy": 0.8817368314012609, + "eval_auc": 0.8824923380415335, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5840486409155937, + "eval_f1_macro": 0.7575589340187262, + "eval_loss": 0.3087034523487091, + "eval_pr_auc": 0.5616002050007283, + "eval_precision": 0.6555600160578081, + "eval_precision_macro": 0.7850484483851945, + "eval_pred_class_0": 17177, + "eval_pred_class_1": 2491, + "eval_predicted_binding_ratio": 0.12665243034370552, + "eval_recall": 0.526604321186714, + "eval_recall_macro": 0.7374073093831197, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.602, + "eval_steps_per_second": 3.924, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1222 + }, + { + "epoch": 48.0, + "eval_accuracy": 0.8833638397396787, + "eval_auc": 0.8867719903429474, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5951288386869044, + "eval_f1_macro": 0.7634984852775182, + "eval_loss": 0.30617523193359375, + "eval_pr_auc": 0.5687331552143856, + "eval_precision": 0.6573099415204678, + "eval_precision_macro": 0.7872879591248483, + "eval_pred_class_0": 17103, + "eval_pred_class_1": 2565, + "eval_predicted_binding_ratio": 0.13041488712629654, + "eval_recall": 0.5436955820702999, + "eval_recall_macro": 0.7453191497603264, + "eval_runtime": 0.2573, + "eval_samples_per_second": 633.489, + "eval_steps_per_second": 3.886, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1248 + }, + { + "epoch": 49.0, + "eval_accuracy": 0.8840248118771609, + "eval_auc": 0.8901050792607902, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6022667829119442, + "eval_f1_macro": 0.7671909492667516, + "eval_loss": 0.3041446805000305, + "eval_pr_auc": 0.5742293420515451, + "eval_precision": 0.6556567957479119, + "eval_precision_macro": 0.7874972953730754, + "eval_pred_class_0": 17034, + "eval_pred_class_1": 2634, + "eval_predicted_binding_ratio": 0.13392312385600977, + "eval_recall": 0.5569171235085456, + "eval_recall_macro": 0.751084867060001, + "eval_runtime": 0.2527, + "eval_samples_per_second": 644.934, + "eval_steps_per_second": 3.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1274 + }, + { + "epoch": 50.0, + "eval_accuracy": 0.8846349400040675, + "eval_auc": 0.8931467576948593, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6055970797844603, + "eval_f1_macro": 0.7690165668701654, + "eval_loss": 0.30221912264823914, + "eval_pr_auc": 0.5797467982851593, + "eval_precision": 0.6568627450980392, + "eval_precision_macro": 0.7884983683177079, + "eval_pred_class_0": 17016, + "eval_pred_class_1": 2652, + "eval_predicted_binding_ratio": 0.13483831604636973, + "eval_recall": 0.561754272815221, + "eval_recall_macro": 0.7534129002755408, + "eval_runtime": 0.2505, + "eval_samples_per_second": 650.805, + "eval_steps_per_second": 3.993, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1300 + }, + { + "epoch": 51.0, + "eval_accuracy": 0.8854484441732764, + "eval_auc": 0.8956789398085232, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6095997227516895, + "eval_f1_macro": 0.7712381155096151, + "eval_loss": 0.30062082409858704, + "eval_pr_auc": 0.5844826815319759, + "eval_precision": 0.6588014981273408, + "eval_precision_macro": 0.7899255166833903, + "eval_pred_class_0": 16998, + "eval_pred_class_1": 2670, + "eval_predicted_binding_ratio": 0.13575350823672971, + "eval_recall": 0.5672363753627861, + "eval_recall_macro": 0.7561237710700572, + "eval_runtime": 0.2579, + "eval_samples_per_second": 631.92, + "eval_steps_per_second": 3.877, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1326 + }, + { + "epoch": 52.0, + "eval_accuracy": 0.8860077282896075, + "eval_auc": 0.8977014114868052, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6157010627356874, + "eval_f1_macro": 0.774389842453749, + "eval_loss": 0.2989857792854309, + "eval_pr_auc": 0.5879586440077966, + "eval_precision": 0.6571533113794366, + "eval_precision_macro": 0.7900469834133675, + "eval_pred_class_0": 16935, + "eval_pred_class_1": 2733, + "eval_predicted_binding_ratio": 0.13895668090298963, + "eval_recall": 0.5791680103192518, + "eval_recall_macro": 0.7613048960873738, + "eval_runtime": 0.2404, + "eval_samples_per_second": 677.963, + "eval_steps_per_second": 4.159, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1352 + }, + { + "epoch": 53.0, + "eval_accuracy": 0.8864144803742119, + "eval_auc": 0.8993954307902827, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6165465156196361, + "eval_f1_macro": 0.7749399244764847, + "eval_loss": 0.29775407910346985, + "eval_pr_auc": 0.5914083972949268, + "eval_precision": 0.6590825688073394, + "eval_precision_macro": 0.7910298047365505, + "eval_pred_class_0": 16943, + "eval_pred_class_1": 2725, + "eval_predicted_binding_ratio": 0.13854992881838518, + "eval_recall": 0.5791680103192518, + "eval_recall_macro": 0.7615463399215019, + "eval_runtime": 0.2547, + "eval_samples_per_second": 639.94, + "eval_steps_per_second": 3.926, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1378 + }, + { + "epoch": 54.0, + "eval_accuracy": 0.8866178564165141, + "eval_auc": 0.9007296980023092, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6186730506155951, + "eval_f1_macro": 0.7760410164688105, + "eval_loss": 0.296587198972702, + "eval_pr_auc": 0.59415963293408, + "eval_precision": 0.6585365853658537, + "eval_precision_macro": 0.7910908800004612, + "eval_pred_class_0": 16921, + "eval_pred_class_1": 2747, + "eval_predicted_binding_ratio": 0.1396684970510474, + "eval_recall": 0.583360206385037, + "eval_recall_macro": 0.7633708136410005, + "eval_runtime": 0.2605, + "eval_samples_per_second": 625.694, + "eval_steps_per_second": 3.839, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1404 + }, + { + "epoch": 55.0, + "eval_accuracy": 0.8872279845434208, + "eval_auc": 0.9019074471661075, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6215017064846416, + "eval_f1_macro": 0.7776226419864958, + "eval_loss": 0.2955063581466675, + "eval_pr_auc": 0.5967231989416606, + "eval_precision": 0.6600217470097861, + "eval_precision_macro": 0.7921612076464745, + "eval_pred_class_0": 16909, + "eval_pred_class_1": 2759, + "eval_predicted_binding_ratio": 0.14027862517795403, + "eval_recall": 0.5872299258303773, + "eval_recall_macro": 0.7653056733636705, + "eval_runtime": 0.2516, + "eval_samples_per_second": 647.741, + "eval_steps_per_second": 3.974, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1430 + }, + { + "epoch": 56.0, + "eval_accuracy": 0.8881431767337807, + "eval_auc": 0.9030401348597343, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6235455167693361, + "eval_f1_macro": 0.778929094226063, + "eval_loss": 0.29450690746307373, + "eval_pr_auc": 0.5995879576354929, + "eval_precision": 0.6642362376959533, + "eval_precision_macro": 0.7943337761596458, + "eval_pred_class_0": 16925, + "eval_pred_class_1": 2743, + "eval_predicted_binding_ratio": 0.13946512100874517, + "eval_recall": 0.5875524024508223, + "eval_recall_macro": 0.7659799798214153, + "eval_runtime": 0.2658, + "eval_samples_per_second": 613.135, + "eval_steps_per_second": 3.762, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1456 + }, + { + "epoch": 57.0, + "eval_accuracy": 0.8882957087655075, + "eval_auc": 0.9040587382005859, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6243802359377671, + "eval_f1_macro": 0.7793863433697854, + "eval_loss": 0.2936408519744873, + "eval_pr_auc": 0.6024898616264603, + "eval_precision": 0.6644832605531296, + "eval_precision_macro": 0.7945643253120258, + "eval_pred_class_0": 16920, + "eval_pred_class_1": 2748, + "eval_predicted_binding_ratio": 0.13971934106162295, + "eval_recall": 0.5888423089326024, + "eval_recall_macro": 0.7665947525830392, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.369, + "eval_steps_per_second": 3.898, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1482 + }, + { + "epoch": 57.69230769230769, + "grad_norm": 15613.5302734375, + "learning_rate": 9.992863736980368e-07, + "loss": 0.3115, + "step": 1500 + }, + { + "epoch": 58.0, + "eval_accuracy": 0.887888956680903, + "eval_auc": 0.9048886089216611, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6246808510638298, + "eval_f1_macro": 0.7793916194591735, + "eval_loss": 0.29281434416770935, + "eval_pr_auc": 0.603713292882509, + "eval_precision": 0.6614996395097332, + "eval_precision_macro": 0.7932808958765667, + "eval_pred_class_0": 16894, + "eval_pred_class_1": 2774, + "eval_predicted_binding_ratio": 0.14104128533658736, + "eval_recall": 0.5917445985166075, + "eval_recall_macro": 0.7675328292275196, + "eval_runtime": 0.2518, + "eval_samples_per_second": 647.428, + "eval_steps_per_second": 3.972, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1508 + }, + { + "epoch": 59.0, + "eval_accuracy": 0.887888956680903, + "eval_auc": 0.9056672866982218, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6251912289648138, + "eval_f1_macro": 0.7796389289833485, + "eval_loss": 0.2920655906200409, + "eval_pr_auc": 0.6054694565410179, + "eval_precision": 0.6610352264557872, + "eval_precision_macro": 0.7931493791878604, + "eval_pred_class_0": 16886, + "eval_pred_class_1": 2782, + "eval_predicted_binding_ratio": 0.14144803742119177, + "eval_recall": 0.5930345049983876, + "eval_recall_macro": 0.7680570605513457, + "eval_runtime": 0.235, + "eval_samples_per_second": 693.644, + "eval_steps_per_second": 4.255, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1534 + }, + { + "epoch": 60.0, + "eval_accuracy": 0.8882957087655075, + "eval_auc": 0.9063294664622661, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6254049445865303, + "eval_f1_macro": 0.7798830166450921, + "eval_loss": 0.29136955738067627, + "eval_pr_auc": 0.6071731602747702, + "eval_precision": 0.6635311143270622, + "eval_precision_macro": 0.7942892202018652, + "eval_pred_class_0": 16904, + "eval_pred_class_1": 2764, + "eval_predicted_binding_ratio": 0.14053284523083182, + "eval_recall": 0.5914221218961625, + "eval_recall_macro": 0.7676432152306912, + "eval_runtime": 0.2576, + "eval_samples_per_second": 632.702, + "eval_steps_per_second": 3.882, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1560 + }, + { + "epoch": 61.0, + "eval_accuracy": 0.8886007728289608, + "eval_auc": 0.9070085321120007, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6269368295589988, + "eval_f1_macro": 0.7807307642400976, + "eval_loss": 0.290680855512619, + "eval_pr_auc": 0.6088679721523397, + "eval_precision": 0.6641414141414141, + "eval_precision_macro": 0.7947837752525253, + "eval_pred_class_0": 16896, + "eval_pred_class_1": 2772, + "eval_predicted_binding_ratio": 0.14093959731543623, + "eval_recall": 0.5936794582392777, + "eval_recall_macro": 0.7687417029229828, + "eval_runtime": 0.2486, + "eval_samples_per_second": 655.642, + "eval_steps_per_second": 4.022, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1586 + }, + { + "epoch": 62.0, + "eval_accuracy": 0.8887533048606874, + "eval_auc": 0.9076136113046634, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6273841961852861, + "eval_f1_macro": 0.7810002501366307, + "eval_loss": 0.29004454612731934, + "eval_pr_auc": 0.6100991712198425, + "eval_precision": 0.664741970407795, + "eval_precision_macro": 0.7951158511564335, + "eval_pred_class_0": 16897, + "eval_pred_class_1": 2771, + "eval_predicted_binding_ratio": 0.1408887533048607, + "eval_recall": 0.5940019348597226, + "eval_recall_macro": 0.7689633021917374, + "eval_runtime": 0.2604, + "eval_samples_per_second": 625.871, + "eval_steps_per_second": 3.84, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1612 + }, + { + "epoch": 63.0, + "eval_accuracy": 0.8888041488712629, + "eval_auc": 0.9081136281710841, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6286296484971982, + "eval_f1_macro": 0.781621309135136, + "eval_loss": 0.28950682282447815, + "eval_pr_auc": 0.6111782063777282, + "eval_precision": 0.6639167862266858, + "eval_precision_macro": 0.794932326762632, + "eval_pred_class_0": 16880, + "eval_pred_class_1": 2788, + "eval_predicted_binding_ratio": 0.14175310148464512, + "eval_recall": 0.5969042244437278, + "eval_recall_macro": 0.7701730031496119, + "eval_runtime": 0.2489, + "eval_samples_per_second": 654.907, + "eval_steps_per_second": 4.018, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1638 + }, + { + "epoch": 64.0, + "eval_accuracy": 0.889363432987594, + "eval_auc": 0.9086206913667498, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6299319727891156, + "eval_f1_macro": 0.7824456611913058, + "eval_loss": 0.2888965606689453, + "eval_pr_auc": 0.6126297306007413, + "eval_precision": 0.6664267722202231, + "eval_precision_macro": 0.7962366556938643, + "eval_pred_class_0": 16889, + "eval_pred_class_1": 2779, + "eval_predicted_binding_ratio": 0.14129550538946511, + "eval_recall": 0.5972267010641729, + "eval_recall_macro": 0.7706360462524945, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.362, + "eval_steps_per_second": 3.935, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1664 + }, + { + "epoch": 65.0, + "eval_accuracy": 0.8900244051250763, + "eval_auc": 0.9091278518874051, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6305721605465414, + "eval_f1_macro": 0.782984177701663, + "eval_loss": 0.2884848117828369, + "eval_pr_auc": 0.6142560104629078, + "eval_precision": 0.6702977487291213, + "eval_precision_macro": 0.7980494301171916, + "eval_pred_class_0": 16914, + "eval_pred_class_1": 2754, + "eval_predicted_binding_ratio": 0.14002440512507627, + "eval_recall": 0.5952918413415027, + "eval_recall_macro": 0.7702420454972136, + "eval_runtime": 0.265, + "eval_samples_per_second": 614.992, + "eval_steps_per_second": 3.773, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1690 + }, + { + "epoch": 66.0, + "eval_accuracy": 0.8905836892414074, + "eval_auc": 0.9094545718773954, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6327645051194539, + "eval_f1_macro": 0.78423982216183, + "eval_loss": 0.2880232632160187, + "eval_pr_auc": 0.6147358252333397, + "eval_precision": 0.6719826023921711, + "eval_precision_macro": 0.7991174470355793, + "eval_pred_class_0": 16909, + "eval_pred_class_1": 2759, + "eval_predicted_binding_ratio": 0.14027862517795403, + "eval_recall": 0.5978716543050628, + "eval_recall_macro": 0.7716224934167917, + "eval_runtime": 0.2505, + "eval_samples_per_second": 650.665, + "eval_steps_per_second": 3.992, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1716 + }, + { + "epoch": 67.0, + "eval_accuracy": 0.8908887533048607, + "eval_auc": 0.9099424231201196, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6360244233378561, + "eval_f1_macro": 0.7859248910947654, + "eval_loss": 0.28752708435058594, + "eval_pr_auc": 0.6159928290925853, + "eval_precision": 0.6708407871198568, + "eval_precision_macro": 0.7990901618287602, + "eval_pred_class_0": 16873, + "eval_pred_class_1": 2795, + "eval_predicted_binding_ratio": 0.14210900955867398, + "eval_recall": 0.6046436633344082, + "eval_recall_macro": 0.7745557907424743, + "eval_runtime": 0.2471, + "eval_samples_per_second": 659.527, + "eval_steps_per_second": 4.046, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1742 + }, + { + "epoch": 68.0, + "eval_accuracy": 0.8910412853365873, + "eval_auc": 0.910346516476819, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6362247496180614, + "eval_f1_macro": 0.7860747010162366, + "eval_loss": 0.2870826721191406, + "eval_pr_auc": 0.6168347475575285, + "eval_precision": 0.6716845878136201, + "eval_precision_macro": 0.7994932004123201, + "eval_pred_class_0": 16878, + "eval_pred_class_1": 2790, + "eval_predicted_binding_ratio": 0.14185478950579622, + "eval_recall": 0.6043211867139633, + "eval_recall_macro": 0.7745152743493158, + "eval_runtime": 0.266, + "eval_samples_per_second": 612.866, + "eval_steps_per_second": 3.76, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1768 + }, + { + "epoch": 69.0, + "eval_accuracy": 0.8910921293471629, + "eval_auc": 0.9107640601470772, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6352179836512262, + "eval_f1_macro": 0.7856044496310159, + "eval_loss": 0.28665614128112793, + "eval_pr_auc": 0.6181373929491851, + "eval_precision": 0.673042223024179, + "eval_precision_macro": 0.7999465716529429, + "eval_pred_class_0": 16897, + "eval_pred_class_1": 2771, + "eval_predicted_binding_ratio": 0.1408887533048607, + "eval_recall": 0.6014188971299581, + "eval_recall_macro": 0.7733659343499732, + "eval_runtime": 0.2549, + "eval_samples_per_second": 639.359, + "eval_steps_per_second": 3.922, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1794 + }, + { + "epoch": 70.0, + "eval_accuracy": 0.8910412853365873, + "eval_auc": 0.9111344401273891, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6354822248681748, + "eval_f1_macro": 0.7857149295725039, + "eval_loss": 0.28624698519706726, + "eval_pr_auc": 0.6190938884927122, + "eval_precision": 0.6724262059035278, + "eval_precision_macro": 0.7997122148522967, + "eval_pred_class_0": 16890, + "eval_pred_class_1": 2778, + "eval_predicted_binding_ratio": 0.14124466137888958, + "eval_recall": 0.6023863269912931, + "eval_recall_macro": 0.7737289273635768, + "eval_runtime": 0.2673, + "eval_samples_per_second": 609.866, + "eval_steps_per_second": 3.742, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1820 + }, + { + "epoch": 71.0, + "eval_accuracy": 0.891193817368314, + "eval_auc": 0.9114138601724477, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.63642541624193, + "eval_f1_macro": 0.7862246662674524, + "eval_loss": 0.2858646512031555, + "eval_pr_auc": 0.6197061363545492, + "eval_precision": 0.6725314183123878, + "eval_precision_macro": 0.7998977650704271, + "eval_pred_class_0": 16883, + "eval_pred_class_1": 2785, + "eval_predicted_binding_ratio": 0.14160056945291843, + "eval_recall": 0.6039987100935182, + "eval_recall_macro": 0.7744747579561573, + "eval_runtime": 0.2386, + "eval_samples_per_second": 683.037, + "eval_steps_per_second": 4.19, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1846 + }, + { + "epoch": 72.0, + "eval_accuracy": 0.8913971934106162, + "eval_auc": 0.9118766988928523, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6373514431239389, + "eval_f1_macro": 0.7867436519572335, + "eval_loss": 0.2853938341140747, + "eval_pr_auc": 0.6212208808374569, + "eval_precision": 0.6730010756543564, + "eval_precision_macro": 0.8002424656665053, + "eval_pred_class_0": 16879, + "eval_pred_class_1": 2789, + "eval_predicted_binding_ratio": 0.14180394549522066, + "eval_recall": 0.6052886165752983, + "eval_recall_macro": 0.7751197111970474, + "eval_runtime": 0.249, + "eval_samples_per_second": 654.683, + "eval_steps_per_second": 4.016, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1872 + }, + { + "epoch": 73.0, + "eval_accuracy": 0.891193817368314, + "eval_auc": 0.9121406831945651, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.637411047102677, + "eval_f1_macro": 0.7867021736679862, + "eval_loss": 0.2850610911846161, + "eval_pr_auc": 0.6219405042066507, + "eval_precision": 0.6715458764726884, + "eval_precision_macro": 0.79960764506032, + "eval_pred_class_0": 16867, + "eval_pred_class_1": 2801, + "eval_predicted_binding_ratio": 0.14241407362212732, + "eval_recall": 0.6065785230570784, + "eval_recall_macro": 0.7755232206038093, + "eval_runtime": 0.2633, + "eval_samples_per_second": 618.95, + "eval_steps_per_second": 3.797, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1898 + }, + { + "epoch": 74.0, + "eval_accuracy": 0.8912446613788896, + "eval_auc": 0.9124869655074592, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.637026981164093, + "eval_f1_macro": 0.7865337040796394, + "eval_loss": 0.284681111574173, + "eval_pr_auc": 0.6229948438184316, + "eval_precision": 0.6722779369627507, + "eval_precision_macro": 0.7998744508231626, + "eval_pred_class_0": 16876, + "eval_pred_class_1": 2792, + "eval_predicted_binding_ratio": 0.14195647752694732, + "eval_recall": 0.6052886165752983, + "eval_recall_macro": 0.7750291697592493, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.745, + "eval_steps_per_second": 3.925, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1924 + }, + { + "epoch": 75.0, + "eval_accuracy": 0.8913971934106162, + "eval_auc": 0.9128360118500571, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6385786802030456, + "eval_f1_macro": 0.7873381643700563, + "eval_loss": 0.2842992842197418, + "eval_pr_auc": 0.6238239183047751, + "eval_precision": 0.6717693129227483, + "eval_precision_macro": 0.7998801484834395, + "eval_pred_class_0": 16859, + "eval_pred_class_1": 2809, + "eval_predicted_binding_ratio": 0.14282082570673174, + "eval_recall": 0.6085133827797484, + "eval_recall_macro": 0.7764302895066123, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.282, + "eval_steps_per_second": 3.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1950 + }, + { + "epoch": 76.0, + "eval_accuracy": 0.8913971934106162, + "eval_auc": 0.9131794425407568, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.638700947225981, + "eval_f1_macro": 0.7873973860112672, + "eval_loss": 0.2839708924293518, + "eval_pr_auc": 0.6248797725776689, + "eval_precision": 0.671647100675916, + "eval_precision_macro": 0.7998444318708524, + "eval_pred_class_0": 16857, + "eval_pred_class_1": 2811, + "eval_predicted_binding_ratio": 0.14292251372788287, + "eval_recall": 0.6088358594001935, + "eval_recall_macro": 0.7765613473375688, + "eval_runtime": 0.2644, + "eval_samples_per_second": 616.603, + "eval_steps_per_second": 3.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1976 + }, + { + "epoch": 76.92307692307692, + "grad_norm": 18483.060546875, + "learning_rate": 9.912189372587507e-07, + "loss": 0.2796, + "step": 2000 + }, + { + "epoch": 77.0, + "eval_accuracy": 0.891651413463494, + "eval_auc": 0.9134005357195656, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6389971201084195, + "eval_f1_macro": 0.7876288504858192, + "eval_loss": 0.28368592262268066, + "eval_pr_auc": 0.6256253637409228, + "eval_precision": 0.6730906495360457, + "eval_precision_macro": 0.8005261145225586, + "eval_pred_class_0": 16866, + "eval_pred_class_1": 2802, + "eval_predicted_binding_ratio": 0.14246491763270286, + "eval_recall": 0.6081909061593035, + "eval_recall_macro": 0.7764501340719858, + "eval_runtime": 0.2557, + "eval_samples_per_second": 637.408, + "eval_steps_per_second": 3.91, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2002 + }, + { + "epoch": 78.0, + "eval_accuracy": 0.8918547895057962, + "eval_auc": 0.9135784263355038, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6399187404773997, + "eval_f1_macro": 0.788145675542478, + "eval_loss": 0.283357173204422, + "eval_pr_auc": 0.6259773419133142, + "eval_precision": 0.6735566642908054, + "eval_precision_macro": 0.8008691873227245, + "eval_pred_class_0": 16862, + "eval_pred_class_1": 2806, + "eval_predicted_binding_ratio": 0.14266829367500508, + "eval_recall": 0.6094808126410836, + "eval_recall_macro": 0.7770950873128759, + "eval_runtime": 0.2408, + "eval_samples_per_second": 676.804, + "eval_steps_per_second": 4.152, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2028 + }, + { + "epoch": 79.0, + "eval_accuracy": 0.8920073215375229, + "eval_auc": 0.9139080660751812, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6396335256192739, + "eval_f1_macro": 0.788060288914535, + "eval_loss": 0.28298139572143555, + "eval_pr_auc": 0.6270064177031266, + "eval_precision": 0.6749015395631937, + "eval_precision_macro": 0.8014211401519672, + "eval_pred_class_0": 16875, + "eval_pred_class_1": 2793, + "eval_predicted_binding_ratio": 0.14200732153752288, + "eval_recall": 0.6078684295388584, + "eval_recall_macro": 0.7765303395958915, + "eval_runtime": 0.255, + "eval_samples_per_second": 639.262, + "eval_steps_per_second": 3.922, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2054 + }, + { + "epoch": 80.0, + "eval_accuracy": 0.8923123856009763, + "eval_auc": 0.9140997087121456, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6410169491525424, + "eval_f1_macro": 0.7888360257187523, + "eval_loss": 0.28268861770629883, + "eval_pr_auc": 0.6271691682976167, + "eval_precision": 0.6755984280100036, + "eval_precision_macro": 0.8019346102940528, + "eval_pred_class_0": 16869, + "eval_pred_class_1": 2799, + "eval_predicted_binding_ratio": 0.1423123856009762, + "eval_recall": 0.6098032892615285, + "eval_recall_macro": 0.7774977694572265, + "eval_runtime": 0.2121, + "eval_samples_per_second": 768.576, + "eval_steps_per_second": 4.715, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2080 + }, + { + "epoch": 81.0, + "eval_accuracy": 0.8924140736221273, + "eval_auc": 0.9143275951752264, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6415989159891599, + "eval_f1_macro": 0.7891531311221224, + "eval_loss": 0.28239867091178894, + "eval_pr_auc": 0.6278526459152028, + "eval_precision": 0.6757046022119158, + "eval_precision_macro": 0.8020681327098713, + "eval_pred_class_0": 16865, + "eval_pred_class_1": 2803, + "eval_predicted_binding_ratio": 0.14251576164327842, + "eval_recall": 0.6107707191228636, + "eval_recall_macro": 0.7779513039086281, + "eval_runtime": 0.2642, + "eval_samples_per_second": 617.069, + "eval_steps_per_second": 3.786, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2106 + }, + { + "epoch": 82.0, + "eval_accuracy": 0.8925666056538539, + "eval_auc": 0.9145931950717662, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.641317263622475, + "eval_f1_macro": 0.7890694555517069, + "eval_loss": 0.2821619510650635, + "eval_pr_auc": 0.6287354538303637, + "eval_precision": 0.6770609318996416, + "eval_precision_macro": 0.8026257379014738, + "eval_pred_class_0": 16878, + "eval_pred_class_1": 2790, + "eval_predicted_binding_ratio": 0.14185478950579622, + "eval_recall": 0.6091583360206385, + "eval_recall_macro": 0.7773865561916435, + "eval_runtime": 0.2651, + "eval_samples_per_second": 614.835, + "eval_steps_per_second": 3.772, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2132 + }, + { + "epoch": 83.0, + "eval_accuracy": 0.8929733577384584, + "eval_auc": 0.9148203126674294, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6442453946256549, + "eval_f1_macro": 0.7906286370477088, + "eval_loss": 0.2817782461643219, + "eval_pr_auc": 0.6293872239214393, + "eval_precision": 0.6768465909090909, + "eval_precision_macro": 0.8029675631972466, + "eval_pred_class_0": 16852, + "eval_pred_class_1": 2816, + "eval_predicted_binding_ratio": 0.14317673378076062, + "eval_recall": 0.6146404385682038, + "eval_recall_macro": 0.7798559831520322, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.502, + "eval_steps_per_second": 3.88, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2158 + }, + { + "epoch": 84.0, + "eval_accuracy": 0.8929225137278829, + "eval_auc": 0.9150136584917113, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6435341909275558, + "eval_f1_macro": 0.7902665569930348, + "eval_loss": 0.2815438210964203, + "eval_pr_auc": 0.6300492382313454, + "eval_precision": 0.677235482721767, + "eval_precision_macro": 0.8030326633702543, + "eval_pred_class_0": 16861, + "eval_pred_class_1": 2807, + "eval_predicted_binding_ratio": 0.14271913768558064, + "eval_recall": 0.6130280554659787, + "eval_recall_macro": 0.7791705135179836, + "eval_runtime": 0.2597, + "eval_samples_per_second": 627.685, + "eval_steps_per_second": 3.851, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2184 + }, + { + "epoch": 85.0, + "eval_accuracy": 0.8928716697173072, + "eval_auc": 0.9151760160393141, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6437869822485207, + "eval_f1_macro": 0.7903713942390684, + "eval_loss": 0.28126296401023865, + "eval_pr_auc": 0.6304146488380505, + "eval_precision": 0.6766169154228856, + "eval_precision_macro": 0.8027975997548746, + "eval_pred_class_0": 16854, + "eval_pred_class_1": 2814, + "eval_predicted_binding_ratio": 0.14307504575960953, + "eval_recall": 0.6139954853273137, + "eval_recall_macro": 0.7795335065315872, + "eval_runtime": 0.2553, + "eval_samples_per_second": 638.417, + "eval_steps_per_second": 3.917, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2210 + }, + { + "epoch": 86.0, + "eval_accuracy": 0.8930750457596095, + "eval_auc": 0.9154795142867925, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.643861134631668, + "eval_f1_macro": 0.7904777241463208, + "eval_loss": 0.2809857428073883, + "eval_pr_auc": 0.6313964494387146, + "eval_precision": 0.677960057061341, + "eval_precision_macro": 0.803401280902587, + "eval_pred_class_0": 16864, + "eval_pred_class_1": 2804, + "eval_predicted_binding_ratio": 0.14256660565385398, + "eval_recall": 0.6130280554659787, + "eval_recall_macro": 0.7792610549557817, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.65, + "eval_steps_per_second": 3.924, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2236 + }, + { + "epoch": 87.0, + "eval_accuracy": 0.8930750457596095, + "eval_auc": 0.9156233995513745, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6458999831621485, + "eval_f1_macro": 0.7914651276711422, + "eval_loss": 0.28072693943977356, + "eval_pr_auc": 0.631527672626228, + "eval_precision": 0.6758280479210712, + "eval_precision_macro": 0.8027684505796682, + "eval_pred_class_0": 16830, + "eval_pred_class_1": 2838, + "eval_predicted_binding_ratio": 0.14429530201342283, + "eval_recall": 0.618510158013544, + "eval_recall_macro": 0.7814890380820421, + "eval_runtime": 0.2629, + "eval_samples_per_second": 620.026, + "eval_steps_per_second": 3.804, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2262 + }, + { + "epoch": 88.0, + "eval_accuracy": 0.8932275777913362, + "eval_auc": 0.9158623713307676, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6449103821440649, + "eval_f1_macro": 0.7910387587819241, + "eval_loss": 0.28049618005752563, + "eval_pr_auc": 0.6324229662687507, + "eval_precision": 0.6779239246356203, + "eval_precision_macro": 0.8035422055690709, + "eval_pred_class_0": 16855, + "eval_pred_class_1": 2813, + "eval_predicted_binding_ratio": 0.14302420174903396, + "eval_recall": 0.6149629151886489, + "eval_recall_macro": 0.7801379433793187, + "eval_runtime": 0.2576, + "eval_samples_per_second": 632.715, + "eval_steps_per_second": 3.882, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2288 + }, + { + "epoch": 89.0, + "eval_accuracy": 0.893125889770185, + "eval_auc": 0.9160425393514616, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6452919338508268, + "eval_f1_macro": 0.7911883195144587, + "eval_loss": 0.28025364875793457, + "eval_pr_auc": 0.6329798450144843, + "eval_precision": 0.6768141592920354, + "eval_precision_macro": 0.8031105172758937, + "eval_pred_class_0": 16843, + "eval_pred_class_1": 2825, + "eval_predicted_binding_ratio": 0.1436343298759406, + "eval_recall": 0.6165752982908739, + "eval_recall_macro": 0.7807328715755691, + "eval_runtime": 0.2595, + "eval_samples_per_second": 628.097, + "eval_steps_per_second": 3.853, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2314 + }, + { + "epoch": 90.0, + "eval_accuracy": 0.8936343298759406, + "eval_auc": 0.9161711835226769, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6463826910074375, + "eval_f1_macro": 0.7918927219250234, + "eval_loss": 0.2800801396369934, + "eval_pr_auc": 0.6332605675535015, + "eval_precision": 0.6792184724689165, + "eval_precision_macro": 0.8043336176502299, + "eval_pred_class_0": 16853, + "eval_pred_class_1": 2815, + "eval_predicted_binding_ratio": 0.14312588977018506, + "eval_recall": 0.6165752982908739, + "eval_recall_macro": 0.7810346763682292, + "eval_runtime": 0.2564, + "eval_samples_per_second": 635.634, + "eval_steps_per_second": 3.9, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2340 + }, + { + "epoch": 91.0, + "eval_accuracy": 0.8934309538336384, + "eval_auc": 0.9163414730569294, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6463044211947351, + "eval_f1_macro": 0.7917843566614202, + "eval_loss": 0.2798333764076233, + "eval_pr_auc": 0.633657166273441, + "eval_precision": 0.6778761061946903, + "eval_precision_macro": 0.8037305484960271, + "eval_pred_class_0": 16843, + "eval_pred_class_1": 2825, + "eval_predicted_binding_ratio": 0.1436343298759406, + "eval_recall": 0.617542728152209, + "eval_recall_macro": 0.7813071279440347, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.372, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2366 + }, + { + "epoch": 92.0, + "eval_accuracy": 0.8937868619076673, + "eval_auc": 0.9165959292421633, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6469494676356261, + "eval_f1_macro": 0.7922200583338069, + "eval_loss": 0.27958908677101135, + "eval_pr_auc": 0.6345494466448222, + "eval_precision": 0.6796875, + "eval_precision_macro": 0.8046253782933777, + "eval_pred_class_0": 16852, + "eval_pred_class_1": 2816, + "eval_predicted_binding_ratio": 0.14317673378076062, + "eval_recall": 0.617220251531764, + "eval_recall_macro": 0.7813873334679403, + "eval_runtime": 0.2594, + "eval_samples_per_second": 628.29, + "eval_steps_per_second": 3.855, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2392 + }, + { + "epoch": 93.0, + "eval_accuracy": 0.8938885499288184, + "eval_auc": 0.9168139566838005, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6482386650935446, + "eval_f1_macro": 0.7928796235385993, + "eval_loss": 0.27935075759887695, + "eval_pr_auc": 0.6350396647674293, + "eval_precision": 0.6790254237288136, + "eval_precision_macro": 0.8045281549625298, + "eval_pred_class_0": 16836, + "eval_pred_class_1": 2832, + "eval_predicted_binding_ratio": 0.14399023794996949, + "eval_recall": 0.6201225411157691, + "eval_recall_macro": 0.7826272149050808, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.307, + "eval_steps_per_second": 3.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2418 + }, + { + "epoch": 94.0, + "eval_accuracy": 0.8941936139922717, + "eval_auc": 0.916949978089225, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6492499578628013, + "eval_f1_macro": 0.7934750822155368, + "eval_loss": 0.27906060218811035, + "eval_pr_auc": 0.6354743744677446, + "eval_precision": 0.6800847457627118, + "eval_precision_macro": 0.8051469107763429, + "eval_pred_class_0": 16836, + "eval_pred_class_1": 2832, + "eval_predicted_binding_ratio": 0.14399023794996949, + "eval_recall": 0.6210899709771042, + "eval_recall_macro": 0.7832014712735463, + "eval_runtime": 0.255, + "eval_samples_per_second": 639.231, + "eval_steps_per_second": 3.922, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2444 + }, + { + "epoch": 95.0, + "eval_accuracy": 0.894498678055725, + "eval_auc": 0.9171692902207247, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6496707749451291, + "eval_f1_macro": 0.7937845988573549, + "eval_loss": 0.2788851261138916, + "eval_pr_auc": 0.6362118552671664, + "eval_precision": 0.6817859673990078, + "eval_precision_macro": 0.8059588747122072, + "eval_pred_class_0": 16846, + "eval_pred_class_1": 2822, + "eval_predicted_binding_ratio": 0.14348179784421394, + "eval_recall": 0.6204450177362141, + "eval_recall_macro": 0.7831204384872295, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.948, + "eval_steps_per_second": 3.895, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2470 + }, + { + "epoch": 96.0, + "eval_accuracy": 0.8946003660768761, + "eval_auc": 0.9172832383185145, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.64977192093259, + "eval_f1_macro": 0.7938688135051675, + "eval_loss": 0.27873092889785767, + "eval_pr_auc": 0.6365172336600542, + "eval_precision": 0.6823988644428672, + "eval_precision_macro": 0.8062439426071903, + "eval_pred_class_0": 16850, + "eval_pred_class_1": 2818, + "eval_predicted_binding_ratio": 0.14327842180191175, + "eval_recall": 0.6201225411157691, + "eval_recall_macro": 0.783049741614805, + "eval_runtime": 0.2642, + "eval_samples_per_second": 617.058, + "eval_steps_per_second": 3.786, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2496 + }, + { + "epoch": 96.15384615384616, + "grad_norm": 12855.328125, + "learning_rate": 9.74310718484651e-07, + "loss": 0.268, + "step": 2500 + }, + { + "epoch": 97.0, + "eval_accuracy": 0.8948037421191783, + "eval_auc": 0.9174495472606937, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6506837751139625, + "eval_f1_macro": 0.7943808843546348, + "eval_loss": 0.27859047055244446, + "eval_pr_auc": 0.636938752781715, + "eval_precision": 0.6828490432317506, + "eval_precision_macro": 0.8065794545376371, + "eval_pred_class_0": 16846, + "eval_pred_class_1": 2822, + "eval_predicted_binding_ratio": 0.14348179784421394, + "eval_recall": 0.6214124475975492, + "eval_recall_macro": 0.783694694855695, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.744, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2522 + }, + { + "epoch": 98.0, + "eval_accuracy": 0.8951596501932072, + "eval_auc": 0.9175816853990344, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6521592442645074, + "eval_f1_macro": 0.7952187504847441, + "eval_loss": 0.2782803475856781, + "eval_pr_auc": 0.6372336473067074, + "eval_precision": 0.6837637071100107, + "eval_precision_macro": 0.8072045778587877, + "eval_pred_class_0": 16841, + "eval_pred_class_1": 2827, + "eval_predicted_binding_ratio": 0.14373601789709173, + "eval_recall": 0.6233473073202193, + "eval_recall_macro": 0.784692305196296, + "eval_runtime": 0.2192, + "eval_samples_per_second": 743.723, + "eval_steps_per_second": 4.563, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2548 + }, + { + "epoch": 99.0, + "eval_accuracy": 0.8950579621720561, + "eval_auc": 0.917689375499995, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6515867656988521, + "eval_f1_macro": 0.7949062764205981, + "eval_loss": 0.2782030701637268, + "eval_pr_auc": 0.6376582660543189, + "eval_precision": 0.683669854764435, + "eval_precision_macro": 0.8070768389286704, + "eval_pred_class_0": 16845, + "eval_pred_class_1": 2823, + "eval_predicted_binding_ratio": 0.1435326418547895, + "eval_recall": 0.6223798774588842, + "eval_recall_macro": 0.7842387707448946, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.364, + "eval_steps_per_second": 3.935, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2574 + }, + { + "epoch": 100.0, + "eval_accuracy": 0.8950579621720561, + "eval_auc": 0.917862049496492, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6513513513513514, + "eval_f1_macro": 0.7947922665303561, + "eval_loss": 0.2779688835144043, + "eval_pr_auc": 0.6381115995039711, + "eval_precision": 0.6839304717985101, + "eval_precision_macro": 0.8071560484103832, + "eval_pred_class_0": 16849, + "eval_pred_class_1": 2819, + "eval_predicted_binding_ratio": 0.14332926581248728, + "eval_recall": 0.6217349242179941, + "eval_recall_macro": 0.7839766550829814, + "eval_runtime": 0.219, + "eval_samples_per_second": 744.395, + "eval_steps_per_second": 4.567, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2600 + }, + { + "epoch": 101.0, + "eval_accuracy": 0.8951088061826317, + "eval_auc": 0.9180189763096767, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6513435862768294, + "eval_f1_macro": 0.7948061179237165, + "eval_loss": 0.27778077125549316, + "eval_pr_auc": 0.6385730633658938, + "eval_precision": 0.6843039772727273, + "eval_precision_macro": 0.8073193278245905, + "eval_pred_class_0": 16852, + "eval_pred_class_1": 2816, + "eval_predicted_binding_ratio": 0.14317673378076062, + "eval_recall": 0.6214124475975492, + "eval_recall_macro": 0.783875777731291, + "eval_runtime": 0.2529, + "eval_samples_per_second": 644.591, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2626 + }, + { + "epoch": 102.0, + "eval_accuracy": 0.895413870246085, + "eval_auc": 0.9182058500221522, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6518869521069555, + "eval_f1_macro": 0.7951749356520059, + "eval_loss": 0.2776651084423065, + "eval_pr_auc": 0.6390385071928153, + "eval_precision": 0.6858974358974359, + "eval_precision_macro": 0.8081029290993704, + "eval_pred_class_0": 16860, + "eval_pred_class_1": 2808, + "eval_predicted_binding_ratio": 0.1427699816961562, + "eval_recall": 0.6210899709771042, + "eval_recall_macro": 0.7839258027759306, + "eval_runtime": 0.2557, + "eval_samples_per_second": 637.358, + "eval_steps_per_second": 3.91, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2652 + }, + { + "epoch": 103.0, + "eval_accuracy": 0.8952104942037828, + "eval_auc": 0.9183596527031714, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6527379949452401, + "eval_f1_macro": 0.7955166277830898, + "eval_loss": 0.2773846685886383, + "eval_pr_auc": 0.6394513455183966, + "eval_precision": 0.6834862385321101, + "eval_precision_macro": 0.8071702310636076, + "eval_pred_class_0": 16834, + "eval_pred_class_1": 2834, + "eval_predicted_binding_ratio": 0.1440919259711206, + "eval_recall": 0.6246372138019993, + "eval_recall_macro": 0.785246716999388, + "eval_runtime": 0.2264, + "eval_samples_per_second": 719.926, + "eval_steps_per_second": 4.417, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2678 + }, + { + "epoch": 104.0, + "eval_accuracy": 0.8953630262355095, + "eval_auc": 0.9185293680199856, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6535353535353535, + "eval_f1_macro": 0.7959556034654849, + "eval_loss": 0.2772791385650635, + "eval_pr_auc": 0.6398373166129732, + "eval_precision": 0.6836914406481155, + "eval_precision_macro": 0.8073814027769664, + "eval_pred_class_0": 16829, + "eval_pred_class_1": 2839, + "eval_predicted_binding_ratio": 0.14434614602399837, + "eval_recall": 0.6259271202837794, + "eval_recall_macro": 0.7858614897610121, + "eval_runtime": 0.2593, + "eval_samples_per_second": 628.649, + "eval_steps_per_second": 3.857, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2704 + }, + { + "epoch": 105.0, + "eval_accuracy": 0.8953630262355095, + "eval_auc": 0.9186085224340037, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6536519690339953, + "eval_f1_macro": 0.7960120658489734, + "eval_loss": 0.27707138657569885, + "eval_pr_auc": 0.6400560131071933, + "eval_precision": 0.6835621260119676, + "eval_precision_macro": 0.8073423632971826, + "eval_pred_class_0": 16827, + "eval_pred_class_1": 2841, + "eval_predicted_binding_ratio": 0.1444478340451495, + "eval_recall": 0.6262495969042244, + "eval_recall_macro": 0.7859925475919686, + "eval_runtime": 0.2509, + "eval_samples_per_second": 649.533, + "eval_steps_per_second": 3.985, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2730 + }, + { + "epoch": 106.0, + "eval_accuracy": 0.8953630262355095, + "eval_auc": 0.9187703544266627, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6535353535353535, + "eval_f1_macro": 0.7959556034654849, + "eval_loss": 0.27683117985725403, + "eval_pr_auc": 0.6407556071793965, + "eval_precision": 0.6836914406481155, + "eval_precision_macro": 0.8073814027769664, + "eval_pred_class_0": 16829, + "eval_pred_class_1": 2839, + "eval_predicted_binding_ratio": 0.14434614602399837, + "eval_recall": 0.6259271202837794, + "eval_recall_macro": 0.7858614897610121, + "eval_runtime": 0.3724, + "eval_samples_per_second": 437.667, + "eval_steps_per_second": 2.685, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2756 + }, + { + "epoch": 107.0, + "eval_accuracy": 0.8955664022778117, + "eval_auc": 0.918983612943811, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6543251430494783, + "eval_f1_macro": 0.7964085438550979, + "eval_loss": 0.2766495645046234, + "eval_pr_auc": 0.6413517959683596, + "eval_precision": 0.6842661034846885, + "eval_precision_macro": 0.8077537803332993, + "eval_pred_class_0": 16827, + "eval_pred_class_1": 2841, + "eval_predicted_binding_ratio": 0.1444478340451495, + "eval_recall": 0.6268945501451145, + "eval_recall_macro": 0.7863753851709456, + "eval_runtime": 0.2579, + "eval_samples_per_second": 631.95, + "eval_steps_per_second": 3.877, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2782 + }, + { + "epoch": 108.0, + "eval_accuracy": 0.8957189343095383, + "eval_auc": 0.9190842761805244, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.655583543240974, + "eval_f1_macro": 0.7970707027489733, + "eval_loss": 0.27643415331840515, + "eval_pr_auc": 0.6415079341486267, + "eval_precision": 0.6839523475823406, + "eval_precision_macro": 0.8078082185158045, + "eval_pred_class_0": 16814, + "eval_pred_class_1": 2854, + "eval_predicted_binding_ratio": 0.1451088061826317, + "eval_recall": 0.6294743631086747, + "eval_recall_macro": 0.7875143892563956, + "eval_runtime": 0.2441, + "eval_samples_per_second": 667.885, + "eval_steps_per_second": 4.097, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2808 + }, + { + "epoch": 109.0, + "eval_accuracy": 0.8958206223306895, + "eval_auc": 0.9192001707781057, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6554565327055658, + "eval_f1_macro": 0.7970445082288499, + "eval_loss": 0.2763550579547882, + "eval_pr_auc": 0.6419306315808602, + "eval_precision": 0.6848208011243851, + "eval_precision_macro": 0.8081695255176081, + "eval_pred_class_0": 16822, + "eval_pred_class_1": 2846, + "eval_predicted_binding_ratio": 0.14470205409802725, + "eval_recall": 0.6285069332473395, + "eval_recall_macro": 0.7871815767220581, + "eval_runtime": 0.2526, + "eval_samples_per_second": 645.285, + "eval_steps_per_second": 3.959, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2834 + }, + { + "epoch": 110.0, + "eval_accuracy": 0.8958206223306895, + "eval_auc": 0.9193565525713485, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.655919395465995, + "eval_f1_macro": 0.7972685860227431, + "eval_loss": 0.27626872062683105, + "eval_pr_auc": 0.6423732230660918, + "eval_precision": 0.684302733006307, + "eval_precision_macro": 0.8080131483516131, + "eval_pred_class_0": 16814, + "eval_pred_class_1": 2854, + "eval_predicted_binding_ratio": 0.1451088061826317, + "eval_recall": 0.6297968397291196, + "eval_recall_macro": 0.7877058080458841, + "eval_runtime": 0.2566, + "eval_samples_per_second": 635.337, + "eval_steps_per_second": 3.898, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2860 + }, + { + "epoch": 111.0, + "eval_accuracy": 0.8959223103518406, + "eval_auc": 0.9193956188221624, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.656485987581809, + "eval_f1_macro": 0.7975781647169913, + "eval_loss": 0.2761881351470947, + "eval_pr_auc": 0.6423085789727141, + "eval_precision": 0.6843946815955213, + "eval_precision_macro": 0.8081402319339891, + "eval_pred_class_0": 16810, + "eval_pred_class_1": 2858, + "eval_predicted_binding_ratio": 0.1453121822249339, + "eval_recall": 0.6307642695904547, + "eval_recall_macro": 0.7881593424972857, + "eval_runtime": 0.2618, + "eval_samples_per_second": 622.648, + "eval_steps_per_second": 3.82, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2886 + }, + { + "epoch": 112.0, + "eval_accuracy": 0.8960239983729916, + "eval_auc": 0.9196281671522437, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6564757265244414, + "eval_f1_macro": 0.7976085010119736, + "eval_loss": 0.2759994864463806, + "eval_pr_auc": 0.6431408853405497, + "eval_precision": 0.685133239831697, + "eval_precision_macro": 0.808462195558094, + "eval_pred_class_0": 16816, + "eval_pred_class_1": 2852, + "eval_predicted_binding_ratio": 0.14500711816148057, + "eval_recall": 0.6301193163495646, + "eval_recall_macro": 0.7879575877939047, + "eval_runtime": 0.2423, + "eval_samples_per_second": 672.612, + "eval_steps_per_second": 4.126, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2912 + }, + { + "epoch": 113.0, + "eval_accuracy": 0.8960239983729916, + "eval_auc": 0.9197686265771928, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6561291407432319, + "eval_f1_macro": 0.797440712214738, + "eval_loss": 0.27580633759498596, + "eval_pr_auc": 0.6436439478836922, + "eval_precision": 0.685523541813071, + "eval_precision_macro": 0.8085803418255701, + "eval_pred_class_0": 16822, + "eval_pred_class_1": 2846, + "eval_predicted_binding_ratio": 0.14470205409802725, + "eval_recall": 0.6291518864882296, + "eval_recall_macro": 0.7875644143010352, + "eval_runtime": 0.2575, + "eval_samples_per_second": 632.943, + "eval_steps_per_second": 3.883, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2938 + }, + { + "epoch": 114.0, + "eval_accuracy": 0.8962782184258694, + "eval_auc": 0.9199272079152, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6573731944910984, + "eval_f1_macro": 0.7981312081136818, + "eval_loss": 0.27558717131614685, + "eval_pr_auc": 0.6441453864761489, + "eval_precision": 0.6859446196985629, + "eval_precision_macro": 0.8089550633431857, + "eval_pred_class_0": 16815, + "eval_pred_class_1": 2853, + "eval_predicted_binding_ratio": 0.14505796217205613, + "eval_recall": 0.6310867462108997, + "eval_recall_macro": 0.7885016636831041, + "eval_runtime": 0.259, + "eval_samples_per_second": 629.272, + "eval_steps_per_second": 3.861, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2964 + }, + { + "epoch": 115.0, + "eval_accuracy": 0.8962782184258694, + "eval_auc": 0.9200543727465736, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6571428571428571, + "eval_f1_macro": 0.7980197003020941, + "eval_loss": 0.2754935324192047, + "eval_pr_auc": 0.6445859889828064, + "eval_precision": 0.6862056862056862, + "eval_precision_macro": 0.8090342302245508, + "eval_pred_class_0": 16819, + "eval_pred_class_1": 2849, + "eval_predicted_binding_ratio": 0.1448545861297539, + "eval_recall": 0.6304417929700097, + "eval_recall_macro": 0.7882395480211912, + "eval_runtime": 0.2588, + "eval_samples_per_second": 629.903, + "eval_steps_per_second": 3.864, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2990 + }, + { + "epoch": 115.38461538461539, + "grad_norm": 13551.1435546875, + "learning_rate": 9.488660254357756e-07, + "loss": 0.2594, + "step": 3000 + }, + { + "epoch": 116.0, + "eval_accuracy": 0.8964815944681717, + "eval_auc": 0.9201159794649721, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6579301075268817, + "eval_f1_macro": 0.7984714041109127, + "eval_loss": 0.2753925323486328, + "eval_pr_auc": 0.6447630589609926, + "eval_precision": 0.6867765696246931, + "eval_precision_macro": 0.8094048157037065, + "eval_pred_class_0": 16817, + "eval_pred_class_1": 2851, + "eval_predicted_binding_ratio": 0.14495627415090503, + "eval_recall": 0.6314092228313447, + "eval_recall_macro": 0.7887534434311247, + "eval_runtime": 0.2556, + "eval_samples_per_second": 637.638, + "eval_steps_per_second": 3.912, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3016 + }, + { + "epoch": 117.0, + "eval_accuracy": 0.8964815944681717, + "eval_auc": 0.920259670079575, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6579301075268817, + "eval_f1_macro": 0.7984714041109127, + "eval_loss": 0.27515658736228943, + "eval_pr_auc": 0.6451647424161069, + "eval_precision": 0.6867765696246931, + "eval_precision_macro": 0.8094048157037065, + "eval_pred_class_0": 16817, + "eval_pred_class_1": 2851, + "eval_predicted_binding_ratio": 0.14495627415090503, + "eval_recall": 0.6314092228313447, + "eval_recall_macro": 0.7887534434311247, + "eval_runtime": 0.2523, + "eval_samples_per_second": 645.985, + "eval_steps_per_second": 3.963, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3042 + }, + { + "epoch": 118.0, + "eval_accuracy": 0.8963799064470206, + "eval_auc": 0.920327525062304, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6583975863224941, + "eval_f1_macro": 0.7986623832121911, + "eval_loss": 0.2750197649002075, + "eval_pr_auc": 0.6454382852682906, + "eval_precision": 0.6855148342059337, + "eval_precision_macro": 0.8089241730394068, + "eval_pred_class_0": 16803, + "eval_pred_class_1": 2865, + "eval_predicted_binding_ratio": 0.14566809029896277, + "eval_recall": 0.6333440825540149, + "eval_recall_macro": 0.7894794294583318, + "eval_runtime": 0.2545, + "eval_samples_per_second": 640.495, + "eval_steps_per_second": 3.929, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3068 + }, + { + "epoch": 119.0, + "eval_accuracy": 0.8963799064470206, + "eval_auc": 0.9205032745284716, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6585120643431636, + "eval_f1_macro": 0.7987177919414212, + "eval_loss": 0.27491119503974915, + "eval_pr_auc": 0.6460050422984182, + "eval_precision": 0.6853854202999651, + "eval_precision_macro": 0.8088851986923312, + "eval_pred_class_0": 16801, + "eval_pred_class_1": 2867, + "eval_predicted_binding_ratio": 0.1457697783201139, + "eval_recall": 0.6336665591744598, + "eval_recall_macro": 0.7896104872892882, + "eval_runtime": 0.2295, + "eval_samples_per_second": 710.293, + "eval_steps_per_second": 4.358, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3094 + }, + { + "epoch": 120.0, + "eval_accuracy": 0.8962782184258694, + "eval_auc": 0.9205394599595942, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6587487453997992, + "eval_f1_macro": 0.7987969999557303, + "eval_loss": 0.2747833728790283, + "eval_pr_auc": 0.6459576422229408, + "eval_precision": 0.6843934654153633, + "eval_precision_macro": 0.8084881983738124, + "eval_pred_class_0": 16791, + "eval_pred_class_1": 2877, + "eval_predicted_binding_ratio": 0.14627821842586944, + "eval_recall": 0.63495646565624, + "eval_recall_macro": 0.7900743576545822, + "eval_runtime": 0.2633, + "eval_samples_per_second": 619.079, + "eval_steps_per_second": 3.798, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3120 + }, + { + "epoch": 121.0, + "eval_accuracy": 0.8966341264998983, + "eval_auc": 0.9206969317927203, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6591785414920369, + "eval_f1_macro": 0.7991286912009045, + "eval_loss": 0.2746541202068329, + "eval_pr_auc": 0.6466379382676535, + "eval_precision": 0.6864525139664804, + "eval_precision_macro": 0.8094545359644352, + "eval_pred_class_0": 16804, + "eval_pred_class_1": 2864, + "eval_predicted_binding_ratio": 0.14561724628838724, + "eval_recall": 0.6339890357949048, + "eval_recall_macro": 0.7898924475165747, + "eval_runtime": 0.2553, + "eval_samples_per_second": 638.354, + "eval_steps_per_second": 3.916, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3146 + }, + { + "epoch": 122.0, + "eval_accuracy": 0.8966341264998983, + "eval_auc": 0.9208301697034432, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6592927769398358, + "eval_f1_macro": 0.7991839832435101, + "eval_loss": 0.2745382785797119, + "eval_pr_auc": 0.6470920919458031, + "eval_precision": 0.6863224005582693, + "eval_precision_macro": 0.809415217658018, + "eval_pred_class_0": 16802, + "eval_pred_class_1": 2866, + "eval_predicted_binding_ratio": 0.14571893430953833, + "eval_recall": 0.6343115124153499, + "eval_recall_macro": 0.7900235053475313, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.372, + "eval_steps_per_second": 3.898, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3172 + }, + { + "epoch": 123.0, + "eval_accuracy": 0.8967358145210494, + "eval_auc": 0.9209352222971863, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6597420003350645, + "eval_f1_macro": 0.7994367387715422, + "eval_loss": 0.2744734585285187, + "eval_pr_auc": 0.6474021950727136, + "eval_precision": 0.6865411436541143, + "eval_precision_macro": 0.809580095636581, + "eval_pred_class_0": 16800, + "eval_pred_class_1": 2868, + "eval_predicted_binding_ratio": 0.14582062233068943, + "eval_recall": 0.63495646565624, + "eval_recall_macro": 0.7903459819679763, + "eval_runtime": 0.2539, + "eval_samples_per_second": 641.926, + "eval_steps_per_second": 3.938, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3198 + }, + { + "epoch": 124.0, + "eval_accuracy": 0.896786658531625, + "eval_auc": 0.9210344159265571, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6599664991624791, + "eval_f1_macro": 0.7995630613656908, + "eval_loss": 0.2742863893508911, + "eval_pr_auc": 0.6479185906925482, + "eval_precision": 0.6866504008365284, + "eval_precision_macro": 0.8096624823993346, + "eval_pred_class_0": 16799, + "eval_pred_class_1": 2869, + "eval_predicted_binding_ratio": 0.145871466341265, + "eval_recall": 0.6352789422766849, + "eval_recall_macro": 0.7905072202781989, + "eval_runtime": 0.2336, + "eval_samples_per_second": 697.863, + "eval_steps_per_second": 4.281, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3224 + }, + { + "epoch": 125.0, + "eval_accuracy": 0.896888346552776, + "eval_auc": 0.9211382130279347, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6603015075376885, + "eval_f1_macro": 0.7997605361820792, + "eval_loss": 0.27425193786621094, + "eval_pr_auc": 0.6481518613470144, + "eval_precision": 0.6869989543394911, + "eval_precision_macro": 0.8098665228272252, + "eval_pred_class_0": 16799, + "eval_pred_class_1": 2869, + "eval_predicted_binding_ratio": 0.145871466341265, + "eval_recall": 0.63560141889713, + "eval_recall_macro": 0.7906986390676873, + "eval_runtime": 0.2655, + "eval_samples_per_second": 613.835, + "eval_steps_per_second": 3.766, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3250 + }, + { + "epoch": 126.0, + "eval_accuracy": 0.896888346552776, + "eval_auc": 0.9212811737051158, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6598456893659845, + "eval_f1_macro": 0.7995399118610351, + "eval_loss": 0.2741680145263672, + "eval_pr_auc": 0.6487724280436702, + "eval_precision": 0.6875218455085634, + "eval_precision_macro": 0.8100249793973471, + "eval_pred_class_0": 16807, + "eval_pred_class_1": 2861, + "eval_predicted_binding_ratio": 0.14546471425666058, + "eval_recall": 0.6343115124153499, + "eval_recall_macro": 0.7901744077438613, + "eval_runtime": 0.2775, + "eval_samples_per_second": 587.468, + "eval_steps_per_second": 3.604, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3276 + }, + { + "epoch": 127.0, + "eval_accuracy": 0.8966849705104739, + "eval_auc": 0.9212962201485035, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6608811748998665, + "eval_f1_macro": 0.7999703379297798, + "eval_loss": 0.2739817500114441, + "eval_pr_auc": 0.648761125236648, + "eval_precision": 0.6848841231407817, + "eval_precision_macro": 0.809033228048307, + "eval_pred_class_0": 16777, + "eval_pred_class_1": 2891, + "eval_predicted_binding_ratio": 0.1469900345739272, + "eval_recall": 0.6385037084811351, + "eval_recall_macro": 0.7917574376292318, + "eval_runtime": 0.2571, + "eval_samples_per_second": 634.003, + "eval_steps_per_second": 3.89, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3302 + }, + { + "epoch": 128.0, + "eval_accuracy": 0.8971425666056538, + "eval_auc": 0.9213870048987755, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6608549874266555, + "eval_f1_macro": 0.8001167448595325, + "eval_loss": 0.2739529609680176, + "eval_pr_auc": 0.6489216955933252, + "eval_precision": 0.6881983240223464, + "eval_precision_macro": 0.8104762150937725, + "eval_pred_class_0": 16804, + "eval_pred_class_1": 2864, + "eval_predicted_binding_ratio": 0.14561724628838724, + "eval_recall": 0.63560141889713, + "eval_recall_macro": 0.7908495414640173, + "eval_runtime": 0.26, + "eval_samples_per_second": 626.828, + "eval_steps_per_second": 3.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3328 + }, + { + "epoch": 129.0, + "eval_accuracy": 0.8970408785845028, + "eval_auc": 0.9215601460552225, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6604058359885964, + "eval_f1_macro": 0.7998640212813866, + "eval_loss": 0.27383002638816833, + "eval_pr_auc": 0.6495277773578755, + "eval_precision": 0.6879804332634522, + "eval_precision_macro": 0.8103117684584547, + "eval_pred_class_0": 16806, + "eval_pred_class_1": 2862, + "eval_predicted_binding_ratio": 0.1455155582672361, + "eval_recall": 0.63495646565624, + "eval_recall_macro": 0.7905270648435724, + "eval_runtime": 0.2665, + "eval_samples_per_second": 611.602, + "eval_steps_per_second": 3.752, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3354 + }, + { + "epoch": 130.0, + "eval_accuracy": 0.8969900345739272, + "eval_auc": 0.9216559138449605, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6610906657745065, + "eval_f1_macro": 0.8001778048579948, + "eval_loss": 0.27363157272338867, + "eval_pr_auc": 0.6497825902519792, + "eval_precision": 0.6868265554396942, + "eval_precision_macro": 0.8099131883862756, + "eval_pred_class_0": 16791, + "eval_pred_class_1": 2877, + "eval_predicted_binding_ratio": 0.14627821842586944, + "eval_recall": 0.6372138019993551, + "eval_recall_macro": 0.7914142891810019, + "eval_runtime": 0.2147, + "eval_samples_per_second": 759.086, + "eval_steps_per_second": 4.657, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3380 + }, + { + "epoch": 131.0, + "eval_accuracy": 0.8970917225950783, + "eval_auc": 0.9217394576160085, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6619906479625919, + "eval_f1_macro": 0.800648676506185, + "eval_loss": 0.27347350120544434, + "eval_pr_auc": 0.6501280761818352, + "eval_precision": 0.686525805334257, + "eval_precision_macro": 0.8099216238398834, + "eval_pred_class_0": 16781, + "eval_pred_class_1": 2887, + "eval_predicted_binding_ratio": 0.14678665853162498, + "eval_recall": 0.6391486617220251, + "eval_recall_macro": 0.7922609971252728, + "eval_runtime": 0.2669, + "eval_samples_per_second": 610.642, + "eval_steps_per_second": 3.746, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3406 + }, + { + "epoch": 132.0, + "eval_accuracy": 0.8970917225950783, + "eval_auc": 0.9218276924515536, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6609715242881072, + "eval_f1_macro": 0.8001554858148563, + "eval_loss": 0.2734222412109375, + "eval_pr_auc": 0.6505561229387223, + "eval_precision": 0.6876960613454165, + "eval_precision_macro": 0.8102746036830064, + "eval_pred_class_0": 16799, + "eval_pred_class_1": 2869, + "eval_predicted_binding_ratio": 0.145871466341265, + "eval_recall": 0.63624637213802, + "eval_recall_macro": 0.7910814766466644, + "eval_runtime": 0.2569, + "eval_samples_per_second": 634.527, + "eval_steps_per_second": 3.893, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3432 + }, + { + "epoch": 133.0, + "eval_accuracy": 0.8976001627008339, + "eval_auc": 0.9219583415175538, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6620805369127517, + "eval_f1_macro": 0.8008688878235859, + "eval_loss": 0.273334801197052, + "eval_pr_auc": 0.6510448354697362, + "eval_precision": 0.6901014340678558, + "eval_precision_macro": 0.8114972635268781, + "eval_pred_class_0": 16809, + "eval_pred_class_1": 2859, + "eval_predicted_binding_ratio": 0.14536302623550945, + "eval_recall": 0.63624637213802, + "eval_recall_macro": 0.7913832814393245, + "eval_runtime": 0.2542, + "eval_samples_per_second": 641.276, + "eval_steps_per_second": 3.934, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3458 + }, + { + "epoch": 134.0, + "eval_accuracy": 0.8976001627008339, + "eval_auc": 0.92201595791138, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6632107023411371, + "eval_f1_macro": 0.8014158800109571, + "eval_loss": 0.2731546461582184, + "eval_pr_auc": 0.6511084632800272, + "eval_precision": 0.6887808266759291, + "eval_precision_macro": 0.8110948031169865, + "eval_pred_class_0": 16789, + "eval_pred_class_1": 2879, + "eval_predicted_binding_ratio": 0.14637990644702054, + "eval_recall": 0.6394711383424702, + "eval_recall_macro": 0.7926938597488895, + "eval_runtime": 0.2551, + "eval_samples_per_second": 638.927, + "eval_steps_per_second": 3.92, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3484 + }, + { + "epoch": 134.6153846153846, + "grad_norm": 16295.5498046875, + "learning_rate": 9.153428025759045e-07, + "loss": 0.2515, + "step": 3500 + }, + { + "epoch": 135.0, + "eval_accuracy": 0.8977526947325605, + "eval_auc": 0.9221653809678686, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.663543583737661, + "eval_f1_macro": 0.8016300010477627, + "eval_loss": 0.2730526030063629, + "eval_pr_auc": 0.6517474669432921, + "eval_precision": 0.6894993045897079, + "eval_precision_macro": 0.8114599905511665, + "eval_pred_class_0": 16792, + "eval_pred_class_1": 2876, + "eval_predicted_binding_ratio": 0.14622737441529388, + "eval_recall": 0.6394711383424702, + "eval_recall_macro": 0.7927844011866875, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.318, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3510 + }, + { + "epoch": 136.0, + "eval_accuracy": 0.8976510067114094, + "eval_auc": 0.9222144814251072, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6634342083263668, + "eval_f1_macro": 0.8015417181640828, + "eval_loss": 0.27299538254737854, + "eval_pr_auc": 0.6516584418617962, + "eval_precision": 0.6888888888888889, + "eval_precision_macro": 0.8111766341037249, + "eval_pred_class_0": 16788, + "eval_pred_class_1": 2880, + "eval_predicted_binding_ratio": 0.1464307504575961, + "eval_recall": 0.6397936149629152, + "eval_recall_macro": 0.792855098059112, + "eval_runtime": 0.2563, + "eval_samples_per_second": 635.888, + "eval_steps_per_second": 3.901, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3536 + }, + { + "epoch": 137.0, + "eval_accuracy": 0.8978543827537117, + "eval_auc": 0.922406970789481, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6637656903765691, + "eval_f1_macro": 0.8017728364954996, + "eval_loss": 0.27294018864631653, + "eval_pr_auc": 0.6525345183514315, + "eval_precision": 0.6899791231732777, + "eval_precision_macro": 0.8117038643138033, + "eval_pred_class_0": 16794, + "eval_pred_class_1": 2874, + "eval_predicted_binding_ratio": 0.14612568639414278, + "eval_recall": 0.6394711383424702, + "eval_recall_macro": 0.7928447621452195, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.531, + "eval_steps_per_second": 3.936, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3562 + }, + { + "epoch": 138.0, + "eval_accuracy": 0.8979560707748627, + "eval_auc": 0.9224206449505158, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6643251379829402, + "eval_f1_macro": 0.8020789283763069, + "eval_loss": 0.272890567779541, + "eval_pr_auc": 0.652455646685698, + "eval_precision": 0.6900625434329395, + "eval_precision_macro": 0.8118269834496443, + "eval_pred_class_0": 16790, + "eval_pred_class_1": 2878, + "eval_predicted_binding_ratio": 0.14632906243644497, + "eval_recall": 0.6404385682038052, + "eval_recall_macro": 0.793298296596621, + "eval_runtime": 0.2465, + "eval_samples_per_second": 661.298, + "eval_steps_per_second": 4.057, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3588 + }, + { + "epoch": 139.0, + "eval_accuracy": 0.8980069147854383, + "eval_auc": 0.9225580970332872, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6647727272727273, + "eval_f1_macro": 0.802313204605421, + "eval_loss": 0.2726689577102661, + "eval_pr_auc": 0.6528952820360587, + "eval_precision": 0.6899063475546305, + "eval_precision_macro": 0.8118283599554506, + "eval_pred_class_0": 16785, + "eval_pred_class_1": 2883, + "eval_predicted_binding_ratio": 0.14658328248932276, + "eval_recall": 0.6414059980651403, + "eval_recall_macro": 0.7937216505687565, + "eval_runtime": 0.1981, + "eval_samples_per_second": 822.798, + "eval_steps_per_second": 5.048, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3614 + }, + { + "epoch": 140.0, + "eval_accuracy": 0.898159446817165, + "eval_auc": 0.9226611836622408, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6642078792958928, + "eval_f1_macro": 0.8020928521767887, + "eval_loss": 0.2726409435272217, + "eval_pr_auc": 0.6533512747607447, + "eval_precision": 0.6916899441340782, + "eval_precision_macro": 0.8125195733524473, + "eval_pred_class_0": 16804, + "eval_pred_class_1": 2864, + "eval_predicted_binding_ratio": 0.14561724628838724, + "eval_recall": 0.6388261851015802, + "eval_recall_macro": 0.7927637293589026, + "eval_runtime": 0.2516, + "eval_samples_per_second": 647.759, + "eval_steps_per_second": 3.974, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3640 + }, + { + "epoch": 141.0, + "eval_accuracy": 0.8983119788488916, + "eval_auc": 0.9227806014244446, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.666110183639399, + "eval_f1_macro": 0.8030664874893451, + "eval_loss": 0.2725253105163574, + "eval_pr_auc": 0.6539162157851042, + "eval_precision": 0.6905503634475597, + "eval_precision_macro": 0.8123173177271173, + "eval_pred_class_0": 16779, + "eval_pred_class_1": 2889, + "eval_predicted_binding_ratio": 0.14688834655277608, + "eval_recall": 0.6433408577878104, + "eval_recall_macro": 0.7946890804300916, + "eval_runtime": 0.2669, + "eval_samples_per_second": 610.673, + "eval_steps_per_second": 3.746, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3666 + }, + { + "epoch": 142.0, + "eval_accuracy": 0.898159446817165, + "eval_auc": 0.9228251470721713, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6665556850341269, + "eval_f1_macro": 0.8032289361592369, + "eval_loss": 0.27245020866394043, + "eval_pr_auc": 0.6537875546315605, + "eval_precision": 0.6889194769442533, + "eval_precision_macro": 0.8116772542816959, + "eval_pred_class_0": 16762, + "eval_pred_class_1": 2906, + "eval_predicted_binding_ratio": 0.1477526947325605, + "eval_recall": 0.6455981941309256, + "eval_recall_macro": 0.795515943808989, + "eval_runtime": 0.2562, + "eval_samples_per_second": 636.157, + "eval_steps_per_second": 3.903, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3692 + }, + { + "epoch": 143.0, + "eval_accuracy": 0.8984645108806183, + "eval_auc": 0.9229446329618678, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6663324979114453, + "eval_f1_macro": 0.8032271166958205, + "eval_loss": 0.2723686695098877, + "eval_pr_auc": 0.6543603067705216, + "eval_precision": 0.6914008321775312, + "eval_precision_macro": 0.8127225800544472, + "eval_pred_class_0": 16784, + "eval_pred_class_1": 2884, + "eval_predicted_binding_ratio": 0.14663412649989832, + "eval_recall": 0.6430183811673653, + "eval_recall_macro": 0.7946485640369331, + "eval_runtime": 0.2531, + "eval_samples_per_second": 644.012, + "eval_steps_per_second": 3.951, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3718 + }, + { + "epoch": 144.0, + "eval_accuracy": 0.898159446817165, + "eval_auc": 0.9229539761608667, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6658882402001668, + "eval_f1_macro": 0.8029060288610683, + "eval_loss": 0.2724270820617676, + "eval_pr_auc": 0.6540924505284794, + "eval_precision": 0.6897028334485141, + "eval_precision_macro": 0.8119135366717949, + "eval_pred_class_0": 16774, + "eval_pred_class_1": 2894, + "eval_predicted_binding_ratio": 0.14714256660565386, + "eval_recall": 0.6436633344082554, + "eval_recall_macro": 0.7947295968232501, + "eval_runtime": 0.2578, + "eval_samples_per_second": 632.29, + "eval_steps_per_second": 3.879, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3744 + }, + { + "epoch": 145.0, + "eval_accuracy": 0.8983119788488916, + "eval_auc": 0.923075982767793, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6671105193075899, + "eval_f1_macro": 0.8035504588856721, + "eval_loss": 0.27218085527420044, + "eval_pr_auc": 0.6548951387334131, + "eval_precision": 0.6893704850361198, + "eval_precision_macro": 0.8119604647601695, + "eval_pred_class_0": 16761, + "eval_pred_class_1": 2907, + "eval_predicted_binding_ratio": 0.14780353874313606, + "eval_recall": 0.6462431473718155, + "eval_recall_macro": 0.7958686009087, + "eval_runtime": 0.2047, + "eval_samples_per_second": 796.353, + "eval_steps_per_second": 4.886, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3770 + }, + { + "epoch": 146.0, + "eval_accuracy": 0.8984645108806183, + "eval_auc": 0.9231917800403848, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6671111851975329, + "eval_f1_macro": 0.8036038872863508, + "eval_loss": 0.27211907505989075, + "eval_pr_auc": 0.6553363499797747, + "eval_precision": 0.6904761904761905, + "eval_precision_macro": 0.8124414345344577, + "eval_pred_class_0": 16770, + "eval_pred_class_1": 2898, + "eval_predicted_binding_ratio": 0.14734594264795606, + "eval_recall": 0.6452757175104805, + "eval_recall_macro": 0.7955659688536286, + "eval_runtime": 0.2587, + "eval_samples_per_second": 629.971, + "eval_steps_per_second": 3.865, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3796 + }, + { + "epoch": 147.0, + "eval_accuracy": 0.8985661989017694, + "eval_auc": 0.9232496689441817, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6676661669165417, + "eval_f1_macro": 0.8039077842052783, + "eval_loss": 0.2721000015735626, + "eval_pr_auc": 0.655340056976625, + "eval_precision": 0.6905582356995176, + "eval_precision_macro": 0.812564099359958, + "eval_pred_class_0": 16766, + "eval_pred_class_1": 2902, + "eval_predicted_binding_ratio": 0.14754931869025828, + "eval_recall": 0.6462431473718155, + "eval_recall_macro": 0.7960195033050301, + "eval_runtime": 0.2516, + "eval_samples_per_second": 647.767, + "eval_steps_per_second": 3.974, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3822 + }, + { + "epoch": 148.0, + "eval_accuracy": 0.8985661989017694, + "eval_auc": 0.9233701767462686, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6673336668334167, + "eval_f1_macro": 0.8037469198020228, + "eval_loss": 0.2719952464103699, + "eval_pr_auc": 0.6558934902075464, + "eval_precision": 0.6909530386740331, + "eval_precision_macro": 0.8126837695158862, + "eval_pred_class_0": 16772, + "eval_pred_class_1": 2896, + "eval_predicted_binding_ratio": 0.14724425462680496, + "eval_recall": 0.6452757175104805, + "eval_recall_macro": 0.7956263298121606, + "eval_runtime": 0.2562, + "eval_samples_per_second": 636.275, + "eval_steps_per_second": 3.904, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3848 + }, + { + "epoch": 149.0, + "eval_accuracy": 0.8983628228594671, + "eval_auc": 0.9234171749837325, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6676641729010806, + "eval_f1_macro": 0.8038359878940744, + "eval_loss": 0.27190613746643066, + "eval_pr_auc": 0.6558672612955618, + "eval_precision": 0.6890871654083733, + "eval_precision_macro": 0.8119245066626444, + "eval_pred_class_0": 16754, + "eval_pred_class_1": 2914, + "eval_predicted_binding_ratio": 0.14815944681716495, + "eval_recall": 0.6475330538535956, + "eval_recall_macro": 0.796423012711792, + "eval_runtime": 0.1964, + "eval_samples_per_second": 830.09, + "eval_steps_per_second": 5.093, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3874 + }, + { + "epoch": 150.0, + "eval_accuracy": 0.8986678869229204, + "eval_auc": 0.9235316972989609, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6682204095222241, + "eval_f1_macro": 0.8042113179058208, + "eval_loss": 0.2718164622783661, + "eval_pr_auc": 0.6562427373314145, + "eval_precision": 0.6906400550584997, + "eval_precision_macro": 0.8126866902186664, + "eval_pred_class_0": 16762, + "eval_pred_class_1": 2906, + "eval_predicted_binding_ratio": 0.1477526947325605, + "eval_recall": 0.6472105772331506, + "eval_recall_macro": 0.7964730377564316, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.207, + "eval_steps_per_second": 3.842, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3900 + }, + { + "epoch": 151.0, + "eval_accuracy": 0.8984645108806183, + "eval_auc": 0.923607970893288, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.66744379683597, + "eval_f1_macro": 0.8037648014211952, + "eval_loss": 0.27192452549934387, + "eval_pr_auc": 0.6563118529429329, + "eval_precision": 0.6900826446280992, + "eval_precision_macro": 0.8123224008156005, + "eval_pred_class_0": 16764, + "eval_pred_class_1": 2904, + "eval_predicted_binding_ratio": 0.1476510067114094, + "eval_recall": 0.6462431473718155, + "eval_recall_macro": 0.7959591423464981, + "eval_runtime": 0.1771, + "eval_samples_per_second": 920.181, + "eval_steps_per_second": 5.645, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3926 + }, + { + "epoch": 152.0, + "eval_accuracy": 0.8986170429123449, + "eval_auc": 0.9236869111923289, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6687707641196013, + "eval_f1_macro": 0.8044598207679289, + "eval_loss": 0.2716231048107147, + "eval_pr_auc": 0.6568319718650024, + "eval_precision": 0.6896197327852004, + "eval_precision_macro": 0.812330315374629, + "eval_pred_class_0": 16749, + "eval_pred_class_1": 2919, + "eval_predicted_binding_ratio": 0.1484136668700427, + "eval_recall": 0.6491454369558207, + "eval_recall_macro": 0.7972292042629046, + "eval_runtime": 0.241, + "eval_samples_per_second": 676.369, + "eval_steps_per_second": 4.15, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3952 + }, + { + "epoch": 153.0, + "eval_accuracy": 0.8986678869229204, + "eval_auc": 0.9238590985638783, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6671120761650242, + "eval_f1_macro": 0.8036750821318089, + "eval_loss": 0.2716236114501953, + "eval_pr_auc": 0.657752547087354, + "eval_precision": 0.691961191961192, + "eval_precision_macro": 0.8130882112827054, + "eval_pred_class_0": 16782, + "eval_pred_class_1": 2886, + "eval_predicted_binding_ratio": 0.14673581452104942, + "eval_recall": 0.6439858110287005, + "eval_recall_macro": 0.7951624594468667, + "eval_runtime": 0.2469, + "eval_samples_per_second": 660.315, + "eval_steps_per_second": 4.051, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3978 + }, + { + "epoch": 153.84615384615384, + "grad_norm": 13863.017578125, + "learning_rate": 8.743443888522679e-07, + "loss": 0.244, + "step": 4000 + }, + { + "epoch": 154.0, + "eval_accuracy": 0.8986170429123449, + "eval_auc": 0.9239073328287097, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6674449633088726, + "eval_f1_macro": 0.8038184624582756, + "eval_loss": 0.27161940932273865, + "eval_pr_auc": 0.6577434889769634, + "eval_precision": 0.6911917098445596, + "eval_precision_macro": 0.8128050601926549, + "eval_pred_class_0": 16773, + "eval_pred_class_1": 2895, + "eval_predicted_binding_ratio": 0.1471934106162294, + "eval_recall": 0.6452757175104805, + "eval_recall_macro": 0.7956565102914266, + "eval_runtime": 0.2311, + "eval_samples_per_second": 705.281, + "eval_steps_per_second": 4.327, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4004 + }, + { + "epoch": 155.0, + "eval_accuracy": 0.8983119788488916, + "eval_auc": 0.9240325316952941, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6666666666666666, + "eval_f1_macro": 0.8033357331413487, + "eval_loss": 0.2715211510658264, + "eval_pr_auc": 0.6581299979478261, + "eval_precision": 0.689893066574681, + "eval_precision_macro": 0.812118099868532, + "eval_pred_class_0": 16769, + "eval_pred_class_1": 2899, + "eval_predicted_binding_ratio": 0.14739678665853162, + "eval_recall": 0.6449532408900355, + "eval_recall_macro": 0.7953443695848741, + "eval_runtime": 0.2265, + "eval_samples_per_second": 719.735, + "eval_steps_per_second": 4.416, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4030 + }, + { + "epoch": 156.0, + "eval_accuracy": 0.8987187309334961, + "eval_auc": 0.9240861966945435, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6688829787234043, + "eval_f1_macro": 0.8045495325789891, + "eval_loss": 0.27123013138771057, + "eval_pr_auc": 0.6585643355556502, + "eval_precision": 0.6902229845626072, + "eval_precision_macro": 0.8126098507842583, + "eval_pred_class_0": 16753, + "eval_pred_class_1": 2915, + "eval_predicted_binding_ratio": 0.14821029082774048, + "eval_recall": 0.6488229603353757, + "eval_recall_macro": 0.7971585073904801, + "eval_runtime": 0.2445, + "eval_samples_per_second": 666.601, + "eval_steps_per_second": 4.09, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4056 + }, + { + "epoch": 157.0, + "eval_accuracy": 0.8987695749440716, + "eval_auc": 0.9242226073999265, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.668111351891982, + "eval_f1_macro": 0.8041939607346642, + "eval_loss": 0.2712385952472687, + "eval_pr_auc": 0.6591839305732792, + "eval_precision": 0.6915113871635611, + "eval_precision_macro": 0.8130484783164258, + "eval_pred_class_0": 16770, + "eval_pred_class_1": 2898, + "eval_predicted_binding_ratio": 0.14734594264795606, + "eval_recall": 0.6462431473718155, + "eval_recall_macro": 0.7961402252220942, + "eval_runtime": 0.2571, + "eval_samples_per_second": 634.046, + "eval_steps_per_second": 3.89, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4082 + }, + { + "epoch": 158.0, + "eval_accuracy": 0.8989729509863738, + "eval_auc": 0.9241500613527003, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6699883740242485, + "eval_f1_macro": 0.8051727852411502, + "eval_loss": 0.27118799090385437, + "eval_pr_auc": 0.6587023192472763, + "eval_precision": 0.6907534246575342, + "eval_precision_macro": 0.8130146392454138, + "eval_pred_class_0": 16748, + "eval_pred_class_1": 2920, + "eval_predicted_binding_ratio": 0.14846451088061827, + "eval_recall": 0.6504353434376008, + "eval_recall_macro": 0.7979646989415927, + "eval_runtime": 0.2165, + "eval_samples_per_second": 752.927, + "eval_steps_per_second": 4.619, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4108 + }, + { + "epoch": 159.0, + "eval_accuracy": 0.8990237949969494, + "eval_auc": 0.9243365652302152, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6693306693306693, + "eval_f1_macro": 0.8048723553674049, + "eval_loss": 0.2712218463420868, + "eval_pr_auc": 0.6594296109425748, + "eval_precision": 0.6919104991394148, + "eval_precision_macro": 0.8134133417966358, + "eval_pred_class_0": 16763, + "eval_pred_class_1": 2905, + "eval_predicted_binding_ratio": 0.14770185072198494, + "eval_recall": 0.6481780070944857, + "eval_recall_macro": 0.7970774746041632, + "eval_runtime": 0.256, + "eval_samples_per_second": 636.627, + "eval_steps_per_second": 3.906, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4134 + }, + { + "epoch": 160.0, + "eval_accuracy": 0.8990746390075249, + "eval_auc": 0.9243664342695147, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6709762970329852, + "eval_f1_macro": 0.805686028587357, + "eval_loss": 0.2712063789367676, + "eval_pr_auc": 0.6596905428752633, + "eval_precision": 0.6903137789904502, + "eval_precision_macro": 0.8129807422676916, + "eval_pred_class_0": 16736, + "eval_pred_class_1": 2932, + "eval_predicted_binding_ratio": 0.1490746390075249, + "eval_recall": 0.6526926797807159, + "eval_recall_macro": 0.7989424647168202, + "eval_runtime": 0.1787, + "eval_samples_per_second": 912.327, + "eval_steps_per_second": 5.597, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4160 + }, + { + "epoch": 161.0, + "eval_accuracy": 0.8989729509863738, + "eval_auc": 0.9244822704721024, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6700979578283247, + "eval_f1_macro": 0.8052257867669526, + "eval_loss": 0.2710643708705902, + "eval_pr_auc": 0.6603246471492675, + "eval_precision": 0.6906228610540726, + "eval_precision_macro": 0.8129753502690642, + "eval_pred_class_0": 16746, + "eval_pred_class_1": 2922, + "eval_predicted_binding_ratio": 0.14856619890176936, + "eval_recall": 0.6507578200580458, + "eval_recall_macro": 0.7980957567725492, + "eval_runtime": 0.2165, + "eval_samples_per_second": 752.853, + "eval_steps_per_second": 4.619, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4186 + }, + { + "epoch": 162.0, + "eval_accuracy": 0.8989729509863738, + "eval_auc": 0.9245231275027241, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6700979578283247, + "eval_f1_macro": 0.8052257867669526, + "eval_loss": 0.2710554301738739, + "eval_pr_auc": 0.6603878428843051, + "eval_precision": 0.6906228610540726, + "eval_precision_macro": 0.8129753502690642, + "eval_pred_class_0": 16746, + "eval_pred_class_1": 2922, + "eval_predicted_binding_ratio": 0.14856619890176936, + "eval_recall": 0.6507578200580458, + "eval_recall_macro": 0.7980957567725492, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.523, + "eval_steps_per_second": 3.936, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4212 + }, + { + "epoch": 163.0, + "eval_accuracy": 0.8993288590604027, + "eval_auc": 0.9246038585815736, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6719681908548708, + "eval_f1_macro": 0.8062543656977057, + "eval_loss": 0.27094030380249023, + "eval_pr_auc": 0.6606722300563197, + "eval_precision": 0.6909710391822828, + "eval_precision_macro": 0.8134231279100321, + "eval_pred_class_0": 16733, + "eval_pred_class_1": 2935, + "eval_predicted_binding_ratio": 0.14922717103925157, + "eval_recall": 0.653982586262496, + "eval_recall_macro": 0.7996175984369762, + "eval_runtime": 0.2218, + "eval_samples_per_second": 735.026, + "eval_steps_per_second": 4.509, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4238 + }, + { + "epoch": 164.0, + "eval_accuracy": 0.8992271710392515, + "eval_auc": 0.9247262642209572, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6713101160862355, + "eval_f1_macro": 0.8059006594362601, + "eval_loss": 0.2709755003452301, + "eval_pr_auc": 0.6610856256039915, + "eval_precision": 0.691020826220553, + "eval_precision_macro": 0.8133400325618567, + "eval_pred_class_0": 16739, + "eval_pred_class_1": 2929, + "eval_predicted_binding_ratio": 0.14892210697579825, + "eval_recall": 0.6526926797807159, + "eval_recall_macro": 0.7990330061546183, + "eval_runtime": 0.2493, + "eval_samples_per_second": 653.878, + "eval_steps_per_second": 4.012, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4264 + }, + { + "epoch": 165.0, + "eval_accuracy": 0.8998881431767338, + "eval_auc": 0.9247699826062725, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6738446248136492, + "eval_f1_macro": 0.8073568599908361, + "eval_loss": 0.2707850933074951, + "eval_pr_auc": 0.6613417671448518, + "eval_precision": 0.6927792915531336, + "eval_precision_macro": 0.8145046350187375, + "eval_pred_class_0": 16732, + "eval_pred_class_1": 2936, + "eval_predicted_binding_ratio": 0.14927801504982713, + "eval_recall": 0.6559174459851661, + "eval_recall_macro": 0.8007359306946413, + "eval_runtime": 0.1724, + "eval_samples_per_second": 945.745, + "eval_steps_per_second": 5.802, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4290 + }, + { + "epoch": 166.0, + "eval_accuracy": 0.9001423632296115, + "eval_auc": 0.9248060123174118, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6753719008264463, + "eval_f1_macro": 0.8081840577256068, + "eval_loss": 0.2708885669708252, + "eval_pr_auc": 0.6612557604235284, + "eval_precision": 0.6927772126144456, + "eval_precision_macro": 0.8147479579430862, + "eval_pred_class_0": 16719, + "eval_pred_class_1": 2949, + "eval_predicted_binding_ratio": 0.14993898718730933, + "eval_recall": 0.6588197355691713, + "eval_recall_macro": 0.8020663535695799, + "eval_runtime": 0.2656, + "eval_samples_per_second": 613.671, + "eval_steps_per_second": 3.765, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4316 + }, + { + "epoch": 167.0, + "eval_accuracy": 0.8999389871873094, + "eval_auc": 0.9248204358808662, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6741721854304635, + "eval_f1_macro": 0.8075329932438238, + "eval_loss": 0.270906925201416, + "eval_pr_auc": 0.6612342465257918, + "eval_precision": 0.692752636951344, + "eval_precision_macro": 0.8145453662370445, + "eval_pred_class_0": 16729, + "eval_pred_class_1": 2939, + "eval_predicted_binding_ratio": 0.1494305470815538, + "eval_recall": 0.6565623992260561, + "eval_recall_macro": 0.8010282268358203, + "eval_runtime": 0.2263, + "eval_samples_per_second": 720.252, + "eval_steps_per_second": 4.419, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4342 + }, + { + "epoch": 168.0, + "eval_accuracy": 0.900091519219036, + "eval_auc": 0.9249249239896699, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6750454770960806, + "eval_f1_macro": 0.8080084845902765, + "eval_loss": 0.2706829011440277, + "eval_pr_auc": 0.6618605064537387, + "eval_precision": 0.6928038017651053, + "eval_precision_macro": 0.8147071275300828, + "eval_pred_class_0": 16722, + "eval_pred_class_1": 2946, + "eval_predicted_binding_ratio": 0.14978645515558267, + "eval_recall": 0.6581747823282812, + "eval_recall_macro": 0.8017740574284009, + "eval_runtime": 0.2625, + "eval_samples_per_second": 620.89, + "eval_steps_per_second": 3.809, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4368 + }, + { + "epoch": 169.0, + "eval_accuracy": 0.8999898311978849, + "eval_auc": 0.9250021027063997, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6742838218248054, + "eval_f1_macro": 0.8076047153692607, + "eval_loss": 0.2705872058868408, + "eval_pr_auc": 0.6621173041985378, + "eval_precision": 0.6929884275017019, + "eval_precision_macro": 0.8146651641393745, + "eval_pred_class_0": 16730, + "eval_pred_class_1": 2938, + "eval_predicted_binding_ratio": 0.14937970307097823, + "eval_recall": 0.6565623992260561, + "eval_recall_macro": 0.8010584073150864, + "eval_runtime": 0.2622, + "eval_samples_per_second": 621.716, + "eval_steps_per_second": 3.814, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4394 + }, + { + "epoch": 170.0, + "eval_accuracy": 0.900091519219036, + "eval_auc": 0.9250495097088196, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6750454770960806, + "eval_f1_macro": 0.8080084845902765, + "eval_loss": 0.2706546485424042, + "eval_pr_auc": 0.6620136434657915, + "eval_precision": 0.6928038017651053, + "eval_precision_macro": 0.8147071275300828, + "eval_pred_class_0": 16722, + "eval_pred_class_1": 2946, + "eval_predicted_binding_ratio": 0.14978645515558267, + "eval_recall": 0.6581747823282812, + "eval_recall_macro": 0.8017740574284009, + "eval_runtime": 0.2696, + "eval_samples_per_second": 604.539, + "eval_steps_per_second": 3.709, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4420 + }, + { + "epoch": 171.0, + "eval_accuracy": 0.8999389871873094, + "eval_auc": 0.9250856659424456, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6748182419035029, + "eval_f1_macro": 0.8078453665953039, + "eval_loss": 0.27060601115226746, + "eval_pr_auc": 0.6621340116082275, + "eval_precision": 0.6919688241274145, + "eval_precision_macro": 0.814310068581025, + "eval_pred_class_0": 16717, + "eval_pred_class_1": 2951, + "eval_predicted_binding_ratio": 0.15004067520846046, + "eval_recall": 0.6584972589487262, + "eval_recall_macro": 0.8018145738215594, + "eval_runtime": 0.2527, + "eval_samples_per_second": 645.143, + "eval_steps_per_second": 3.958, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4446 + }, + { + "epoch": 172.0, + "eval_accuracy": 0.9000406752084604, + "eval_auc": 0.9252334636716082, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6741796486576069, + "eval_f1_macro": 0.8075720776469225, + "eval_loss": 0.27041611075401306, + "eval_pr_auc": 0.6630610344326174, + "eval_precision": 0.6934878963518581, + "eval_precision_macro": 0.8148646532849819, + "eval_pred_class_0": 16735, + "eval_pred_class_1": 2933, + "eval_predicted_binding_ratio": 0.14912548301810047, + "eval_recall": 0.6559174459851661, + "eval_recall_macro": 0.8008264721324394, + "eval_runtime": 0.2332, + "eval_samples_per_second": 698.832, + "eval_steps_per_second": 4.287, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4472 + }, + { + "epoch": 173.0, + "eval_accuracy": 0.8998372991661582, + "eval_auc": 0.9253198298673536, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6740569159497022, + "eval_f1_macro": 0.8074417704823604, + "eval_loss": 0.2705075442790985, + "eval_pr_auc": 0.662984991174244, + "eval_precision": 0.6921508664627931, + "eval_precision_macro": 0.8142667635751932, + "eval_pred_class_0": 16725, + "eval_pred_class_1": 2943, + "eval_predicted_binding_ratio": 0.149633923123856, + "eval_recall": 0.6568848758465011, + "eval_recall_macro": 0.8010989237082449, + "eval_runtime": 0.2134, + "eval_samples_per_second": 763.793, + "eval_steps_per_second": 4.686, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4498 + }, + { + "epoch": 173.07692307692307, + "grad_norm": 15784.1748046875, + "learning_rate": 8.266086590174684e-07, + "loss": 0.2376, + "step": 4500 + }, + { + "epoch": 174.0, + "eval_accuracy": 0.8999898311978849, + "eval_auc": 0.9254431016991443, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6740679370339685, + "eval_f1_macro": 0.8075003208787752, + "eval_loss": 0.2703414559364319, + "eval_pr_auc": 0.6637127837233647, + "eval_precision": 0.6932515337423313, + "eval_precision_macro": 0.8147445669189726, + "eval_pred_class_0": 16734, + "eval_pred_class_1": 2934, + "eval_predicted_binding_ratio": 0.14917632702867603, + "eval_recall": 0.6559174459851661, + "eval_recall_macro": 0.8007962916531735, + "eval_runtime": 0.1979, + "eval_samples_per_second": 823.53, + "eval_steps_per_second": 5.052, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4524 + }, + { + "epoch": 175.0, + "eval_accuracy": 0.9001423632296115, + "eval_auc": 0.9255452150782025, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6746189529489728, + "eval_f1_macro": 0.8078199869849969, + "eval_loss": 0.27038928866386414, + "eval_pr_auc": 0.6639609479782242, + "eval_precision": 0.6936967632027258, + "eval_precision_macro": 0.8150250385068789, + "eval_pred_class_0": 16733, + "eval_pred_class_1": 2935, + "eval_predicted_binding_ratio": 0.14922717103925157, + "eval_recall": 0.6565623992260561, + "eval_recall_macro": 0.8011489487528844, + "eval_runtime": 0.2587, + "eval_samples_per_second": 630.051, + "eval_steps_per_second": 3.865, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4550 + }, + { + "epoch": 176.0, + "eval_accuracy": 0.8999898311978849, + "eval_auc": 0.9255934298780361, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6742838218248054, + "eval_f1_macro": 0.8076047153692607, + "eval_loss": 0.2702932059764862, + "eval_pr_auc": 0.6640830183725597, + "eval_precision": 0.6929884275017019, + "eval_precision_macro": 0.8146651641393745, + "eval_pred_class_0": 16730, + "eval_pred_class_1": 2938, + "eval_predicted_binding_ratio": 0.14937970307097823, + "eval_recall": 0.6565623992260561, + "eval_recall_macro": 0.8010584073150864, + "eval_runtime": 0.1918, + "eval_samples_per_second": 849.843, + "eval_steps_per_second": 5.214, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4576 + }, + { + "epoch": 177.0, + "eval_accuracy": 0.8998881431767338, + "eval_auc": 0.9255964274877148, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6745992397950752, + "eval_f1_macro": 0.8077217319600283, + "eval_loss": 0.27021023631095886, + "eval_pr_auc": 0.6640237853140233, + "eval_precision": 0.691864406779661, + "eval_precision_macro": 0.8142298466485935, + "eval_pred_class_0": 16718, + "eval_pred_class_1": 2950, + "eval_predicted_binding_ratio": 0.1499898311978849, + "eval_recall": 0.6581747823282812, + "eval_recall_macro": 0.8016533355113369, + "eval_runtime": 0.2437, + "eval_samples_per_second": 668.962, + "eval_steps_per_second": 4.104, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4602 + }, + { + "epoch": 178.0, + "eval_accuracy": 0.900549115314216, + "eval_auc": 0.9257395146873824, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6756218905472637, + "eval_f1_macro": 0.8084468667292255, + "eval_loss": 0.27011793851852417, + "eval_pr_auc": 0.6647736112265655, + "eval_precision": 0.695459201092523, + "eval_precision_macro": 0.8159475347119822, + "eval_pred_class_0": 16739, + "eval_pred_class_1": 2929, + "eval_predicted_binding_ratio": 0.14892210697579825, + "eval_recall": 0.6568848758465011, + "eval_recall_macro": 0.801521450417969, + "eval_runtime": 0.2294, + "eval_samples_per_second": 710.491, + "eval_steps_per_second": 4.359, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4628 + }, + { + "epoch": 179.0, + "eval_accuracy": 0.9003965832824893, + "eval_auc": 0.9258087711499611, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6755010766937221, + "eval_f1_macro": 0.8083352405901716, + "eval_loss": 0.2701837122440338, + "eval_pr_auc": 0.6649310182754444, + "eval_precision": 0.6944822888283378, + "eval_precision_macro": 0.8155055479522995, + "eval_pred_class_0": 16732, + "eval_pred_class_1": 2936, + "eval_predicted_binding_ratio": 0.14927801504982713, + "eval_recall": 0.6575298290873912, + "eval_recall_macro": 0.8016930246420839, + "eval_runtime": 0.1963, + "eval_samples_per_second": 830.526, + "eval_steps_per_second": 5.095, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4654 + }, + { + "epoch": 180.0, + "eval_accuracy": 0.9004474272930649, + "eval_auc": 0.9258260560681089, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6762566137566137, + "eval_f1_macro": 0.8087183092815753, + "eval_loss": 0.27005937695503235, + "eval_pr_auc": 0.6649666141685525, + "eval_precision": 0.6939260264675942, + "eval_precision_macro": 0.8153859544454471, + "eval_pred_class_0": 16721, + "eval_pred_class_1": 2947, + "eval_predicted_binding_ratio": 0.14983729916615823, + "eval_recall": 0.6594646888100613, + "eval_recall_macro": 0.802509552107089, + "eval_runtime": 0.2598, + "eval_samples_per_second": 627.51, + "eval_steps_per_second": 3.85, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4680 + }, + { + "epoch": 181.0, + "eval_accuracy": 0.9005999593247915, + "eval_auc": 0.9259436830505047, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6764851894754261, + "eval_f1_macro": 0.8088820685009666, + "eval_loss": 0.2699625492095947, + "eval_pr_auc": 0.6656768815498022, + "eval_precision": 0.6947654656696125, + "eval_precision_macro": 0.8157852199805673, + "eval_pred_class_0": 16726, + "eval_pred_class_1": 2942, + "eval_predicted_binding_ratio": 0.14958307911328045, + "eval_recall": 0.6591422121896162, + "eval_recall_macro": 0.8024690357139305, + "eval_runtime": 0.1825, + "eval_samples_per_second": 893.316, + "eval_steps_per_second": 5.48, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4706 + }, + { + "epoch": 182.0, + "eval_accuracy": 0.9011083994305471, + "eval_auc": 0.9259346220939755, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6787778695293146, + "eval_f1_macro": 0.8101680579881181, + "eval_loss": 0.2698967456817627, + "eval_pr_auc": 0.6657214869012321, + "eval_precision": 0.6956668923493569, + "eval_precision_macro": 0.8165423129929146, + "eval_pred_class_0": 16714, + "eval_pred_class_1": 2954, + "eval_predicted_binding_ratio": 0.15019320724018712, + "eval_recall": 0.6626894550145115, + "eval_recall_macro": 0.8042124766471122, + "eval_runtime": 0.2487, + "eval_samples_per_second": 655.302, + "eval_steps_per_second": 4.02, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4732 + }, + { + "epoch": 183.0, + "eval_accuracy": 0.9010575554199716, + "eval_auc": 0.9260415822575143, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6782407407407407, + "eval_f1_macro": 0.8098906179070202, + "eval_loss": 0.26987963914871216, + "eval_pr_auc": 0.6659658035928079, + "eval_precision": 0.6959619952494062, + "eval_precision_macro": 0.8165833539431051, + "eval_pred_class_0": 16721, + "eval_pred_class_1": 2947, + "eval_predicted_binding_ratio": 0.14983729916615823, + "eval_recall": 0.6613995485327314, + "eval_recall_macro": 0.8036580648440201, + "eval_runtime": 0.2595, + "eval_samples_per_second": 628.055, + "eval_steps_per_second": 3.853, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4758 + }, + { + "epoch": 184.0, + "eval_accuracy": 0.9010575554199716, + "eval_auc": 0.9260998701937684, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6781343036718491, + "eval_f1_macro": 0.8098391554406106, + "eval_loss": 0.26986023783683777, + "eval_pr_auc": 0.6661468987350531, + "eval_precision": 0.6960950764006791, + "eval_precision_macro": 0.8166237506024205, + "eval_pred_class_0": 16723, + "eval_pred_class_1": 2945, + "eval_predicted_binding_ratio": 0.1497356111450071, + "eval_recall": 0.6610770719122864, + "eval_recall_macro": 0.8035270070130636, + "eval_runtime": 0.237, + "eval_samples_per_second": 687.681, + "eval_steps_per_second": 4.219, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4784 + }, + { + "epoch": 185.0, + "eval_accuracy": 0.9012609314622737, + "eval_auc": 0.9262103340569317, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6787958981144558, + "eval_f1_macro": 0.810230030763446, + "eval_loss": 0.26986950635910034, + "eval_pr_auc": 0.6665273243194801, + "eval_precision": 0.6967741935483871, + "eval_precision_macro": 0.8170231070594294, + "eval_pred_class_0": 16723, + "eval_pred_class_1": 2945, + "eval_predicted_binding_ratio": 0.1497356111450071, + "eval_recall": 0.6617220251531764, + "eval_recall_macro": 0.8039098445920405, + "eval_runtime": 0.2365, + "eval_samples_per_second": 689.338, + "eval_steps_per_second": 4.229, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4810 + }, + { + "epoch": 186.0, + "eval_accuracy": 0.9014134634940004, + "eval_auc": 0.9262454683781669, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6797687861271676, + "eval_f1_macro": 0.8107536578092345, + "eval_loss": 0.2697572410106659, + "eval_pr_auc": 0.666716804071224, + "eval_precision": 0.6966824644549763, + "eval_precision_macro": 0.8171398441695726, + "eval_pred_class_0": 16714, + "eval_pred_class_1": 2954, + "eval_predicted_binding_ratio": 0.15019320724018712, + "eval_recall": 0.6636568848758465, + "eval_recall_macro": 0.8047867330155776, + "eval_runtime": 0.1794, + "eval_samples_per_second": 908.74, + "eval_steps_per_second": 5.575, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4836 + }, + { + "epoch": 187.0, + "eval_accuracy": 0.9011083994305471, + "eval_auc": 0.9263210315000697, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6784592494627211, + "eval_f1_macro": 0.810014015033881, + "eval_loss": 0.26983824372291565, + "eval_pr_auc": 0.6669884807739552, + "eval_precision": 0.6960651289009498, + "eval_precision_macro": 0.8166629472255945, + "eval_pred_class_0": 16720, + "eval_pred_class_1": 2948, + "eval_predicted_binding_ratio": 0.14988814317673377, + "eval_recall": 0.6617220251531764, + "eval_recall_macro": 0.8038193031542425, + "eval_runtime": 0.1795, + "eval_samples_per_second": 908.044, + "eval_steps_per_second": 5.571, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4862 + }, + { + "epoch": 188.0, + "eval_accuracy": 0.9012100874516982, + "eval_auc": 0.9264191350895575, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6786836447825368, + "eval_f1_macro": 0.8101580079180191, + "eval_loss": 0.26967287063598633, + "eval_pr_auc": 0.667515048707415, + "eval_precision": 0.6965376782077393, + "eval_precision_macro": 0.8169029737767557, + "eval_pred_class_0": 16722, + "eval_pred_class_1": 2946, + "eval_predicted_binding_ratio": 0.14978645515558267, + "eval_recall": 0.6617220251531764, + "eval_recall_macro": 0.8038796641127746, + "eval_runtime": 0.1851, + "eval_samples_per_second": 880.782, + "eval_steps_per_second": 5.404, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4888 + }, + { + "epoch": 189.0, + "eval_accuracy": 0.9013626194834249, + "eval_auc": 0.9265750497228504, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6782752902155887, + "eval_f1_macro": 0.8100137635248964, + "eval_loss": 0.2697126567363739, + "eval_pr_auc": 0.6680073523436961, + "eval_precision": 0.698190508706043, + "eval_precision_macro": 0.8175521514197519, + "eval_pred_class_0": 16739, + "eval_pred_class_1": 2929, + "eval_predicted_binding_ratio": 0.14892210697579825, + "eval_recall": 0.6594646888100613, + "eval_recall_macro": 0.8030528007338771, + "eval_runtime": 0.2626, + "eval_samples_per_second": 620.766, + "eval_steps_per_second": 3.808, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4914 + }, + { + "epoch": 190.0, + "eval_accuracy": 0.9017185275574537, + "eval_auc": 0.9265310393625665, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6808651147432723, + "eval_f1_macro": 0.8113902183590456, + "eval_loss": 0.26952171325683594, + "eval_pr_auc": 0.6676611850618266, + "eval_precision": 0.6975642760487145, + "eval_precision_macro": 0.8176966904417818, + "eval_pred_class_0": 16712, + "eval_pred_class_1": 2956, + "eval_predicted_binding_ratio": 0.1502948952613382, + "eval_recall": 0.6649467913576266, + "eval_recall_macro": 0.8054920472149997, + "eval_runtime": 0.2625, + "eval_samples_per_second": 620.883, + "eval_steps_per_second": 3.809, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4940 + }, + { + "epoch": 191.0, + "eval_accuracy": 0.901921903599756, + "eval_auc": 0.9266825451738318, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6812097174020822, + "eval_f1_macro": 0.8116278420268636, + "eval_loss": 0.2694892883300781, + "eval_pr_auc": 0.668268169384226, + "eval_precision": 0.6986440677966101, + "eval_precision_macro": 0.8182178348314311, + "eval_pred_class_0": 16718, + "eval_pred_class_1": 2950, + "eval_predicted_binding_ratio": 0.1499898311978849, + "eval_recall": 0.6646243147371815, + "eval_recall_macro": 0.8054817113011072, + "eval_runtime": 0.2617, + "eval_samples_per_second": 622.763, + "eval_steps_per_second": 3.821, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4966 + }, + { + "epoch": 192.0, + "eval_accuracy": 0.9018710595891803, + "eval_auc": 0.9266840147811743, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6814130075932651, + "eval_f1_macro": 0.8117083668893665, + "eval_loss": 0.26940062642097473, + "eval_pr_auc": 0.6682450098479531, + "eval_precision": 0.6980047345282381, + "eval_precision_macro": 0.817974900326174, + "eval_pred_class_0": 16711, + "eval_pred_class_1": 2957, + "eval_predicted_binding_ratio": 0.15034573927191378, + "eval_recall": 0.6655917445985166, + "eval_recall_macro": 0.8058447043147107, + "eval_runtime": 0.2532, + "eval_samples_per_second": 643.715, + "eval_steps_per_second": 3.949, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4992 + }, + { + "epoch": 192.30769230769232, + "grad_norm": 15858.0107421875, + "learning_rate": 7.72994743624204e-07, + "loss": 0.2316, + "step": 5000 + }, + { + "epoch": 193.0, + "eval_accuracy": 0.9021761236526337, + "eval_auc": 0.9268377687996988, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6816677696889477, + "eval_f1_macro": 0.8119380540142443, + "eval_loss": 0.26932862401008606, + "eval_pr_auc": 0.6692251134414691, + "eval_precision": 0.6999660210669385, + "eval_precision_macro": 0.8188619343002854, + "eval_pred_class_0": 16725, + "eval_pred_class_1": 2943, + "eval_predicted_binding_ratio": 0.149633923123856, + "eval_recall": 0.6643018381167365, + "eval_recall_macro": 0.8055015558664808, + "eval_runtime": 0.1767, + "eval_samples_per_second": 922.594, + "eval_steps_per_second": 5.66, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5018 + }, + { + "epoch": 194.0, + "eval_accuracy": 0.9022269676632093, + "eval_auc": 0.92693509378927, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6815697963238947, + "eval_f1_macro": 0.8119084228038069, + "eval_loss": 0.2693846523761749, + "eval_pr_auc": 0.6695232673057094, + "eval_precision": 0.7004765146358066, + "eval_precision_macro": 0.8190667092007484, + "eval_pred_class_0": 16730, + "eval_pred_class_1": 2938, + "eval_predicted_binding_ratio": 0.14937970307097823, + "eval_recall": 0.6636568848758465, + "eval_recall_macro": 0.8052696206838338, + "eval_runtime": 0.2611, + "eval_samples_per_second": 624.258, + "eval_steps_per_second": 3.83, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5044 + }, + { + "epoch": 195.0, + "eval_accuracy": 0.9022778116737848, + "eval_auc": 0.926939395553809, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6825239511067063, + "eval_f1_macro": 0.8123875088746679, + "eval_loss": 0.269380122423172, + "eval_pr_auc": 0.6693235837806535, + "eval_precision": 0.6996274974602099, + "eval_precision_macro": 0.8188535333546936, + "eval_pred_class_0": 16715, + "eval_pred_class_1": 2953, + "eval_predicted_binding_ratio": 0.15014236322961155, + "eval_recall": 0.6662366978394066, + "eval_recall_macro": 0.8063482638107518, + "eval_runtime": 0.2536, + "eval_samples_per_second": 642.818, + "eval_steps_per_second": 3.944, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5070 + }, + { + "epoch": 196.0, + "eval_accuracy": 0.9018202155786048, + "eval_auc": 0.9269277165550606, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6819304892110031, + "eval_f1_macro": 0.8119407443800393, + "eval_loss": 0.2693455219268799, + "eval_pr_auc": 0.6690854783865479, + "eval_precision": 0.696969696969697, + "eval_precision_macro": 0.8176128877709905, + "eval_pred_class_0": 16698, + "eval_pred_class_1": 2970, + "eval_predicted_binding_ratio": 0.15100671140939598, + "eval_recall": 0.6675266043211867, + "eval_recall_macro": 0.8066008708211837, + "eval_runtime": 0.2679, + "eval_samples_per_second": 608.505, + "eval_steps_per_second": 3.733, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5096 + }, + { + "epoch": 197.0, + "eval_accuracy": 0.9018202155786048, + "eval_auc": 0.926915803976337, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6826622843056697, + "eval_f1_macro": 0.8122944214527055, + "eval_loss": 0.2691311538219452, + "eval_pr_auc": 0.6692617138980786, + "eval_precision": 0.6960455764075067, + "eval_precision_macro": 0.8173347038115213, + "eval_pred_class_0": 16684, + "eval_pred_class_1": 2984, + "eval_predicted_binding_ratio": 0.15171852755745374, + "eval_recall": 0.6697839406643018, + "eval_recall_macro": 0.8075182756378791, + "eval_runtime": 0.261, + "eval_samples_per_second": 624.485, + "eval_steps_per_second": 3.831, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5122 + }, + { + "epoch": 198.0, + "eval_accuracy": 0.9023794996949359, + "eval_auc": 0.9270834170733764, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6822244289970208, + "eval_f1_macro": 0.8122781903500151, + "eval_loss": 0.26926350593566895, + "eval_pr_auc": 0.6700425139918407, + "eval_precision": 0.7007820469228153, + "eval_precision_macro": 0.8193035600788525, + "eval_pred_class_0": 16727, + "eval_pred_class_1": 2941, + "eval_predicted_binding_ratio": 0.1495322351027049, + "eval_recall": 0.6646243147371815, + "eval_recall_macro": 0.8057533356145012, + "eval_runtime": 0.1796, + "eval_samples_per_second": 907.821, + "eval_steps_per_second": 5.569, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5148 + }, + { + "epoch": 199.0, + "eval_accuracy": 0.9023286556843604, + "eval_auc": 0.9271362061477199, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6822167080231596, + "eval_f1_macro": 0.812256712426767, + "eval_loss": 0.26931333541870117, + "eval_pr_auc": 0.6701662107301889, + "eval_precision": 0.7004076086956522, + "eval_precision_macro": 0.8191406615590195, + "eval_pred_class_0": 16724, + "eval_pred_class_1": 2944, + "eval_predicted_binding_ratio": 0.14968476713443157, + "eval_recall": 0.6649467913576266, + "eval_recall_macro": 0.8058542129661919, + "eval_runtime": 0.2544, + "eval_samples_per_second": 640.682, + "eval_steps_per_second": 3.931, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5174 + }, + { + "epoch": 200.0, + "eval_accuracy": 0.9019727476103315, + "eval_auc": 0.9271006630615285, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6823723228995058, + "eval_f1_macro": 0.8122076248057319, + "eval_loss": 0.26923447847366333, + "eval_pr_auc": 0.6700054234044599, + "eval_precision": 0.6975412596833951, + "eval_precision_macro": 0.8179304597716335, + "eval_pred_class_0": 16699, + "eval_pred_class_1": 2969, + "eval_predicted_binding_ratio": 0.15095586739882041, + "eval_recall": 0.6678490809416318, + "eval_recall_macro": 0.8068224700899382, + "eval_runtime": 0.2001, + "eval_samples_per_second": 814.446, + "eval_steps_per_second": 4.997, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5200 + }, + { + "epoch": 201.0, + "eval_accuracy": 0.9022269676632093, + "eval_auc": 0.9272154481542286, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6827256228345158, + "eval_f1_macro": 0.8124672441745833, + "eval_loss": 0.2691180408000946, + "eval_pr_auc": 0.6705128874375396, + "eval_precision": 0.6989864864864865, + "eval_precision_macro": 0.8186098340979236, + "eval_pred_class_0": 16708, + "eval_pred_class_1": 2960, + "eval_predicted_binding_ratio": 0.15049827130364044, + "eval_recall": 0.6672041277007417, + "eval_recall_macro": 0.8067112568243553, + "eval_runtime": 0.2326, + "eval_samples_per_second": 700.909, + "eval_steps_per_second": 4.3, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5226 + }, + { + "epoch": 202.0, + "eval_accuracy": 0.9027354077689648, + "eval_auc": 0.9272916730860608, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6847915636842973, + "eval_f1_macro": 0.8136435649304945, + "eval_loss": 0.2689346969127655, + "eval_pr_auc": 0.6709998700001464, + "eval_precision": 0.7001347708894878, + "eval_precision_macro": 0.8194386429297739, + "eval_pred_class_0": 16700, + "eval_pred_class_1": 2968, + "eval_predicted_binding_ratio": 0.15090502338824485, + "eval_recall": 0.6701064172847468, + "eval_recall_macro": 0.8081925820956238, + "eval_runtime": 0.1664, + "eval_samples_per_second": 979.381, + "eval_steps_per_second": 6.008, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5252 + }, + { + "epoch": 203.0, + "eval_accuracy": 0.9025828757372382, + "eval_auc": 0.9274415730349983, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6833057851239669, + "eval_f1_macro": 0.8128720237282395, + "eval_loss": 0.269077867269516, + "eval_pr_auc": 0.6716085952371595, + "eval_precision": 0.7009155645981688, + "eval_precision_macro": 0.819534880211639, + "eval_pred_class_0": 16719, + "eval_pred_class_1": 2949, + "eval_predicted_binding_ratio": 0.14993898718730933, + "eval_recall": 0.6665591744598517, + "eval_recall_macro": 0.8066604045173044, + "eval_runtime": 0.2556, + "eval_samples_per_second": 637.756, + "eval_steps_per_second": 3.913, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5278 + }, + { + "epoch": 204.0, + "eval_accuracy": 0.902837095790116, + "eval_auc": 0.9274026625041677, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6848095002474023, + "eval_f1_macro": 0.8136877723940104, + "eval_loss": 0.2691201865673065, + "eval_pr_auc": 0.6711363619469519, + "eval_precision": 0.700877785280216, + "eval_precision_macro": 0.8197612917781423, + "eval_pred_class_0": 16706, + "eval_pred_class_1": 2962, + "eval_predicted_binding_ratio": 0.15059995932479153, + "eval_recall": 0.6694614640438569, + "eval_recall_macro": 0.8079908273922429, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.544, + "eval_steps_per_second": 3.868, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5304 + }, + { + "epoch": 205.0, + "eval_accuracy": 0.9029896278218426, + "eval_auc": 0.9274924350745481, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6850445691647409, + "eval_f1_macro": 0.8138546963859644, + "eval_loss": 0.2691231071949005, + "eval_pr_auc": 0.6713794419677425, + "eval_precision": 0.7017247210010146, + "eval_precision_macro": 0.8201640180913157, + "eval_pred_class_0": 16711, + "eval_pred_class_1": 2957, + "eval_predicted_binding_ratio": 0.15034573927191378, + "eval_recall": 0.6691389874234118, + "eval_recall_macro": 0.8079503109990844, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.331, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5330 + }, + { + "epoch": 206.0, + "eval_accuracy": 0.9031421598535693, + "eval_auc": 0.9275284258556915, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6860062633921213, + "eval_f1_macro": 0.8143728753012186, + "eval_loss": 0.26890990138053894, + "eval_pr_auc": 0.6717808840440014, + "eval_precision": 0.7016183412002697, + "eval_precision_macro": 0.8202739053624388, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.6710738471460819, + "eval_recall_macro": 0.8088271994226215, + "eval_runtime": 0.2637, + "eval_samples_per_second": 618.148, + "eval_steps_per_second": 3.792, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5356 + }, + { + "epoch": 207.0, + "eval_accuracy": 0.9030913158429937, + "eval_auc": 0.9275997261430513, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6854785478547855, + "eval_f1_macro": 0.8141000144010073, + "eval_loss": 0.2689792513847351, + "eval_pr_auc": 0.6718662790463032, + "eval_precision": 0.7019263264616424, + "eval_precision_macro": 0.8203209943398044, + "eval_pred_class_0": 16709, + "eval_pred_class_1": 2959, + "eval_predicted_binding_ratio": 0.15044742729306487, + "eval_recall": 0.6697839406643018, + "eval_recall_macro": 0.8082727876195295, + "eval_runtime": 0.195, + "eval_samples_per_second": 835.813, + "eval_steps_per_second": 5.128, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5382 + }, + { + "epoch": 208.0, + "eval_accuracy": 0.9032946918852959, + "eval_auc": 0.9276906860783045, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6862421643022105, + "eval_f1_macro": 0.8145402081954642, + "eval_loss": 0.2688303291797638, + "eval_pr_auc": 0.6723694322774509, + "eval_precision": 0.7024653833164471, + "eval_precision_macro": 0.8206766373097469, + "eval_pred_class_0": 16707, + "eval_pred_class_1": 2961, + "eval_predicted_binding_ratio": 0.150549115314216, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.808786683029463, + "eval_runtime": 0.2599, + "eval_samples_per_second": 627.22, + "eval_steps_per_second": 3.848, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5408 + }, + { + "epoch": 209.0, + "eval_accuracy": 0.9031421598535693, + "eval_auc": 0.9277029684919884, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6851760039662865, + "eval_f1_macro": 0.8139715080669648, + "eval_loss": 0.2689387798309326, + "eval_pr_auc": 0.6722283267888528, + "eval_precision": 0.7027118644067797, + "eval_precision_macro": 0.8206106277411336, + "eval_pred_class_0": 16718, + "eval_pred_class_1": 2950, + "eval_predicted_binding_ratio": 0.1499898311978849, + "eval_recall": 0.6684940341825217, + "eval_recall_macro": 0.8077787367749694, + "eval_runtime": 0.2435, + "eval_samples_per_second": 669.46, + "eval_steps_per_second": 4.107, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5434 + }, + { + "epoch": 210.0, + "eval_accuracy": 0.9030404718324181, + "eval_auc": 0.9277600593308708, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6846370100876468, + "eval_f1_macro": 0.8136754097270521, + "eval_loss": 0.26892563700675964, + "eval_pr_auc": 0.6725269853749476, + "eval_precision": 0.7026476578411406, + "eval_precision_macro": 0.8204961767258567, + "eval_pred_class_0": 16722, + "eval_pred_class_1": 2946, + "eval_predicted_binding_ratio": 0.14978645515558267, + "eval_recall": 0.6675266043211867, + "eval_recall_macro": 0.807325202323568, + "eval_runtime": 0.2601, + "eval_samples_per_second": 626.763, + "eval_steps_per_second": 3.845, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5460 + }, + { + "epoch": 211.0, + "eval_accuracy": 0.9032438478747203, + "eval_auc": 0.9278453744167288, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.686128979053274, + "eval_f1_macro": 0.8144677293907912, + "eval_loss": 0.26878559589385986, + "eval_pr_auc": 0.6729846306066621, + "eval_precision": 0.7022282241728561, + "eval_precision_macro": 0.8205562286912407, + "eval_pred_class_0": 16706, + "eval_pred_class_1": 2962, + "eval_predicted_binding_ratio": 0.15059995932479153, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.808756502550197, + "eval_runtime": 0.2433, + "eval_samples_per_second": 669.879, + "eval_steps_per_second": 4.11, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5486 + }, + { + "epoch": 211.53846153846155, + "grad_norm": 16655.041015625, + "learning_rate": 7.144675667015729e-07, + "loss": 0.2259, + "step": 5500 + }, + { + "epoch": 212.0, + "eval_accuracy": 0.9028879398006915, + "eval_auc": 0.9279623006591996, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6845061116617113, + "eval_f1_macro": 0.8135588667797169, + "eval_loss": 0.26880088448524475, + "eval_pr_auc": 0.6734939312101108, + "eval_precision": 0.7016593294954284, + "eval_precision_macro": 0.8200489288817256, + "eval_pred_class_0": 16715, + "eval_pred_class_1": 2953, + "eval_predicted_binding_ratio": 0.15014236322961155, + "eval_recall": 0.6681715575620768, + "eval_recall_macro": 0.8074967765476829, + "eval_runtime": 0.2166, + "eval_samples_per_second": 752.433, + "eval_steps_per_second": 4.616, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5512 + }, + { + "epoch": 213.0, + "eval_accuracy": 0.9032438478747203, + "eval_auc": 0.9279055601902797, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6863359156090325, + "eval_f1_macro": 0.8145677594216373, + "eval_loss": 0.2687283456325531, + "eval_pr_auc": 0.6734074239428265, + "eval_precision": 0.7019554956169926, + "eval_precision_macro": 0.820472419105347, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.6713963237665269, + "eval_recall_macro": 0.80901861821211, + "eval_runtime": 0.2336, + "eval_samples_per_second": 697.742, + "eval_steps_per_second": 4.281, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5538 + }, + { + "epoch": 214.0, + "eval_accuracy": 0.9032438478747203, + "eval_auc": 0.9279595755594916, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6864392815949909, + "eval_f1_macro": 0.8146177229810407, + "eval_loss": 0.2687055766582489, + "eval_pr_auc": 0.6734235549479375, + "eval_precision": 0.7018194070080862, + "eval_precision_macro": 0.8204306615878754, + "eval_pred_class_0": 16700, + "eval_pred_class_1": 2968, + "eval_predicted_binding_ratio": 0.15090502338824485, + "eval_recall": 0.671718800386972, + "eval_recall_macro": 0.8091496760430665, + "eval_runtime": 0.225, + "eval_samples_per_second": 724.337, + "eval_steps_per_second": 4.444, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5564 + }, + { + "epoch": 215.0, + "eval_accuracy": 0.9031421598535693, + "eval_auc": 0.9280330753916157, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.685383980181668, + "eval_f1_macro": 0.8140720567955604, + "eval_loss": 0.2687467932701111, + "eval_pr_auc": 0.6737104569152422, + "eval_precision": 0.7024373730534867, + "eval_precision_macro": 0.8205258541706347, + "eval_pred_class_0": 16714, + "eval_pred_class_1": 2954, + "eval_predicted_binding_ratio": 0.15019320724018712, + "eval_recall": 0.6691389874234118, + "eval_recall_macro": 0.8080408524368825, + "eval_runtime": 0.1833, + "eval_samples_per_second": 889.114, + "eval_steps_per_second": 5.455, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5590 + }, + { + "epoch": 216.0, + "eval_accuracy": 0.9031930038641448, + "eval_auc": 0.9281060496687963, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6860158311345647, + "eval_f1_macro": 0.8143952682963037, + "eval_loss": 0.26888203620910645, + "eval_pr_auc": 0.6738155361634312, + "eval_precision": 0.7019912251096861, + "eval_precision_macro": 0.8204358998939631, + "eval_pred_class_0": 16705, + "eval_pred_class_1": 2963, + "eval_predicted_binding_ratio": 0.1506508033353671, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.808726322070931, + "eval_runtime": 0.1764, + "eval_samples_per_second": 923.938, + "eval_steps_per_second": 5.668, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5616 + }, + { + "epoch": 217.0, + "eval_accuracy": 0.9032438478747203, + "eval_auc": 0.9281046287239484, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6865425794761983, + "eval_f1_macro": 0.8146676522813128, + "eval_loss": 0.2686736285686493, + "eval_pr_auc": 0.6740322097472393, + "eval_precision": 0.7016835016835017, + "eval_precision_macro": 0.8203890020095554, + "eval_pred_class_0": 16698, + "eval_pred_class_1": 2970, + "eval_predicted_binding_ratio": 0.15100671140939598, + "eval_recall": 0.672041277007417, + "eval_recall_macro": 0.809280733874023, + "eval_runtime": 0.2537, + "eval_samples_per_second": 642.37, + "eval_steps_per_second": 3.941, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5642 + }, + { + "epoch": 218.0, + "eval_accuracy": 0.9033963799064471, + "eval_auc": 0.9282425576991689, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6862615587846763, + "eval_f1_macro": 0.814585106315415, + "eval_loss": 0.26872488856315613, + "eval_pr_auc": 0.6745604450622946, + "eval_precision": 0.7032148900169205, + "eval_precision_macro": 0.8210025266814094, + "eval_pred_class_0": 16713, + "eval_pred_class_1": 2955, + "eval_predicted_binding_ratio": 0.15024405125076265, + "eval_recall": 0.6701064172847468, + "eval_recall_macro": 0.808584928326082, + "eval_runtime": 0.2515, + "eval_samples_per_second": 648.077, + "eval_steps_per_second": 3.976, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5668 + }, + { + "epoch": 219.0, + "eval_accuracy": 0.9032946918852959, + "eval_auc": 0.9283728174652107, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6862421643022105, + "eval_f1_macro": 0.8145402081954642, + "eval_loss": 0.26861947774887085, + "eval_pr_auc": 0.675175157595335, + "eval_precision": 0.7024653833164471, + "eval_precision_macro": 0.8206766373097469, + "eval_pred_class_0": 16707, + "eval_pred_class_1": 2961, + "eval_predicted_binding_ratio": 0.150549115314216, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.808786683029463, + "eval_runtime": 0.269, + "eval_samples_per_second": 605.917, + "eval_steps_per_second": 3.717, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5694 + }, + { + "epoch": 220.0, + "eval_accuracy": 0.9035997559487492, + "eval_auc": 0.9283705984554486, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6875411997363217, + "eval_f1_macro": 0.8152747479984963, + "eval_loss": 0.2684967517852783, + "eval_pr_auc": 0.6752603675091132, + "eval_precision": 0.703067071115605, + "eval_precision_macro": 0.8211461336058236, + "eval_pred_class_0": 16701, + "eval_pred_class_1": 2967, + "eval_predicted_binding_ratio": 0.15085417937766932, + "eval_recall": 0.672686230248307, + "eval_recall_macro": 0.809754112890798, + "eval_runtime": 0.2663, + "eval_samples_per_second": 612.018, + "eval_steps_per_second": 3.755, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5720 + }, + { + "epoch": 221.0, + "eval_accuracy": 0.9037014439699004, + "eval_auc": 0.9285080992007146, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6871489924017179, + "eval_f1_macro": 0.8151206773197821, + "eval_loss": 0.2685548961162567, + "eval_pr_auc": 0.6758164431668767, + "eval_precision": 0.7043684388757196, + "eval_precision_macro": 0.8216427895844347, + "eval_pred_class_0": 16715, + "eval_pred_class_1": 2953, + "eval_predicted_binding_ratio": 0.15014236322961155, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.809028126863591, + "eval_runtime": 0.1986, + "eval_samples_per_second": 820.602, + "eval_steps_per_second": 5.034, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5746 + }, + { + "epoch": 222.0, + "eval_accuracy": 0.9034980679275981, + "eval_auc": 0.9285004105265384, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6870052770448549, + "eval_f1_macro": 0.8149801571567146, + "eval_loss": 0.2685752809047699, + "eval_pr_auc": 0.6755687553750968, + "eval_precision": 0.7030037124535943, + "eval_precision_macro": 0.8210319370409247, + "eval_pred_class_0": 16705, + "eval_pred_class_1": 2963, + "eval_predicted_binding_ratio": 0.1506508033353671, + "eval_recall": 0.671718800386972, + "eval_recall_macro": 0.8093005784393965, + "eval_runtime": 0.2128, + "eval_samples_per_second": 765.976, + "eval_steps_per_second": 4.699, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5772 + }, + { + "epoch": 223.0, + "eval_accuracy": 0.9033963799064471, + "eval_auc": 0.9285113303903685, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6871913072110636, + "eval_f1_macro": 0.815034532807023, + "eval_loss": 0.2685534358024597, + "eval_pr_auc": 0.675465436437485, + "eval_precision": 0.7019845274133871, + "eval_precision_macro": 0.8206238899420935, + "eval_pred_class_0": 16695, + "eval_pred_class_1": 2973, + "eval_predicted_binding_ratio": 0.15115924344112264, + "eval_recall": 0.673008706868752, + "eval_recall_macro": 0.8097644488046905, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.434, + "eval_steps_per_second": 3.898, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5798 + }, + { + "epoch": 224.0, + "eval_accuracy": 0.9033963799064471, + "eval_auc": 0.9286790602773953, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.685742639761826, + "eval_f1_macro": 0.814334221653217, + "eval_loss": 0.2686038315296173, + "eval_pr_auc": 0.6763975611431872, + "eval_precision": 0.7039049235993209, + "eval_precision_macro": 0.8212163498580232, + "eval_pred_class_0": 16723, + "eval_pred_class_1": 2945, + "eval_predicted_binding_ratio": 0.1497356111450071, + "eval_recall": 0.6684940341825217, + "eval_recall_macro": 0.8079296391712996, + "eval_runtime": 0.2587, + "eval_samples_per_second": 630.051, + "eval_steps_per_second": 3.865, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5824 + }, + { + "epoch": 225.0, + "eval_accuracy": 0.9030404718324181, + "eval_auc": 0.9286844910118134, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6857801944307135, + "eval_f1_macro": 0.8142280597608222, + "eval_loss": 0.268373966217041, + "eval_pr_auc": 0.6766849702960268, + "eval_precision": 0.7011455525606469, + "eval_precision_macro": 0.8200338541246348, + "eval_pred_class_0": 16700, + "eval_pred_class_1": 2968, + "eval_predicted_binding_ratio": 0.15090502338824485, + "eval_recall": 0.6710738471460819, + "eval_recall_macro": 0.8087668384640894, + "eval_runtime": 0.2618, + "eval_samples_per_second": 622.527, + "eval_steps_per_second": 3.819, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5850 + }, + { + "epoch": 226.0, + "eval_accuracy": 0.9031421598535693, + "eval_auc": 0.9286265437130228, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6868321551865856, + "eval_f1_macro": 0.8147720454758898, + "eval_loss": 0.268480122089386, + "eval_pr_auc": 0.6762864798501788, + "eval_precision": 0.7005365526492288, + "eval_precision_macro": 0.8199434531195322, + "eval_pred_class_0": 16686, + "eval_pred_class_1": 2982, + "eval_predicted_binding_ratio": 0.15161683953630262, + "eval_recall": 0.673653660109642, + "eval_recall_macro": 0.8098756620702734, + "eval_runtime": 0.1801, + "eval_samples_per_second": 905.249, + "eval_steps_per_second": 5.554, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5876 + }, + { + "epoch": 227.0, + "eval_accuracy": 0.9034472239170226, + "eval_auc": 0.9286911091111043, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6878185106033208, + "eval_f1_macro": 0.8153554406082493, + "eval_loss": 0.2684793770313263, + "eval_pr_auc": 0.6763719243742072, + "eval_precision": 0.7015425888665325, + "eval_precision_macro": 0.8205363669491479, + "eval_pred_class_0": 16686, + "eval_pred_class_1": 2982, + "eval_predicted_binding_ratio": 0.15161683953630262, + "eval_recall": 0.6746210899709771, + "eval_recall_macro": 0.810449918438739, + "eval_runtime": 0.2526, + "eval_samples_per_second": 645.327, + "eval_steps_per_second": 3.959, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5902 + }, + { + "epoch": 228.0, + "eval_accuracy": 0.9032438478747203, + "eval_auc": 0.9287447935753516, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6870580496628844, + "eval_f1_macro": 0.8149167860703537, + "eval_loss": 0.2684626877307892, + "eval_pr_auc": 0.6765555444970285, + "eval_precision": 0.701006711409396, + "eval_precision_macro": 0.8201821668264622, + "eval_pred_class_0": 16688, + "eval_pred_class_1": 2980, + "eval_predicted_binding_ratio": 0.15151515151515152, + "eval_recall": 0.673653660109642, + "eval_recall_macro": 0.8099360230288054, + "eval_runtime": 0.253, + "eval_samples_per_second": 644.184, + "eval_steps_per_second": 3.952, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5928 + }, + { + "epoch": 229.0, + "eval_accuracy": 0.9037522879804759, + "eval_auc": 0.92885044958403, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6877783275606135, + "eval_f1_macro": 0.815442675636767, + "eval_loss": 0.2684047222137451, + "eval_pr_auc": 0.6771563538797724, + "eval_precision": 0.7039162727886563, + "eval_precision_macro": 0.8215498998326138, + "eval_pred_class_0": 16706, + "eval_pred_class_1": 2962, + "eval_predicted_binding_ratio": 0.15059995932479153, + "eval_recall": 0.6723637536278619, + "eval_recall_macro": 0.8097135964976395, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.352, + "eval_steps_per_second": 3.886, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5954 + }, + { + "epoch": 230.0, + "eval_accuracy": 0.9031930038641448, + "eval_auc": 0.9288052907888691, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6866359447004609, + "eval_f1_macro": 0.814695001815053, + "eval_loss": 0.2683703601360321, + "eval_pr_auc": 0.6770692308922137, + "eval_precision": 0.7011764705882353, + "eval_precision_macro": 0.8201862703986524, + "eval_pred_class_0": 16693, + "eval_pred_class_1": 2975, + "eval_predicted_binding_ratio": 0.15126093146227373, + "eval_recall": 0.672686230248307, + "eval_recall_macro": 0.80951266905667, + "eval_runtime": 0.1803, + "eval_samples_per_second": 903.896, + "eval_steps_per_second": 5.545, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5980 + }, + { + "epoch": 230.76923076923077, + "grad_norm": 18226.349609375, + "learning_rate": 6.520804793983146e-07, + "loss": 0.2213, + "step": 6000 + }, + { + "epoch": 231.0, + "eval_accuracy": 0.9036505999593248, + "eval_auc": 0.9289028104284194, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6881685041961494, + "eval_f1_macro": 0.8155957226774667, + "eval_loss": 0.26838722825050354, + "eval_pr_auc": 0.677382862396771, + "eval_precision": 0.7026209677419355, + "eval_precision_macro": 0.821056469972094, + "eval_pred_class_0": 16692, + "eval_pred_class_1": 2976, + "eval_predicted_binding_ratio": 0.1513117754728493, + "eval_recall": 0.6742986133505321, + "eval_recall_macro": 0.8104395825248465, + "eval_runtime": 0.2471, + "eval_samples_per_second": 659.676, + "eval_steps_per_second": 4.047, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6006 + }, + { + "epoch": 232.0, + "eval_accuracy": 0.9036505999593248, + "eval_auc": 0.9289413705892875, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6882710972199375, + "eval_f1_macro": 0.8156453059542872, + "eval_loss": 0.2681979238986969, + "eval_pr_auc": 0.6777962434595076, + "eval_precision": 0.7024848891873741, + "eval_precision_macro": 0.8210147633474318, + "eval_pred_class_0": 16690, + "eval_pred_class_1": 2978, + "eval_predicted_binding_ratio": 0.1514134634940004, + "eval_recall": 0.6746210899709771, + "eval_recall_macro": 0.8105706403558031, + "eval_runtime": 0.2338, + "eval_samples_per_second": 697.187, + "eval_steps_per_second": 4.277, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6032 + }, + { + "epoch": 233.0, + "eval_accuracy": 0.9039556640227782, + "eval_auc": 0.9290857424788171, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6879233437964646, + "eval_f1_macro": 0.8155838213438953, + "eval_loss": 0.2683457136154175, + "eval_pr_auc": 0.6782535995592411, + "eval_precision": 0.7052845528455285, + "eval_precision_macro": 0.8221624965711252, + "eval_pred_class_0": 16716, + "eval_pred_class_1": 2952, + "eval_predicted_binding_ratio": 0.150091519219036, + "eval_recall": 0.6713963237665269, + "eval_recall_macro": 0.8094411449218342, + "eval_runtime": 0.2259, + "eval_samples_per_second": 721.682, + "eval_steps_per_second": 4.427, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6058 + }, + { + "epoch": 234.0, + "eval_accuracy": 0.9040065080333537, + "eval_auc": 0.9291390181781085, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6881400726792204, + "eval_f1_macro": 0.8157063562723066, + "eval_loss": 0.26850852370262146, + "eval_pr_auc": 0.678201898193761, + "eval_precision": 0.7053843548933288, + "eval_precision_macro": 0.8222404873479507, + "eval_pred_class_0": 16715, + "eval_pred_class_1": 2953, + "eval_predicted_binding_ratio": 0.15014236322961155, + "eval_recall": 0.671718800386972, + "eval_recall_macro": 0.8096023832320567, + "eval_runtime": 0.2554, + "eval_samples_per_second": 638.187, + "eval_steps_per_second": 3.915, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6084 + }, + { + "epoch": 235.0, + "eval_accuracy": 0.9037014439699004, + "eval_auc": 0.9291422688327602, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.687767886580943, + "eval_f1_macro": 0.8154198615351363, + "eval_loss": 0.26855188608169556, + "eval_pr_auc": 0.6780321638936206, + "eval_precision": 0.7035413153456999, + "eval_precision_macro": 0.8213868942770528, + "eval_pred_class_0": 16703, + "eval_pred_class_1": 2965, + "eval_predicted_binding_ratio": 0.1507524913565182, + "eval_recall": 0.672686230248307, + "eval_recall_macro": 0.80981447384933, + "eval_runtime": 0.1711, + "eval_samples_per_second": 952.835, + "eval_steps_per_second": 5.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6110 + }, + { + "epoch": 236.0, + "eval_accuracy": 0.903853976001627, + "eval_auc": 0.9292183185796111, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6881081972620815, + "eval_f1_macro": 0.8156376648859622, + "eval_loss": 0.2684246003627777, + "eval_pr_auc": 0.6783782374353945, + "eval_precision": 0.7042538825118163, + "eval_precision_macro": 0.8217486340608884, + "eval_pred_class_0": 16706, + "eval_pred_class_1": 2962, + "eval_predicted_binding_ratio": 0.15059995932479153, + "eval_recall": 0.672686230248307, + "eval_recall_macro": 0.8099050152871281, + "eval_runtime": 0.2571, + "eval_samples_per_second": 633.985, + "eval_steps_per_second": 3.889, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6136 + }, + { + "epoch": 237.0, + "eval_accuracy": 0.9039048200122025, + "eval_auc": 0.9291723227895398, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6891447368421053, + "eval_f1_macro": 0.8161564434751782, + "eval_loss": 0.2682003080844879, + "eval_pr_auc": 0.678474943261759, + "eval_precision": 0.7032561262168513, + "eval_precision_macro": 0.8214884501897367, + "eval_pred_class_0": 16689, + "eval_pred_class_1": 2979, + "eval_predicted_binding_ratio": 0.15146430750457596, + "eval_recall": 0.6755885198323122, + "eval_recall_macro": 0.8111147162450025, + "eval_runtime": 0.2533, + "eval_samples_per_second": 643.44, + "eval_steps_per_second": 3.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6162 + }, + { + "epoch": 238.0, + "eval_accuracy": 0.9041081960545048, + "eval_auc": 0.9292599542101496, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6891891891891891, + "eval_f1_macro": 0.8162490373023017, + "eval_loss": 0.2683660686016083, + "eval_pr_auc": 0.6787382085049865, + "eval_precision": 0.704752275025278, + "eval_precision_macro": 0.8221384271958916, + "eval_pred_class_0": 16701, + "eval_pred_class_1": 2967, + "eval_predicted_binding_ratio": 0.15085417937766932, + "eval_recall": 0.6742986133505321, + "eval_recall_macro": 0.8107112068382407, + "eval_runtime": 0.251, + "eval_samples_per_second": 649.396, + "eval_steps_per_second": 3.984, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6188 + }, + { + "epoch": 239.0, + "eval_accuracy": 0.9039556640227782, + "eval_auc": 0.9292687329242089, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6886434811274106, + "eval_f1_macro": 0.8159319482645679, + "eval_loss": 0.2683408558368683, + "eval_pr_auc": 0.6788700814485856, + "eval_precision": 0.7043155765340526, + "eval_precision_macro": 0.8218620153057044, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.673653660109642, + "eval_recall_macro": 0.8103585497385295, + "eval_runtime": 0.2614, + "eval_samples_per_second": 623.65, + "eval_steps_per_second": 3.826, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6214 + }, + { + "epoch": 240.0, + "eval_accuracy": 0.9038031319910514, + "eval_auc": 0.9293738244479479, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6886109282422647, + "eval_f1_macro": 0.8158628904590758, + "eval_loss": 0.26828694343566895, + "eval_pr_auc": 0.6792947923537326, + "eval_precision": 0.7031932773109244, + "eval_precision_macro": 0.8213743898086402, + "eval_pred_class_0": 16693, + "eval_pred_class_1": 2975, + "eval_predicted_binding_ratio": 0.15126093146227373, + "eval_recall": 0.6746210899709771, + "eval_recall_macro": 0.8106611817936011, + "eval_runtime": 0.1692, + "eval_samples_per_second": 963.308, + "eval_steps_per_second": 5.91, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6240 + }, + { + "epoch": 241.0, + "eval_accuracy": 0.9039048200122025, + "eval_auc": 0.929388987681323, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6891447368421053, + "eval_f1_macro": 0.8161564434751782, + "eval_loss": 0.26832982897758484, + "eval_pr_auc": 0.679377997729221, + "eval_precision": 0.7032561262168513, + "eval_precision_macro": 0.8214884501897367, + "eval_pred_class_0": 16689, + "eval_pred_class_1": 2979, + "eval_predicted_binding_ratio": 0.15146430750457596, + "eval_recall": 0.6755885198323122, + "eval_recall_macro": 0.8111147162450025, + "eval_runtime": 0.2624, + "eval_samples_per_second": 621.159, + "eval_steps_per_second": 3.811, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6266 + }, + { + "epoch": 242.0, + "eval_accuracy": 0.9040065080333537, + "eval_auc": 0.9294373582011398, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6891669410602568, + "eval_f1_macro": 0.8162027357577154, + "eval_loss": 0.26806166768074036, + "eval_pr_auc": 0.6798375856033828, + "eval_precision": 0.7040026908846283, + "eval_precision_macro": 0.8218126661970311, + "eval_pred_class_0": 16695, + "eval_pred_class_1": 2973, + "eval_predicted_binding_ratio": 0.15115924344112264, + "eval_recall": 0.6749435665914221, + "eval_recall_macro": 0.8109129615416215, + "eval_runtime": 0.2508, + "eval_samples_per_second": 649.948, + "eval_steps_per_second": 3.987, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6292 + }, + { + "epoch": 243.0, + "eval_accuracy": 0.9040065080333537, + "eval_auc": 0.929443917905437, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6897798225435425, + "eval_f1_macro": 0.8164989338281623, + "eval_loss": 0.26814863085746765, + "eval_pr_auc": 0.6797263827568155, + "eval_precision": 0.7031825795644892, + "eval_precision_macro": 0.8215607197408852, + "eval_pred_class_0": 16683, + "eval_pred_class_1": 2985, + "eval_predicted_binding_ratio": 0.15176937156802928, + "eval_recall": 0.6768784263140922, + "eval_recall_macro": 0.8116993085273606, + "eval_runtime": 0.2535, + "eval_samples_per_second": 642.959, + "eval_steps_per_second": 3.945, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6318 + }, + { + "epoch": 244.0, + "eval_accuracy": 0.903853976001627, + "eval_auc": 0.9295684160320964, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6882110469909316, + "eval_f1_macro": 0.8156873815700654, + "eval_loss": 0.26816216111183167, + "eval_pr_auc": 0.6803755613590039, + "eval_precision": 0.7041160593792173, + "eval_precision_macro": 0.8217060181953557, + "eval_pred_class_0": 16704, + "eval_pred_class_1": 2964, + "eval_predicted_binding_ratio": 0.15070164734594266, + "eval_recall": 0.673008706868752, + "eval_recall_macro": 0.8100360731180846, + "eval_runtime": 0.2375, + "eval_samples_per_second": 686.328, + "eval_steps_per_second": 4.211, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6344 + }, + { + "epoch": 245.0, + "eval_accuracy": 0.9041081960545048, + "eval_auc": 0.9295755402213329, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6888815572418344, + "eval_f1_macro": 0.8161003326270482, + "eval_loss": 0.2682454288005829, + "eval_pr_auc": 0.6803390373338819, + "eval_precision": 0.7051671732522796, + "eval_precision_macro": 0.8222669528798059, + "eval_pred_class_0": 16707, + "eval_pred_class_1": 2961, + "eval_predicted_binding_ratio": 0.150549115314216, + "eval_recall": 0.6733311834891971, + "eval_recall_macro": 0.8103180333453712, + "eval_runtime": 0.2519, + "eval_samples_per_second": 647.048, + "eval_steps_per_second": 3.97, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6370 + }, + { + "epoch": 246.0, + "eval_accuracy": 0.903853976001627, + "eval_auc": 0.9296204070415253, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6882110469909316, + "eval_f1_macro": 0.8156873815700654, + "eval_loss": 0.2682053744792938, + "eval_pr_auc": 0.6804463740899893, + "eval_precision": 0.7041160593792173, + "eval_precision_macro": 0.8217060181953557, + "eval_pred_class_0": 16704, + "eval_pred_class_1": 2964, + "eval_predicted_binding_ratio": 0.15070164734594266, + "eval_recall": 0.673008706868752, + "eval_recall_macro": 0.8100360731180846, + "eval_runtime": 0.2411, + "eval_samples_per_second": 676.2, + "eval_steps_per_second": 4.148, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6396 + }, + { + "epoch": 247.0, + "eval_accuracy": 0.9045657921496848, + "eval_auc": 0.9296705294111545, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6909270541742137, + "eval_f1_macro": 0.817248994423186, + "eval_loss": 0.2681281566619873, + "eval_pr_auc": 0.6807214505356617, + "eval_precision": 0.7059219380888291, + "eval_precision_macro": 0.8229238344013863, + "eval_pred_class_0": 16696, + "eval_pred_class_1": 2972, + "eval_predicted_binding_ratio": 0.15110839943054707, + "eval_recall": 0.6765559496936472, + "eval_recall_macro": 0.8119002359683303, + "eval_runtime": 0.1963, + "eval_samples_per_second": 830.371, + "eval_steps_per_second": 5.094, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6422 + }, + { + "epoch": 248.0, + "eval_accuracy": 0.9046166361602603, + "eval_auc": 0.9296871135893773, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6912442396313364, + "eval_f1_macro": 0.8174200753177728, + "eval_loss": 0.26824310421943665, + "eval_pr_auc": 0.6805326752113899, + "eval_precision": 0.7058823529411765, + "eval_precision_macro": 0.8229585490219571, + "eval_pred_class_0": 16693, + "eval_pred_class_1": 2975, + "eval_predicted_binding_ratio": 0.15126093146227373, + "eval_recall": 0.6772009029345373, + "eval_recall_macro": 0.8121925321095093, + "eval_runtime": 0.2645, + "eval_samples_per_second": 616.162, + "eval_steps_per_second": 3.78, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6448 + }, + { + "epoch": 249.0, + "eval_accuracy": 0.9047691681919869, + "eval_auc": 0.929768866580617, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6914841047603361, + "eval_f1_macro": 0.8175893393183914, + "eval_loss": 0.2681940495967865, + "eval_pr_auc": 0.6810052289277716, + "eval_precision": 0.7067340067340068, + "eval_precision_macro": 0.8233634101223034, + "eval_pred_class_0": 16698, + "eval_pred_class_1": 2970, + "eval_predicted_binding_ratio": 0.15100671140939598, + "eval_recall": 0.6768784263140922, + "eval_recall_macro": 0.8121520157163508, + "eval_runtime": 0.2396, + "eval_samples_per_second": 680.397, + "eval_steps_per_second": 4.174, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6474 + }, + { + "epoch": 250.0, + "grad_norm": 35924.55078125, + "learning_rate": 5.869563021464528e-07, + "loss": 0.2171, + "step": 6500 + }, + { + "epoch": 250.0, + "eval_accuracy": 0.9048200122025626, + "eval_auc": 0.9298367215633461, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6912928759894459, + "eval_f1_macro": 0.817514675551828, + "eval_loss": 0.2681121826171875, + "eval_pr_auc": 0.681415067318076, + "eval_precision": 0.7073911576105298, + "eval_precision_macro": 0.8236147646777582, + "eval_pred_class_0": 16705, + "eval_pred_class_1": 2963, + "eval_predicted_binding_ratio": 0.1506508033353671, + "eval_recall": 0.6759109964527572, + "eval_recall_macro": 0.8117890227027473, + "eval_runtime": 0.2606, + "eval_samples_per_second": 625.453, + "eval_steps_per_second": 3.837, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6500 + }, + { + "epoch": 251.0, + "eval_accuracy": 0.9046674801708359, + "eval_auc": 0.9298684105799504, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6909510466457887, + "eval_f1_macro": 0.8172961371074986, + "eval_loss": 0.2681705951690674, + "eval_pr_auc": 0.6814913977659953, + "eval_precision": 0.7066756574511126, + "eval_precision_macro": 0.8232516115060616, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.6759109964527572, + "eval_recall_macro": 0.8116984812649493, + "eval_runtime": 0.271, + "eval_samples_per_second": 601.566, + "eval_steps_per_second": 3.691, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6526 + }, + { + "epoch": 252.0, + "eval_accuracy": 0.9045657921496848, + "eval_auc": 0.9300244614682288, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6893926857521099, + "eval_f1_macro": 0.8165072340543806, + "eval_loss": 0.2683667540550232, + "eval_pr_auc": 0.6820860933741758, + "eval_precision": 0.7080217539089055, + "eval_precision_macro": 0.8235792136757251, + "eval_pred_class_0": 16726, + "eval_pred_class_1": 2942, + "eval_predicted_binding_ratio": 0.14958307911328045, + "eval_recall": 0.671718800386972, + "eval_recall_macro": 0.8099343685039828, + "eval_runtime": 0.2316, + "eval_samples_per_second": 703.782, + "eval_steps_per_second": 4.318, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6552 + }, + { + "epoch": 253.0, + "eval_accuracy": 0.9046166361602603, + "eval_auc": 0.9299541441632636, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6910408432147562, + "eval_f1_macro": 0.8173217684087248, + "eval_loss": 0.26824095845222473, + "eval_pr_auc": 0.6816317339832768, + "eval_precision": 0.7061595422416694, + "eval_precision_macro": 0.8230444354317887, + "eval_pred_class_0": 16697, + "eval_pred_class_1": 2971, + "eval_predicted_binding_ratio": 0.15105755541997154, + "eval_recall": 0.6765559496936472, + "eval_recall_macro": 0.8119304164475962, + "eval_runtime": 0.2652, + "eval_samples_per_second": 614.632, + "eval_steps_per_second": 3.771, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6578 + }, + { + "epoch": 254.0, + "eval_accuracy": 0.9044641041285336, + "eval_auc": 0.9299257349988078, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6913093477903729, + "eval_f1_macro": 0.8173981849782266, + "eval_loss": 0.2680823504924774, + "eval_pr_auc": 0.6815799614400636, + "eval_precision": 0.7046215673141326, + "eval_precision_macro": 0.8224282755645115, + "eval_pred_class_0": 16682, + "eval_pred_class_1": 2986, + "eval_predicted_binding_ratio": 0.15182021557860484, + "eval_recall": 0.6784908094163173, + "eval_recall_macro": 0.8126262219955371, + "eval_runtime": 0.1721, + "eval_samples_per_second": 947.131, + "eval_steps_per_second": 5.811, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6604 + }, + { + "epoch": 255.0, + "eval_accuracy": 0.9045657921496848, + "eval_auc": 0.9300487927156216, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6902129064202014, + "eval_f1_macro": 0.8169037907104764, + "eval_loss": 0.2682023346424103, + "eval_pr_auc": 0.6819970017971004, + "eval_precision": 0.7068965517241379, + "eval_precision_macro": 0.8232268515652408, + "eval_pred_class_0": 16710, + "eval_pred_class_1": 2958, + "eval_predicted_binding_ratio": 0.1503965832824893, + "eval_recall": 0.6742986133505321, + "eval_recall_macro": 0.8109828311516347, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.732, + "eval_steps_per_second": 3.925, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6630 + }, + { + "epoch": 256.0, + "eval_accuracy": 0.9046166361602603, + "eval_auc": 0.9300701263533355, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6908371786420567, + "eval_f1_macro": 0.8172233266061071, + "eval_loss": 0.26801708340644836, + "eval_pr_auc": 0.6822973012887885, + "eval_precision": 0.7064374789349511, + "eval_precision_macro": 0.8231307207859595, + "eval_pred_class_0": 16701, + "eval_pred_class_1": 2967, + "eval_predicted_binding_ratio": 0.15085417937766932, + "eval_recall": 0.6759109964527572, + "eval_recall_macro": 0.8116683007856833, + "eval_runtime": 0.2302, + "eval_samples_per_second": 708.13, + "eval_steps_per_second": 4.344, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6656 + }, + { + "epoch": 257.0, + "eval_accuracy": 0.9048708562131381, + "eval_auc": 0.9301923470752391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6911012052171042, + "eval_f1_macro": 0.8174397819709127, + "eval_loss": 0.26805296540260315, + "eval_pr_auc": 0.6828138126269635, + "eval_precision": 0.7080514208389715, + "eval_precision_macro": 0.8238677400987582, + "eval_pred_class_0": 16712, + "eval_pred_class_1": 2956, + "eval_predicted_binding_ratio": 0.1502948952613382, + "eval_recall": 0.6749435665914221, + "eval_recall_macro": 0.8114260296891438, + "eval_runtime": 0.2566, + "eval_samples_per_second": 635.189, + "eval_steps_per_second": 3.897, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6682 + }, + { + "epoch": 258.0, + "eval_accuracy": 0.9047691681919869, + "eval_auc": 0.9301935928351055, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6912806988626998, + "eval_f1_macro": 0.8174910212279173, + "eval_loss": 0.26811105012893677, + "eval_pr_auc": 0.6827059930276215, + "eval_precision": 0.7070128118678355, + "eval_precision_macro": 0.8234501252489699, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.6762334730732021, + "eval_recall_macro": 0.8118899000544377, + "eval_runtime": 0.2455, + "eval_samples_per_second": 663.913, + "eval_steps_per_second": 4.073, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6708 + }, + { + "epoch": 259.0, + "eval_accuracy": 0.9049217002237137, + "eval_auc": 0.9302954726341887, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6913172664245626, + "eval_f1_macro": 0.817561992789179, + "eval_loss": 0.2680983245372772, + "eval_pr_auc": 0.6832649652047296, + "eval_precision": 0.7081501521812648, + "eval_precision_macro": 0.8239452215038332, + "eval_pred_class_0": 16711, + "eval_pred_class_1": 2957, + "eval_predicted_binding_ratio": 0.15034573927191378, + "eval_recall": 0.6752660432118671, + "eval_recall_macro": 0.8115872679993663, + "eval_runtime": 0.2456, + "eval_samples_per_second": 663.63, + "eval_steps_per_second": 4.071, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6734 + }, + { + "epoch": 260.0, + "eval_accuracy": 0.9048200122025626, + "eval_auc": 0.9302687082620565, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6920039486673247, + "eval_f1_macro": 0.8178583697873878, + "eval_loss": 0.2680213451385498, + "eval_pr_auc": 0.6832868902825516, + "eval_precision": 0.7064158548874706, + "eval_precision_macro": 0.8233115761166728, + "eval_pred_class_0": 16691, + "eval_pred_class_1": 2977, + "eval_predicted_binding_ratio": 0.15136261948342486, + "eval_recall": 0.6781683327958723, + "eval_recall_macro": 0.8127064275194428, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.797, + "eval_steps_per_second": 3.87, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6760 + }, + { + "epoch": 261.0, + "eval_accuracy": 0.9049725442342892, + "eval_auc": 0.9303376240871719, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.691635043722158, + "eval_f1_macro": 0.817733374603378, + "eval_loss": 0.2681059241294861, + "eval_pr_auc": 0.6834436649713198, + "eval_precision": 0.7081081081081081, + "eval_precision_macro": 0.8239786410782342, + "eval_pred_class_0": 16708, + "eval_pred_class_1": 2960, + "eval_predicted_binding_ratio": 0.15049827130364044, + "eval_recall": 0.6759109964527572, + "eval_recall_macro": 0.8118795641405453, + "eval_runtime": 0.1861, + "eval_samples_per_second": 875.956, + "eval_steps_per_second": 5.374, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6786 + }, + { + "epoch": 262.0, + "eval_accuracy": 0.9049725442342892, + "eval_auc": 0.9303520963131211, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6920415224913494, + "eval_f1_macro": 0.8179298603528982, + "eval_loss": 0.2680259048938751, + "eval_pr_auc": 0.683712511498021, + "eval_precision": 0.7075471698113207, + "eval_precision_macro": 0.8238035250254208, + "eval_pred_class_0": 16700, + "eval_pred_class_1": 2968, + "eval_predicted_binding_ratio": 0.15090502338824485, + "eval_recall": 0.6772009029345373, + "eval_recall_macro": 0.8124037954643712, + "eval_runtime": 0.2544, + "eval_samples_per_second": 640.771, + "eval_steps_per_second": 3.931, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6812 + }, + { + "epoch": 263.0, + "eval_accuracy": 0.9051759202765914, + "eval_auc": 0.9303956005834594, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.692700609655627, + "eval_f1_macro": 0.8183195235731167, + "eval_loss": 0.2679544985294342, + "eval_pr_auc": 0.6840791766505604, + "eval_precision": 0.7082210242587601, + "eval_precision_macro": 0.8242003324886615, + "eval_pred_class_0": 16700, + "eval_pred_class_1": 2968, + "eval_predicted_binding_ratio": 0.15090502338824485, + "eval_recall": 0.6778458561754273, + "eval_recall_macro": 0.8127866330433483, + "eval_runtime": 0.224, + "eval_samples_per_second": 727.59, + "eval_steps_per_second": 4.464, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6838 + }, + { + "epoch": 264.0, + "eval_accuracy": 0.905328452308318, + "eval_auc": 0.9304257713302264, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6933465085638999, + "eval_f1_macro": 0.8186850387937344, + "eval_loss": 0.26790833473205566, + "eval_pr_auc": 0.6843453300290927, + "eval_precision": 0.70851565129586, + "eval_precision_macro": 0.8244321084532245, + "eval_pred_class_0": 16697, + "eval_pred_class_1": 2971, + "eval_predicted_binding_ratio": 0.15105755541997154, + "eval_recall": 0.6788132860367624, + "eval_recall_macro": 0.8132703479740159, + "eval_runtime": 0.2587, + "eval_samples_per_second": 630.122, + "eval_steps_per_second": 3.866, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6864 + }, + { + "epoch": 265.0, + "eval_accuracy": 0.905328452308318, + "eval_auc": 0.9304709495903853, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6934474810668423, + "eval_f1_macro": 0.8187338421508825, + "eval_loss": 0.26806485652923584, + "eval_pr_auc": 0.684236710788699, + "eval_precision": 0.7083753784056509, + "eval_precision_macro": 0.8243883480827296, + "eval_pred_class_0": 16695, + "eval_pred_class_1": 2973, + "eval_predicted_binding_ratio": 0.15115924344112264, + "eval_recall": 0.6791357626572073, + "eval_recall_macro": 0.8134014058049723, + "eval_runtime": 0.2519, + "eval_samples_per_second": 646.986, + "eval_steps_per_second": 3.969, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6890 + }, + { + "epoch": 266.0, + "eval_accuracy": 0.9054301403294692, + "eval_auc": 0.9305072226139984, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6937767533750412, + "eval_f1_macro": 0.8189285426426647, + "eval_loss": 0.2682030200958252, + "eval_pr_auc": 0.6841419811140891, + "eval_precision": 0.708711738984191, + "eval_precision_macro": 0.8245864774585525, + "eval_pred_class_0": 16695, + "eval_pred_class_1": 2973, + "eval_predicted_binding_ratio": 0.15115924344112264, + "eval_recall": 0.6794582392776524, + "eval_recall_macro": 0.8135928245944608, + "eval_runtime": 0.1928, + "eval_samples_per_second": 845.449, + "eval_steps_per_second": 5.187, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6916 + }, + { + "epoch": 267.0, + "eval_accuracy": 0.9054809843400448, + "eval_auc": 0.9304825799266392, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6942937016938003, + "eval_f1_macro": 0.8191961667392471, + "eval_loss": 0.2681148052215576, + "eval_pr_auc": 0.6840537331785722, + "eval_precision": 0.7083892617449664, + "eval_precision_macro": 0.824532598274209, + "eval_pred_class_0": 16688, + "eval_pred_class_1": 2980, + "eval_predicted_binding_ratio": 0.15151515151515152, + "eval_recall": 0.6807481457594324, + "eval_recall_macro": 0.8141472363975528, + "eval_runtime": 0.2454, + "eval_samples_per_second": 664.157, + "eval_steps_per_second": 4.075, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6942 + }, + { + "epoch": 268.0, + "eval_accuracy": 0.9057860484034981, + "eval_auc": 0.930527787384295, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6951801283105774, + "eval_f1_macro": 0.8197312675109731, + "eval_loss": 0.2679014503955841, + "eval_pr_auc": 0.6848281199608002, + "eval_precision": 0.7095366017461383, + "eval_precision_macro": 0.8251697388598875, + "eval_pred_class_0": 16690, + "eval_pred_class_1": 2978, + "eval_predicted_binding_ratio": 0.1514134634940004, + "eval_recall": 0.6813930990003225, + "eval_recall_macro": 0.8145904349350619, + "eval_runtime": 0.2174, + "eval_samples_per_second": 749.726, + "eval_steps_per_second": 4.6, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6968 + }, + { + "epoch": 269.0, + "eval_accuracy": 0.9056335163717714, + "eval_auc": 0.9305513205667733, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6949375410913873, + "eval_f1_macro": 0.8195606747920547, + "eval_loss": 0.26782992482185364, + "eval_pr_auc": 0.6850252128050689, + "eval_precision": 0.7086825343613812, + "eval_precision_macro": 0.8247638023919581, + "eval_pred_class_0": 16685, + "eval_pred_class_1": 2983, + "eval_predicted_binding_ratio": 0.15166768354687818, + "eval_recall": 0.6817155756207675, + "eval_recall_macro": 0.8146309513282204, + "eval_runtime": 0.2304, + "eval_samples_per_second": 707.619, + "eval_steps_per_second": 4.341, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6994 + }, + { + "epoch": 269.2307692307692, + "grad_norm": 17604.1328125, + "learning_rate": 5.202671165416819e-07, + "loss": 0.2132, + "step": 7000 + }, + { + "epoch": 270.0, + "eval_accuracy": 0.9056335163717714, + "eval_auc": 0.9305936180072407, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6950377916529741, + "eval_f1_macro": 0.8196091213903969, + "eval_loss": 0.2680298984050751, + "eval_pr_auc": 0.6846741481205671, + "eval_precision": 0.7085427135678392, + "eval_precision_macro": 0.8247203168031008, + "eval_pred_class_0": 16683, + "eval_pred_class_1": 2985, + "eval_predicted_binding_ratio": 0.15176937156802928, + "eval_recall": 0.6820380522412125, + "eval_recall_macro": 0.814762009159177, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.148, + "eval_steps_per_second": 3.829, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7020 + }, + { + "epoch": 271.0, + "eval_accuracy": 0.9054809843400448, + "eval_auc": 0.9306496966662317, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6938909929194796, + "eval_f1_macro": 0.8190015347004277, + "eval_loss": 0.2681294083595276, + "eval_pr_auc": 0.6847333752342389, + "eval_precision": 0.7089502018842531, + "eval_precision_macro": 0.8247074919339809, + "eval_pred_class_0": 16696, + "eval_pred_class_1": 2972, + "eval_predicted_binding_ratio": 0.15110839943054707, + "eval_recall": 0.6794582392776524, + "eval_recall_macro": 0.8136230050737269, + "eval_runtime": 0.1821, + "eval_samples_per_second": 895.335, + "eval_steps_per_second": 5.493, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7046 + }, + { + "epoch": 272.0, + "eval_accuracy": 0.9053792963188937, + "eval_auc": 0.9306815900653141, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6932586121641668, + "eval_f1_macro": 0.8186603259504293, + "eval_loss": 0.26823949813842773, + "eval_pr_auc": 0.6847385438827486, + "eval_precision": 0.7090357383681726, + "eval_precision_macro": 0.8246412077064189, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.6781683327958723, + "eval_recall_macro": 0.8130384127913689, + "eval_runtime": 0.1793, + "eval_samples_per_second": 909.129, + "eval_steps_per_second": 5.577, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7072 + }, + { + "epoch": 273.0, + "eval_accuracy": 0.905328452308318, + "eval_auc": 0.9307212597310633, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6931443638760711, + "eval_f1_macro": 0.8185873316314347, + "eval_loss": 0.2681174576282501, + "eval_pr_auc": 0.6852124592316542, + "eval_precision": 0.7087967644084934, + "eval_precision_macro": 0.8245199318120546, + "eval_pred_class_0": 16701, + "eval_pred_class_1": 2967, + "eval_predicted_binding_ratio": 0.15085417937766932, + "eval_recall": 0.6781683327958723, + "eval_recall_macro": 0.8130082323121028, + "eval_runtime": 0.1693, + "eval_samples_per_second": 962.904, + "eval_steps_per_second": 5.907, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7098 + }, + { + "epoch": 274.0, + "eval_accuracy": 0.9056335163717714, + "eval_auc": 0.9307878592214269, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6947368421052632, + "eval_f1_macro": 0.8194636820581644, + "eval_loss": 0.2679351270198822, + "eval_pr_auc": 0.685659065605018, + "eval_precision": 0.7089627391742196, + "eval_precision_macro": 0.8248510741829513, + "eval_pred_class_0": 16689, + "eval_pred_class_1": 2979, + "eval_predicted_binding_ratio": 0.15146430750457596, + "eval_recall": 0.6810706223798775, + "eval_recall_macro": 0.8143688356663075, + "eval_runtime": 0.2474, + "eval_samples_per_second": 658.933, + "eval_steps_per_second": 4.043, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7124 + }, + { + "epoch": 275.0, + "eval_accuracy": 0.905684360382347, + "eval_auc": 0.9308354511413273, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6945496459739832, + "eval_f1_macro": 0.8193909880953703, + "eval_loss": 0.2679973542690277, + "eval_pr_auc": 0.6858486839853987, + "eval_precision": 0.7096231493943472, + "eval_precision_macro": 0.8251038602745574, + "eval_pred_class_0": 16696, + "eval_pred_class_1": 2972, + "eval_predicted_binding_ratio": 0.15110839943054707, + "eval_recall": 0.6801031925185425, + "eval_recall_macro": 0.8140058426527039, + "eval_runtime": 0.1976, + "eval_samples_per_second": 825.097, + "eval_steps_per_second": 5.062, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7150 + }, + { + "epoch": 276.0, + "eval_accuracy": 0.9055826723611958, + "eval_auc": 0.9308506727696961, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.694320987654321, + "eval_f1_macro": 0.819244917025501, + "eval_loss": 0.26807495951652527, + "eval_pr_auc": 0.6856600891550617, + "eval_precision": 0.7091459314055144, + "eval_precision_macro": 0.8248616921913159, + "eval_pred_class_0": 16694, + "eval_pred_class_1": 2974, + "eval_predicted_binding_ratio": 0.1512100874516982, + "eval_recall": 0.6801031925185425, + "eval_recall_macro": 0.8139454816941719, + "eval_runtime": 0.2211, + "eval_samples_per_second": 737.174, + "eval_steps_per_second": 4.523, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7176 + }, + { + "epoch": 277.0, + "eval_accuracy": 0.9057352043929225, + "eval_auc": 0.9308575828439557, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6946640316205533, + "eval_f1_macro": 0.8194640504423113, + "eval_loss": 0.26810142397880554, + "eval_pr_auc": 0.6855507532370865, + "eval_precision": 0.709861999326826, + "eval_precision_macro": 0.8252250644654733, + "eval_pred_class_0": 16697, + "eval_pred_class_1": 2971, + "eval_predicted_binding_ratio": 0.15105755541997154, + "eval_recall": 0.6801031925185425, + "eval_recall_macro": 0.81403602313197, + "eval_runtime": 0.2072, + "eval_samples_per_second": 786.583, + "eval_steps_per_second": 4.826, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7202 + }, + { + "epoch": 278.0, + "eval_accuracy": 0.905684360382347, + "eval_auc": 0.9309145374278527, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6941467436108821, + "eval_f1_macro": 0.8191962415719043, + "eval_loss": 0.26818612217903137, + "eval_pr_auc": 0.6856281742604067, + "eval_precision": 0.7101889338731444, + "eval_precision_macro": 0.8252812485457677, + "eval_pred_class_0": 16704, + "eval_pred_class_1": 2964, + "eval_predicted_binding_ratio": 0.15070164734594266, + "eval_recall": 0.6788132860367624, + "eval_recall_macro": 0.813481611328878, + "eval_runtime": 0.1872, + "eval_samples_per_second": 870.561, + "eval_steps_per_second": 5.341, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7228 + }, + { + "epoch": 279.0, + "eval_accuracy": 0.9056335163717714, + "eval_auc": 0.9309855846702396, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.69433465085639, + "eval_f1_macro": 0.8192692975301671, + "eval_loss": 0.2680346667766571, + "eval_pr_auc": 0.686232632159634, + "eval_precision": 0.7095254123190845, + "eval_precision_macro": 0.825026825462411, + "eval_pred_class_0": 16697, + "eval_pred_class_1": 2971, + "eval_predicted_binding_ratio": 0.15105755541997154, + "eval_recall": 0.6797807158980974, + "eval_recall_macro": 0.8138446043424814, + "eval_runtime": 0.241, + "eval_samples_per_second": 676.212, + "eval_steps_per_second": 4.149, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7254 + }, + { + "epoch": 280.0, + "eval_accuracy": 0.9056335163717714, + "eval_auc": 0.930922634866985, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6957377049180328, + "eval_f1_macro": 0.8199473215888755, + "eval_loss": 0.26806843280792236, + "eval_pr_auc": 0.6857403383581059, + "eval_precision": 0.70756918972991, + "eval_precision_macro": 0.8244187060893835, + "eval_pred_class_0": 16669, + "eval_pred_class_1": 2999, + "eval_predicted_binding_ratio": 0.15248118771608704, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8156794139758723, + "eval_runtime": 0.1979, + "eval_samples_per_second": 823.751, + "eval_steps_per_second": 5.054, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7280 + }, + { + "epoch": 281.0, + "eval_accuracy": 0.9055826723611958, + "eval_auc": 0.9309180411274773, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6959227116423776, + "eval_f1_macro": 0.8200188959217034, + "eval_loss": 0.2680058181285858, + "eval_pr_auc": 0.6857513786045211, + "eval_precision": 0.7069194943446441, + "eval_precision_macro": 0.8241715464761271, + "eval_pred_class_0": 16662, + "eval_pred_class_1": 3006, + "eval_predicted_binding_ratio": 0.15283709579011592, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.8160424069894758, + "eval_runtime": 0.2526, + "eval_samples_per_second": 645.248, + "eval_steps_per_second": 3.959, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7306 + }, + { + "epoch": 282.0, + "eval_accuracy": 0.9055318283506203, + "eval_auc": 0.930996368279084, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6950098489822718, + "eval_f1_macro": 0.8195600321797414, + "eval_loss": 0.2679016888141632, + "eval_pr_auc": 0.6864832067617813, + "eval_precision": 0.7077900367769977, + "eval_precision_macro": 0.824393309448042, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6826830054821026, + "eval_recall_macro": 0.814963763862558, + "eval_runtime": 0.2643, + "eval_samples_per_second": 616.664, + "eval_steps_per_second": 3.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7332 + }, + { + "epoch": 283.0, + "eval_accuracy": 0.905684360382347, + "eval_auc": 0.9310520771031147, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.695152013147083, + "eval_f1_macro": 0.8196821086456596, + "eval_loss": 0.2679460644721985, + "eval_pr_auc": 0.6867185372000254, + "eval_precision": 0.7087801608579088, + "eval_precision_macro": 0.8248408116684653, + "eval_pred_class_0": 16684, + "eval_pred_class_1": 2984, + "eval_predicted_binding_ratio": 0.15171852755745374, + "eval_recall": 0.6820380522412125, + "eval_recall_macro": 0.8147921896384429, + "eval_runtime": 0.2003, + "eval_samples_per_second": 813.949, + "eval_steps_per_second": 4.994, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7358 + }, + { + "epoch": 284.0, + "eval_accuracy": 0.905684360382347, + "eval_auc": 0.931065877786636, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.695152013147083, + "eval_f1_macro": 0.8196821086456596, + "eval_loss": 0.26795604825019836, + "eval_pr_auc": 0.6868264245068394, + "eval_precision": 0.7087801608579088, + "eval_precision_macro": 0.8248408116684653, + "eval_pred_class_0": 16684, + "eval_pred_class_1": 2984, + "eval_predicted_binding_ratio": 0.15171852755745374, + "eval_recall": 0.6820380522412125, + "eval_recall_macro": 0.8147921896384429, + "eval_runtime": 0.2632, + "eval_samples_per_second": 619.283, + "eval_steps_per_second": 3.799, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7384 + }, + { + "epoch": 285.0, + "eval_accuracy": 0.9057352043929225, + "eval_auc": 0.9310428312291054, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.696165191740413, + "eval_f1_macro": 0.8201894743681304, + "eval_loss": 0.2679717540740967, + "eval_pr_auc": 0.6865300625458848, + "eval_precision": 0.7077640786404532, + "eval_precision_macro": 0.8245726255085029, + "eval_pred_class_0": 16667, + "eval_pred_class_1": 3001, + "eval_predicted_binding_ratio": 0.15258287573723817, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8160018905963173, + "eval_runtime": 0.2365, + "eval_samples_per_second": 689.133, + "eval_steps_per_second": 4.228, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7410 + }, + { + "epoch": 286.0, + "eval_accuracy": 0.9054301403294692, + "eval_auc": 0.9311142093764568, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6946815495732108, + "eval_f1_macro": 0.8193658018591599, + "eval_loss": 0.2679450213909149, + "eval_pr_auc": 0.6869273130451355, + "eval_precision": 0.7074557004346372, + "eval_precision_macro": 0.8241961598653369, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6823605288616575, + "eval_recall_macro": 0.8147723450730694, + "eval_runtime": 0.1793, + "eval_samples_per_second": 908.844, + "eval_steps_per_second": 5.576, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7436 + }, + { + "epoch": 287.0, + "eval_accuracy": 0.9058368924140736, + "eval_auc": 0.9311615190538873, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.695695037791653, + "eval_f1_macro": 0.8199978948356761, + "eval_loss": 0.2679760158061981, + "eval_pr_auc": 0.6872164392300586, + "eval_precision": 0.7092127303182579, + "eval_precision_macro": 0.8251152664358777, + "eval_pred_class_0": 16683, + "eval_pred_class_1": 2985, + "eval_predicted_binding_ratio": 0.15176937156802928, + "eval_recall": 0.6826830054821026, + "eval_recall_macro": 0.8151448467381539, + "eval_runtime": 0.1776, + "eval_samples_per_second": 917.947, + "eval_steps_per_second": 5.632, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7462 + }, + { + "epoch": 288.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9311948528628156, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6959237343852729, + "eval_f1_macro": 0.8201439915761322, + "eval_loss": 0.26804664731025696, + "eval_pr_auc": 0.6873402512916988, + "eval_precision": 0.7096882333221589, + "eval_precision_macro": 0.8253565529811274, + "eval_pred_class_0": 16685, + "eval_pred_class_1": 2983, + "eval_predicted_binding_ratio": 0.15166768354687818, + "eval_recall": 0.6826830054821026, + "eval_recall_macro": 0.815205207696686, + "eval_runtime": 0.2679, + "eval_samples_per_second": 608.503, + "eval_steps_per_second": 3.733, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7488 + }, + { + "epoch": 288.46153846153845, + "grad_norm": 18250.5078125, + "learning_rate": 4.5321317063898914e-07, + "loss": 0.2101, + "step": 7500 + }, + { + "epoch": 289.0, + "eval_accuracy": 0.9058368924140736, + "eval_auc": 0.9312627857055362, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6958949096880132, + "eval_f1_macro": 0.8200944800500465, + "eval_loss": 0.26791396737098694, + "eval_pr_auc": 0.6879250403674073, + "eval_precision": 0.7089327534292406, + "eval_precision_macro": 0.8250281609942534, + "eval_pred_class_0": 16679, + "eval_pred_class_1": 2989, + "eval_predicted_binding_ratio": 0.1519727476103315, + "eval_recall": 0.6833279587229926, + "eval_recall_macro": 0.8154069624000669, + "eval_runtime": 0.2277, + "eval_samples_per_second": 715.925, + "eval_steps_per_second": 4.392, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7514 + }, + { + "epoch": 290.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9312241087546806, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.696522309711286, + "eval_f1_macro": 0.8204332366847645, + "eval_loss": 0.26787513494491577, + "eval_pr_auc": 0.6878638814996979, + "eval_precision": 0.7088480801335559, + "eval_precision_macro": 0.8250951850316913, + "eval_pred_class_0": 16673, + "eval_pred_class_1": 2995, + "eval_predicted_binding_ratio": 0.15227781167378482, + "eval_recall": 0.6846178652047726, + "eval_recall_macro": 0.815991554682425, + "eval_runtime": 0.2212, + "eval_samples_per_second": 736.836, + "eval_steps_per_second": 4.52, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7540 + }, + { + "epoch": 291.0, + "eval_accuracy": 0.9063453325198292, + "eval_auc": 0.9312868249779602, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6975369458128079, + "eval_f1_macro": 0.8210658921448086, + "eval_loss": 0.26807519793510437, + "eval_pr_auc": 0.6876156626538744, + "eval_precision": 0.7106055536968886, + "eval_precision_macro": 0.8260144502101566, + "eval_pred_class_0": 16679, + "eval_pred_class_1": 2989, + "eval_predicted_binding_ratio": 0.1519727476103315, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8163640563475095, + "eval_runtime": 0.2581, + "eval_samples_per_second": 631.522, + "eval_steps_per_second": 3.874, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7566 + }, + { + "epoch": 292.0, + "eval_accuracy": 0.9061928004881025, + "eval_auc": 0.9313246454689077, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6971935007385525, + "eval_f1_macro": 0.8208465473190101, + "eval_loss": 0.26810526847839355, + "eval_pr_auc": 0.6877564948921628, + "eval_precision": 0.7098930481283422, + "eval_precision_macro": 0.8256529284776994, + "eval_pred_class_0": 16676, + "eval_pred_class_1": 2992, + "eval_predicted_binding_ratio": 0.15212527964205816, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8162735149097116, + "eval_runtime": 0.206, + "eval_samples_per_second": 791.322, + "eval_steps_per_second": 4.855, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7592 + }, + { + "epoch": 293.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9313187962370344, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6969647251845775, + "eval_f1_macro": 0.8207004065741184, + "eval_loss": 0.26795074343681335, + "eval_pr_auc": 0.6879453119558532, + "eval_precision": 0.7094188376753507, + "eval_precision_macro": 0.8254123095657551, + "eval_pred_class_0": 16674, + "eval_pred_class_1": 2994, + "eval_predicted_binding_ratio": 0.15222696766320928, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8162131539511794, + "eval_runtime": 0.2264, + "eval_samples_per_second": 719.954, + "eval_steps_per_second": 4.417, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7618 + }, + { + "epoch": 294.0, + "eval_accuracy": 0.9061419564775269, + "eval_auc": 0.9313842959550158, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6966809069996713, + "eval_f1_macro": 0.820581055003595, + "eval_loss": 0.2678394019603729, + "eval_pr_auc": 0.6884837475836854, + "eval_precision": 0.7102177554438861, + "eval_precision_macro": 0.8257076908850431, + "eval_pred_class_0": 16683, + "eval_pred_class_1": 2985, + "eval_predicted_binding_ratio": 0.15176937156802928, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8157191031066195, + "eval_runtime": 0.1788, + "eval_samples_per_second": 911.642, + "eval_steps_per_second": 5.593, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7644 + }, + { + "epoch": 295.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.9314417858263554, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.696337658071933, + "eval_f1_macro": 0.8203618088536944, + "eval_loss": 0.26788315176963806, + "eval_pr_auc": 0.6885944465147925, + "eval_precision": 0.7095046854082999, + "eval_precision_macro": 0.8253458678840061, + "eval_pred_class_0": 16680, + "eval_pred_class_1": 2988, + "eval_predicted_binding_ratio": 0.15192190359975594, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8156285616688215, + "eval_runtime": 0.2717, + "eval_samples_per_second": 599.832, + "eval_steps_per_second": 3.68, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7670 + }, + { + "epoch": 296.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.9314500389854711, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.696636587366694, + "eval_f1_macro": 0.8205062543343502, + "eval_loss": 0.2678987681865692, + "eval_pr_auc": 0.6885305487751676, + "eval_precision": 0.7090848363393454, + "eval_precision_macro": 0.8252153220919469, + "eval_pred_class_0": 16674, + "eval_pred_class_1": 2994, + "eval_predicted_binding_ratio": 0.15222696766320928, + "eval_recall": 0.6846178652047726, + "eval_recall_macro": 0.816021735161691, + "eval_runtime": 0.2551, + "eval_samples_per_second": 639.002, + "eval_steps_per_second": 3.92, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7696 + }, + { + "epoch": 297.0, + "eval_accuracy": 0.9061928004881025, + "eval_auc": 0.931550575699698, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965959546127282, + "eval_f1_macro": 0.8205577878611678, + "eval_loss": 0.2679198086261749, + "eval_pr_auc": 0.6889351423284887, + "eval_precision": 0.710738255033557, + "eval_precision_macro": 0.8259168264621285, + "eval_pred_class_0": 16688, + "eval_pred_class_1": 2980, + "eval_predicted_binding_ratio": 0.15151515151515152, + "eval_recall": 0.6830054821025475, + "eval_recall_macro": 0.8154871679239726, + "eval_runtime": 0.2409, + "eval_samples_per_second": 676.584, + "eval_steps_per_second": 4.151, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7722 + }, + { + "epoch": 298.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9316071409836368, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6964256300444738, + "eval_f1_macro": 0.82051102635547, + "eval_loss": 0.2680239677429199, + "eval_pr_auc": 0.6888956424322248, + "eval_precision": 0.7117845117845117, + "eval_precision_macro": 0.8263378182350514, + "eval_pred_class_0": 16698, + "eval_pred_class_1": 2970, + "eval_predicted_binding_ratio": 0.15100671140939598, + "eval_recall": 0.6817155756207675, + "eval_recall_macro": 0.8150232975586785, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.263, + "eval_steps_per_second": 3.842, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7748 + }, + { + "epoch": 299.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9315966104197652, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6972235912600624, + "eval_f1_macro": 0.8208966763783243, + "eval_loss": 0.26803261041641235, + "eval_pr_auc": 0.6887732742755047, + "eval_precision": 0.7106496985934361, + "eval_precision_macro": 0.8259818448607991, + "eval_pred_class_0": 16682, + "eval_pred_class_1": 2986, + "eval_predicted_binding_ratio": 0.15182021557860484, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8160717602063305, + "eval_runtime": 0.2484, + "eval_samples_per_second": 656.12, + "eval_steps_per_second": 4.025, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7774 + }, + { + "epoch": 300.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9315960653998236, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6966218432272876, + "eval_f1_macro": 0.8204813289787078, + "eval_loss": 0.26792290806770325, + "eval_pr_auc": 0.6890570542847262, + "eval_precision": 0.7087087087087087, + "eval_precision_macro": 0.8250519729735134, + "eval_pred_class_0": 16671, + "eval_pred_class_1": 2997, + "eval_predicted_binding_ratio": 0.15237949969493594, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8161226125133815, + "eval_runtime": 0.2626, + "eval_samples_per_second": 620.694, + "eval_steps_per_second": 3.808, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7800 + }, + { + "epoch": 301.0, + "eval_accuracy": 0.9061928004881025, + "eval_auc": 0.9316357350655727, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6966957093539372, + "eval_f1_macro": 0.8206059967994839, + "eval_loss": 0.26809555292129517, + "eval_pr_auc": 0.6889378655811479, + "eval_precision": 0.710596914822267, + "eval_precision_macro": 0.8258725914156881, + "eval_pred_class_0": 16686, + "eval_pred_class_1": 2982, + "eval_predicted_binding_ratio": 0.15161683953630262, + "eval_recall": 0.6833279587229926, + "eval_recall_macro": 0.815618225754929, + "eval_runtime": 0.2178, + "eval_samples_per_second": 748.303, + "eval_steps_per_second": 4.591, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7826 + }, + { + "epoch": 302.0, + "eval_accuracy": 0.9061419564775269, + "eval_auc": 0.9317000084886855, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965811965811965, + "eval_f1_macro": 0.8205328694321837, + "eval_loss": 0.26798245310783386, + "eval_pr_auc": 0.6894273728032447, + "eval_precision": 0.7103586992960107, + "eval_precision_macro": 0.8257517200405735, + "eval_pred_class_0": 16685, + "eval_pred_class_1": 2983, + "eval_predicted_binding_ratio": 0.15166768354687818, + "eval_recall": 0.6833279587229926, + "eval_recall_macro": 0.815588045275663, + "eval_runtime": 0.2561, + "eval_samples_per_second": 636.477, + "eval_steps_per_second": 3.905, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7852 + }, + { + "epoch": 303.0, + "eval_accuracy": 0.9060402684563759, + "eval_auc": 0.9316820812256066, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6966513460275772, + "eval_f1_macro": 0.8205311837826491, + "eval_loss": 0.26780617237091064, + "eval_pr_auc": 0.6897785082602487, + "eval_precision": 0.7094617184887997, + "eval_precision_macro": 0.8253790573615671, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8159208578100005, + "eval_runtime": 0.1902, + "eval_samples_per_second": 856.826, + "eval_steps_per_second": 5.257, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7878 + }, + { + "epoch": 304.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9317363691047894, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6963230466185161, + "eval_f1_macro": 0.8203369534620676, + "eval_loss": 0.26783081889152527, + "eval_pr_auc": 0.690108350201664, + "eval_precision": 0.7091273821464393, + "eval_precision_macro": 0.8251819077788622, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.815729439020512, + "eval_runtime": 0.2643, + "eval_samples_per_second": 616.745, + "eval_steps_per_second": 3.784, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7904 + }, + { + "epoch": 305.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9318277475374976, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6962670613385956, + "eval_f1_macro": 0.8203632705580364, + "eval_loss": 0.267810195684433, + "eval_pr_auc": 0.6906198234983021, + "eval_precision": 0.7104026845637584, + "eval_precision_macro": 0.82571907957814, + "eval_pred_class_0": 16688, + "eval_pred_class_1": 2980, + "eval_predicted_binding_ratio": 0.15151515151515152, + "eval_recall": 0.6826830054821026, + "eval_recall_macro": 0.8152957491344841, + "eval_runtime": 0.2457, + "eval_samples_per_second": 663.513, + "eval_steps_per_second": 4.071, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7930 + }, + { + "epoch": 306.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9318495094051658, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965664530967636, + "eval_f1_macro": 0.8205079551116469, + "eval_loss": 0.26770758628845215, + "eval_pr_auc": 0.6907229577841941, + "eval_precision": 0.709979906229069, + "eval_precision_macro": 0.8255870038278783, + "eval_pred_class_0": 16682, + "eval_pred_class_1": 2986, + "eval_predicted_binding_ratio": 0.15182021557860484, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8156889226273535, + "eval_runtime": 0.2472, + "eval_samples_per_second": 659.459, + "eval_steps_per_second": 4.046, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7956 + }, + { + "epoch": 307.0, + "eval_accuracy": 0.9063453325198292, + "eval_auc": 0.9318431735483448, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6973381531383503, + "eval_f1_macro": 0.8209698284488745, + "eval_loss": 0.26784345507621765, + "eval_pr_auc": 0.6903028236900399, + "eval_precision": 0.7108877721943049, + "eval_precision_macro": 0.8261026405178202, + "eval_pred_class_0": 16683, + "eval_pred_class_1": 2985, + "eval_predicted_binding_ratio": 0.15176937156802928, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8161019406855965, + "eval_runtime": 0.2339, + "eval_samples_per_second": 696.845, + "eval_steps_per_second": 4.275, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7982 + }, + { + "epoch": 307.6923076923077, + "grad_norm": 18753.48046875, + "learning_rate": 3.8700127731844033e-07, + "loss": 0.2071, + "step": 8000 + }, + { + "epoch": 308.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9318915635331595, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6966661192314009, + "eval_f1_macro": 0.8205561173351938, + "eval_loss": 0.2679731547832489, + "eval_pr_auc": 0.6902808443840289, + "eval_precision": 0.7098393574297188, + "eval_precision_macro": 0.8255431799139001, + "eval_pred_class_0": 16680, + "eval_pred_class_1": 2988, + "eval_predicted_binding_ratio": 0.15192190359975594, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.81581998045831, + "eval_runtime": 0.2227, + "eval_samples_per_second": 731.952, + "eval_steps_per_second": 4.491, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8008 + }, + { + "epoch": 309.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9319105808361218, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6975217462662071, + "eval_f1_macro": 0.821040751603759, + "eval_loss": 0.2678290605545044, + "eval_pr_auc": 0.6904266123808676, + "eval_precision": 0.7102272727272727, + "eval_precision_macro": 0.8258500239865676, + "eval_pred_class_0": 16676, + "eval_pred_class_1": 2992, + "eval_predicted_binding_ratio": 0.15212527964205816, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.8164649336992, + "eval_runtime": 0.2365, + "eval_samples_per_second": 689.086, + "eval_steps_per_second": 4.228, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8034 + }, + { + "epoch": 310.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9319563819762139, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6977201902575036, + "eval_f1_macro": 0.821136637744354, + "eval_loss": 0.2677942216396332, + "eval_pr_auc": 0.6905770539125178, + "eval_precision": 0.7099465954606141, + "eval_precision_macro": 0.8257626451391362, + "eval_pred_class_0": 16672, + "eval_pred_class_1": 2996, + "eval_predicted_binding_ratio": 0.15232865568436038, + "eval_recall": 0.6859077716865527, + "eval_recall_macro": 0.816727049361113, + "eval_runtime": 0.2422, + "eval_samples_per_second": 672.991, + "eval_steps_per_second": 4.129, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8060 + }, + { + "epoch": 311.0, + "eval_accuracy": 0.9064470205409803, + "eval_auc": 0.9320261250637406, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6975673898750822, + "eval_f1_macro": 0.8211161862162611, + "eval_loss": 0.2678627669811249, + "eval_pr_auc": 0.6908896136415948, + "eval_precision": 0.7113643982567884, + "eval_precision_macro": 0.8263444706297427, + "eval_pred_class_0": 16685, + "eval_pred_class_1": 2983, + "eval_predicted_binding_ratio": 0.15166768354687818, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8161623016441286, + "eval_runtime": 0.264, + "eval_samples_per_second": 617.505, + "eval_steps_per_second": 3.788, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8086 + }, + { + "epoch": 312.0, + "eval_accuracy": 0.9060402684563759, + "eval_auc": 0.9320056478859348, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.696551724137931, + "eval_f1_macro": 0.8204830448879512, + "eval_loss": 0.2678248882293701, + "eval_pr_auc": 0.6908301522653787, + "eval_precision": 0.7096018735362998, + "eval_precision_macro": 0.8254226766806146, + "eval_pred_class_0": 16679, + "eval_pred_class_1": 2989, + "eval_predicted_binding_ratio": 0.1519727476103315, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.815789799979044, + "eval_runtime": 0.2616, + "eval_samples_per_second": 623.086, + "eval_steps_per_second": 3.823, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8112 + }, + { + "epoch": 313.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.9319988935316585, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965370096832431, + "eval_f1_macro": 0.8204581387495119, + "eval_loss": 0.2678254544734955, + "eval_pr_auc": 0.6908227024589884, + "eval_precision": 0.7092245989304813, + "eval_precision_macro": 0.8252587374599636, + "eval_pred_class_0": 16676, + "eval_pred_class_1": 2992, + "eval_predicted_binding_ratio": 0.15212527964205816, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8158906773307344, + "eval_runtime": 0.2589, + "eval_samples_per_second": 629.545, + "eval_steps_per_second": 3.862, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8138 + }, + { + "epoch": 314.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9320198184044164, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6971634694212165, + "eval_f1_macro": 0.8207964351950081, + "eval_loss": 0.2678711414337158, + "eval_pr_auc": 0.6909501280556708, + "eval_precision": 0.7091394262841895, + "eval_precision_macro": 0.8253255619723288, + "eval_pred_class_0": 16670, + "eval_pred_class_1": 2998, + "eval_predicted_binding_ratio": 0.15243034370551148, + "eval_recall": 0.6855852950661077, + "eval_recall_macro": 0.8164752696130925, + "eval_runtime": 0.2654, + "eval_samples_per_second": 614.201, + "eval_steps_per_second": 3.768, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8164 + }, + { + "epoch": 315.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9320753325784678, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6963230466185161, + "eval_f1_macro": 0.8203369534620676, + "eval_loss": 0.2678229808807373, + "eval_pr_auc": 0.6914105766155315, + "eval_precision": 0.7091273821464393, + "eval_precision_macro": 0.8251819077788622, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.815729439020512, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.375, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8190 + }, + { + "epoch": 316.0, + "eval_accuracy": 0.9061928004881025, + "eval_auc": 0.9321298151076297, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6967953985209532, + "eval_f1_macro": 0.8206541727499956, + "eval_loss": 0.26796844601631165, + "eval_pr_auc": 0.6912288183485439, + "eval_precision": 0.710455764075067, + "eval_precision_macro": 0.8258284574391159, + "eval_pred_class_0": 16684, + "eval_pred_class_1": 2984, + "eval_predicted_binding_ratio": 0.15171852755745374, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8157492835858855, + "eval_runtime": 0.2237, + "eval_samples_per_second": 728.567, + "eval_steps_per_second": 4.47, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8216 + }, + { + "epoch": 317.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9321722877330785, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6956235603817045, + "eval_f1_macro": 0.8199989231339035, + "eval_loss": 0.2681267261505127, + "eval_pr_auc": 0.6912998271961284, + "eval_precision": 0.7101108498488411, + "eval_precision_macro": 0.8254885924997606, + "eval_pred_class_0": 16691, + "eval_pred_class_1": 2977, + "eval_predicted_binding_ratio": 0.15136261948342486, + "eval_recall": 0.6817155756207675, + "eval_recall_macro": 0.8148120342038165, + "eval_runtime": 0.1877, + "eval_samples_per_second": 868.412, + "eval_steps_per_second": 5.328, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8242 + }, + { + "epoch": 318.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9321751880177678, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965664530967636, + "eval_f1_macro": 0.8205079551116469, + "eval_loss": 0.26805615425109863, + "eval_pr_auc": 0.6913470747613989, + "eval_precision": 0.709979906229069, + "eval_precision_macro": 0.8255870038278783, + "eval_pred_class_0": 16682, + "eval_pred_class_1": 2986, + "eval_predicted_binding_ratio": 0.15182021557860484, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8156889226273535, + "eval_runtime": 0.2426, + "eval_samples_per_second": 671.932, + "eval_steps_per_second": 4.122, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8268 + }, + { + "epoch": 319.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.9321843949617812, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6961380443714051, + "eval_f1_macro": 0.8202653471082613, + "eval_loss": 0.267932653427124, + "eval_pr_auc": 0.6916487257323465, + "eval_precision": 0.7097855227882037, + "eval_precision_macro": 0.8254333991308556, + "eval_pred_class_0": 16684, + "eval_pred_class_1": 2984, + "eval_predicted_binding_ratio": 0.15171852755745374, + "eval_recall": 0.6830054821025475, + "eval_recall_macro": 0.8153664460069084, + "eval_runtime": 0.2569, + "eval_samples_per_second": 634.38, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8294 + }, + { + "epoch": 320.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.932188093311385, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6964373666064686, + "eval_f1_macro": 0.8204099902666875, + "eval_loss": 0.2679634094238281, + "eval_pr_auc": 0.6915655068510385, + "eval_precision": 0.7093645484949833, + "eval_precision_macro": 0.8253022526621696, + "eval_pred_class_0": 16678, + "eval_pred_class_1": 2990, + "eval_predicted_binding_ratio": 0.15202359162090706, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.815759619499778, + "eval_runtime": 0.2483, + "eval_samples_per_second": 656.448, + "eval_steps_per_second": 4.027, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8320 + }, + { + "epoch": 321.0, + "eval_accuracy": 0.9058877364246491, + "eval_auc": 0.9321906237611137, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965076242006887, + "eval_f1_macro": 0.8204083386821658, + "eval_loss": 0.26790735125541687, + "eval_pr_auc": 0.6917736045731879, + "eval_precision": 0.7084723148765844, + "eval_precision_macro": 0.8249320182661266, + "eval_pred_class_0": 16670, + "eval_pred_class_1": 2998, + "eval_predicted_binding_ratio": 0.15243034370551148, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8160924320341154, + "eval_runtime": 0.2593, + "eval_samples_per_second": 628.581, + "eval_steps_per_second": 3.856, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8346 + }, + { + "epoch": 322.0, + "eval_accuracy": 0.9058368924140736, + "eval_auc": 0.932275423024527, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6955950032873109, + "eval_f1_macro": 0.8199495526481063, + "eval_loss": 0.2680682837963104, + "eval_pr_auc": 0.6919499023976591, + "eval_precision": 0.709353000335233, + "eval_precision_macro": 0.8251589694514043, + "eval_pred_class_0": 16685, + "eval_pred_class_1": 2983, + "eval_predicted_binding_ratio": 0.15166768354687818, + "eval_recall": 0.6823605288616575, + "eval_recall_macro": 0.8150137889071974, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.703, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8372 + }, + { + "epoch": 323.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.9322442887603632, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965370096832431, + "eval_f1_macro": 0.8204581387495119, + "eval_loss": 0.26799651980400085, + "eval_pr_auc": 0.6918309607756195, + "eval_precision": 0.7092245989304813, + "eval_precision_macro": 0.8252587374599636, + "eval_pred_class_0": 16676, + "eval_pred_class_1": 2992, + "eval_predicted_binding_ratio": 0.15212527964205816, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8158906773307344, + "eval_runtime": 0.2522, + "eval_samples_per_second": 646.368, + "eval_steps_per_second": 3.965, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8398 + }, + { + "epoch": 324.0, + "eval_accuracy": 0.9058368924140736, + "eval_auc": 0.9322356073712934, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6960945191992124, + "eval_f1_macro": 0.8201909332654507, + "eval_loss": 0.2680003046989441, + "eval_pr_auc": 0.6918708088537314, + "eval_precision": 0.7086535248914133, + "eval_precision_macro": 0.8249414550993799, + "eval_pred_class_0": 16675, + "eval_pred_class_1": 2993, + "eval_predicted_binding_ratio": 0.15217612365263372, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.81566907806198, + "eval_runtime": 0.1979, + "eval_samples_per_second": 823.473, + "eval_steps_per_second": 5.052, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8424 + }, + { + "epoch": 325.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9322670238779269, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6961235216819974, + "eval_f1_macro": 0.8202405385118361, + "eval_loss": 0.26794886589050293, + "eval_pr_auc": 0.6921960622354616, + "eval_precision": 0.7094074322062269, + "eval_precision_macro": 0.8252690299332196, + "eval_pred_class_0": 16681, + "eval_pred_class_1": 2987, + "eval_predicted_binding_ratio": 0.1518710595891804, + "eval_recall": 0.6833279587229926, + "eval_recall_macro": 0.815467323358599, + "eval_runtime": 0.2683, + "eval_samples_per_second": 607.5, + "eval_steps_per_second": 3.727, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8450 + }, + { + "epoch": 326.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9322930096501425, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6963230466185161, + "eval_f1_macro": 0.8203369534620676, + "eval_loss": 0.2680268883705139, + "eval_pr_auc": 0.6921477872574622, + "eval_precision": 0.7091273821464393, + "eval_precision_macro": 0.8251819077788622, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.815729439020512, + "eval_runtime": 0.1733, + "eval_samples_per_second": 940.367, + "eval_steps_per_second": 5.769, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8476 + }, + { + "epoch": 326.9230769230769, + "grad_norm": 17241.076171875, + "learning_rate": 3.2282309449959705e-07, + "loss": 0.2047, + "step": 8500 + }, + { + "epoch": 327.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9323445529646195, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6967657199146281, + "eval_f1_macro": 0.820604246632002, + "eval_loss": 0.26804205775260925, + "eval_pr_auc": 0.6924439463998024, + "eval_precision": 0.7096989966555184, + "eval_precision_macro": 0.8254994563562998, + "eval_pred_class_0": 16678, + "eval_pred_class_1": 2990, + "eval_predicted_binding_ratio": 0.15202359162090706, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8159510382892665, + "eval_runtime": 0.1771, + "eval_samples_per_second": 920.47, + "eval_steps_per_second": 5.647, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8502 + }, + { + "epoch": 328.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9323743441439272, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6967657199146281, + "eval_f1_macro": 0.820604246632002, + "eval_loss": 0.26802849769592285, + "eval_pr_auc": 0.6925977669253861, + "eval_precision": 0.7096989966555184, + "eval_precision_macro": 0.8254994563562998, + "eval_pred_class_0": 16678, + "eval_pred_class_1": 2990, + "eval_predicted_binding_ratio": 0.15202359162090706, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8159510382892665, + "eval_runtime": 0.1678, + "eval_samples_per_second": 971.421, + "eval_steps_per_second": 5.96, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8528 + }, + { + "epoch": 329.0, + "eval_accuracy": 0.9063453325198292, + "eval_auc": 0.9323637649175607, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6983295119554537, + "eval_f1_macro": 0.8214488366277419, + "eval_loss": 0.2680566608905792, + "eval_pr_auc": 0.6924500045026715, + "eval_precision": 0.7094841930116472, + "eval_precision_macro": 0.825665699698526, + "eval_pred_class_0": 16663, + "eval_pred_class_1": 3005, + "eval_predicted_binding_ratio": 0.15278625177954036, + "eval_recall": 0.6875201547887778, + "eval_recall_macro": 0.8174125189951615, + "eval_runtime": 0.2683, + "eval_samples_per_second": 607.54, + "eval_steps_per_second": 3.727, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8554 + }, + { + "epoch": 330.0, + "eval_accuracy": 0.906243644498678, + "eval_auc": 0.9324242037360844, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6974072858549393, + "eval_f1_macro": 0.8209676462967013, + "eval_loss": 0.26804018020629883, + "eval_pr_auc": 0.6928556723686659, + "eval_precision": 0.7099899766120948, + "eval_precision_macro": 0.8257296209897056, + "eval_pred_class_0": 16675, + "eval_pred_class_1": 2993, + "eval_predicted_binding_ratio": 0.15217612365263372, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.8164347532199341, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.585, + "eval_steps_per_second": 3.869, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8580 + }, + { + "epoch": 331.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9323838917254041, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6981162981162982, + "eval_f1_macro": 0.8213280175544326, + "eval_loss": 0.2678423821926117, + "eval_pr_auc": 0.6929480254197629, + "eval_precision": 0.7093874833555259, + "eval_precision_macro": 0.8255890849326837, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6871976781683328, + "eval_recall_macro": 0.817251280684939, + "eval_runtime": 0.2474, + "eval_samples_per_second": 658.814, + "eval_steps_per_second": 4.042, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8606 + }, + { + "epoch": 332.0, + "eval_accuracy": 0.9065487085621314, + "eval_auc": 0.9323530591687079, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6993784756297023, + "eval_f1_macro": 0.8220268454242666, + "eval_loss": 0.26779934763908386, + "eval_pr_auc": 0.6928104051729911, + "eval_precision": 0.7095917690009956, + "eval_precision_macro": 0.8258856473344816, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183195878979646, + "eval_runtime": 0.2656, + "eval_samples_per_second": 613.815, + "eval_steps_per_second": 3.766, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8632 + }, + { + "epoch": 333.0, + "eval_accuracy": 0.9063961765304047, + "eval_auc": 0.932432009200248, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6983450761920367, + "eval_f1_macro": 0.8214741659959972, + "eval_loss": 0.2679860591888428, + "eval_pr_auc": 0.6929996618872077, + "eval_precision": 0.7098600932711525, + "eval_precision_macro": 0.8258288825890143, + "eval_pred_class_0": 16666, + "eval_pred_class_1": 3002, + "eval_predicted_binding_ratio": 0.1526337197478137, + "eval_recall": 0.6871976781683328, + "eval_recall_macro": 0.817311641643471, + "eval_runtime": 0.1802, + "eval_samples_per_second": 904.69, + "eval_steps_per_second": 5.55, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8658 + }, + { + "epoch": 334.0, + "eval_accuracy": 0.9063961765304047, + "eval_auc": 0.9325279911049631, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6975521603417119, + "eval_f1_macro": 0.8210910370116632, + "eval_loss": 0.26808273792266846, + "eval_pr_auc": 0.6933573725062909, + "eval_precision": 0.7109845947756196, + "eval_precision_macro": 0.8261792653772595, + "eval_pred_class_0": 16682, + "eval_pred_class_1": 2986, + "eval_predicted_binding_ratio": 0.15182021557860484, + "eval_recall": 0.6846178652047726, + "eval_recall_macro": 0.816263178995819, + "eval_runtime": 0.2613, + "eval_samples_per_second": 623.721, + "eval_steps_per_second": 3.827, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8684 + }, + { + "epoch": 335.0, + "eval_accuracy": 0.9063453325198292, + "eval_auc": 0.932545276023111, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6976362442547603, + "eval_f1_macro": 0.8211138747443938, + "eval_loss": 0.26816996932029724, + "eval_pr_auc": 0.6932853419412803, + "eval_precision": 0.710464727515881, + "eval_precision_macro": 0.8259705061096825, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.816495114178466, + "eval_runtime": 0.254, + "eval_samples_per_second": 641.649, + "eval_steps_per_second": 3.936, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8710 + }, + { + "epoch": 336.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9325305604846879, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6982772764561116, + "eval_f1_macro": 0.821477015533191, + "eval_loss": 0.2680180072784424, + "eval_pr_auc": 0.6935243427052671, + "eval_precision": 0.7107548430193721, + "eval_precision_macro": 0.8262002594609874, + "eval_pred_class_0": 16674, + "eval_pred_class_1": 2994, + "eval_predicted_binding_ratio": 0.15222696766320928, + "eval_recall": 0.6862302483069977, + "eval_recall_macro": 0.8169788291091336, + "eval_runtime": 0.264, + "eval_samples_per_second": 617.515, + "eval_steps_per_second": 3.788, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8736 + }, + { + "epoch": 337.0, + "eval_accuracy": 0.9063453325198292, + "eval_auc": 0.932509878924404, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.698032786885246, + "eval_f1_macro": 0.8213054775682699, + "eval_loss": 0.2678954601287842, + "eval_pr_auc": 0.6935428752602653, + "eval_precision": 0.7099033011003668, + "eval_precision_macro": 0.8257957323787274, + "eval_pred_class_0": 16669, + "eval_pred_class_1": 2999, + "eval_predicted_binding_ratio": 0.15248118771608704, + "eval_recall": 0.6865527249274428, + "eval_recall_macro": 0.817019345502292, + "eval_runtime": 0.2614, + "eval_samples_per_second": 623.491, + "eval_steps_per_second": 3.825, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8762 + }, + { + "epoch": 338.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9325608674864403, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6989176779271893, + "eval_f1_macro": 0.8218398486513014, + "eval_loss": 0.26806220412254333, + "eval_pr_auc": 0.6935170525264562, + "eval_precision": 0.7110443777110443, + "eval_precision_macro": 0.8264297528888735, + "eval_pred_class_0": 16671, + "eval_pred_class_1": 2997, + "eval_predicted_binding_ratio": 0.15237949969493594, + "eval_recall": 0.6871976781683328, + "eval_recall_macro": 0.8174625440398011, + "eval_runtime": 0.1767, + "eval_samples_per_second": 922.3, + "eval_steps_per_second": 5.658, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8788 + }, + { + "epoch": 339.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9325807801793065, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6990163934426229, + "eval_f1_macro": 0.8218875444165816, + "eval_loss": 0.26803991198539734, + "eval_pr_auc": 0.6936355510877774, + "eval_precision": 0.7109036345448483, + "eval_precision_macro": 0.8263858865027319, + "eval_pred_class_0": 16669, + "eval_pred_class_1": 2999, + "eval_predicted_binding_ratio": 0.15248118771608704, + "eval_recall": 0.6875201547887778, + "eval_recall_macro": 0.8175936018707576, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.235, + "eval_steps_per_second": 3.891, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8814 + }, + { + "epoch": 340.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9326051308916972, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6983762506150566, + "eval_f1_macro": 0.8215248382050282, + "eval_loss": 0.26792144775390625, + "eval_pr_auc": 0.6939264625162457, + "eval_precision": 0.7106141522029372, + "eval_precision_macro": 0.8261564043164398, + "eval_pred_class_0": 16672, + "eval_pred_class_1": 2996, + "eval_predicted_binding_ratio": 0.15232865568436038, + "eval_recall": 0.6865527249274428, + "eval_recall_macro": 0.8171098869400901, + "eval_runtime": 0.2744, + "eval_samples_per_second": 593.925, + "eval_steps_per_second": 3.644, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8840 + }, + { + "epoch": 341.0, + "eval_accuracy": 0.9063961765304047, + "eval_auc": 0.9325905710732574, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6980482204362801, + "eval_f1_macro": 0.821330737974691, + "eval_loss": 0.26793381571769714, + "eval_pr_auc": 0.6938155556406347, + "eval_precision": 0.7102803738317757, + "eval_precision_macro": 0.825959524727788, + "eval_pred_class_0": 16672, + "eval_pred_class_1": 2996, + "eval_predicted_binding_ratio": 0.15232865568436038, + "eval_recall": 0.6862302483069977, + "eval_recall_macro": 0.8169184681506015, + "eval_runtime": 0.2601, + "eval_samples_per_second": 626.704, + "eval_steps_per_second": 3.845, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8866 + }, + { + "epoch": 342.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9325911160931989, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997542997542997, + "eval_f1_macro": 0.8222974802915219, + "eval_loss": 0.26800957322120667, + "eval_pr_auc": 0.6936597261010234, + "eval_precision": 0.711051930758988, + "eval_precision_macro": 0.8265713326382553, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8182083746323816, + "eval_runtime": 0.1766, + "eval_samples_per_second": 923.118, + "eval_steps_per_second": 5.663, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8892 + }, + { + "epoch": 343.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9326288587241547, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6982772764561116, + "eval_f1_macro": 0.821477015533191, + "eval_loss": 0.2680290937423706, + "eval_pr_auc": 0.6938544134850919, + "eval_precision": 0.7107548430193721, + "eval_precision_macro": 0.8262002594609874, + "eval_pred_class_0": 16674, + "eval_pred_class_1": 2994, + "eval_predicted_binding_ratio": 0.15222696766320928, + "eval_recall": 0.6862302483069977, + "eval_recall_macro": 0.8169788291091336, + "eval_runtime": 0.2553, + "eval_samples_per_second": 638.353, + "eval_steps_per_second": 3.916, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8918 + }, + { + "epoch": 344.0, + "eval_accuracy": 0.9064470205409803, + "eval_auc": 0.9326493359019604, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6982617251557888, + "eval_f1_macro": 0.8214517001734176, + "eval_loss": 0.2680736482143402, + "eval_pr_auc": 0.6938105747317047, + "eval_precision": 0.710377043710377, + "eval_precision_macro": 0.8260361014844849, + "eval_pred_class_0": 16671, + "eval_pred_class_1": 2997, + "eval_predicted_binding_ratio": 0.15237949969493594, + "eval_recall": 0.6865527249274428, + "eval_recall_macro": 0.8170797064608241, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.605, + "eval_steps_per_second": 3.869, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8944 + }, + { + "epoch": 345.0, + "eval_accuracy": 0.9063961765304047, + "eval_auc": 0.9326923535473509, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6977507798391068, + "eval_f1_macro": 0.8211870157279457, + "eval_loss": 0.2681424021720886, + "eval_pr_auc": 0.6938928037843046, + "eval_precision": 0.7107023411371237, + "eval_precision_macro": 0.8260910674386901, + "eval_pred_class_0": 16678, + "eval_pred_class_1": 2990, + "eval_predicted_binding_ratio": 0.15202359162090706, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.816525294657732, + "eval_runtime": 0.2453, + "eval_samples_per_second": 664.523, + "eval_steps_per_second": 4.077, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8970 + }, + { + "epoch": 346.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9327347288478101, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6979799638692724, + "eval_f1_macro": 0.821333351261192, + "eval_loss": 0.2682046592235565, + "eval_pr_auc": 0.6939995353512864, + "eval_precision": 0.7111780455153949, + "eval_precision_macro": 0.8263324280334768, + "eval_pred_class_0": 16680, + "eval_pred_class_1": 2988, + "eval_predicted_binding_ratio": 0.15192190359975594, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.8165856556162641, + "eval_runtime": 0.2656, + "eval_samples_per_second": 613.642, + "eval_steps_per_second": 3.765, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8996 + }, + { + "epoch": 346.15384615384613, + "grad_norm": 18666.783203125, + "learning_rate": 2.618336781094791e-07, + "loss": 0.2031, + "step": 9000 + }, + { + "epoch": 347.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9326942611171465, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6983139630054018, + "eval_f1_macro": 0.8214235117341392, + "eval_loss": 0.2681069076061249, + "eval_pr_auc": 0.6938781737586003, + "eval_precision": 0.7091090425531915, + "eval_precision_macro": 0.8255029006283365, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6878426314092229, + "eval_recall_macro": 0.817513396346852, + "eval_runtime": 0.1933, + "eval_samples_per_second": 843.456, + "eval_steps_per_second": 5.175, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9022 + }, + { + "epoch": 348.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9326940859321652, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6992961204779833, + "eval_f1_macro": 0.8220048784892098, + "eval_loss": 0.26795056462287903, + "eval_pr_auc": 0.6940722215736992, + "eval_precision": 0.7101063829787234, + "eval_precision_macro": 0.8260916068555082, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8180876527153176, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.287, + "eval_steps_per_second": 3.842, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9048 + }, + { + "epoch": 349.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9327572303853989, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6991310050828005, + "eval_f1_macro": 0.8219607247335354, + "eval_loss": 0.267974317073822, + "eval_pr_auc": 0.6943572217621294, + "eval_precision": 0.7111407605070047, + "eval_precision_macro": 0.8265061930909349, + "eval_pred_class_0": 16670, + "eval_pred_class_1": 2998, + "eval_predicted_binding_ratio": 0.15243034370551148, + "eval_recall": 0.6875201547887778, + "eval_recall_macro": 0.8176237823500236, + "eval_runtime": 0.2638, + "eval_samples_per_second": 617.819, + "eval_steps_per_second": 3.79, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9074 + }, + { + "epoch": 350.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9327859996523161, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6985064828491712, + "eval_f1_macro": 0.8216233644580062, + "eval_loss": 0.2680947780609131, + "eval_pr_auc": 0.6943020020881897, + "eval_precision": 0.7112299465240641, + "eval_precision_macro": 0.8264413105131714, + "eval_pred_class_0": 16676, + "eval_pred_class_1": 2992, + "eval_predicted_binding_ratio": 0.15212527964205816, + "eval_recall": 0.6862302483069977, + "eval_recall_macro": 0.8170391900676656, + "eval_runtime": 0.2408, + "eval_samples_per_second": 676.86, + "eval_steps_per_second": 4.153, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9100 + }, + { + "epoch": 351.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9328183504788495, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6986211424819435, + "eval_f1_macro": 0.8216965657061384, + "eval_loss": 0.2681223750114441, + "eval_pr_auc": 0.694450730809475, + "eval_precision": 0.7114677365429622, + "eval_precision_macro": 0.8265619548577976, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6862302483069977, + "eval_recall_macro": 0.8170693705469316, + "eval_runtime": 0.2563, + "eval_samples_per_second": 635.919, + "eval_steps_per_second": 3.901, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9126 + }, + { + "epoch": 352.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9328099415997506, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6991310050828005, + "eval_f1_macro": 0.8219607247335354, + "eval_loss": 0.26799651980400085, + "eval_pr_auc": 0.6945293388795055, + "eval_precision": 0.7111407605070047, + "eval_precision_macro": 0.8265061930909349, + "eval_pred_class_0": 16670, + "eval_pred_class_1": 2998, + "eval_predicted_binding_ratio": 0.15243034370551148, + "eval_recall": 0.6875201547887778, + "eval_recall_macro": 0.8176237823500236, + "eval_runtime": 0.1729, + "eval_samples_per_second": 942.612, + "eval_steps_per_second": 5.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9152 + }, + { + "epoch": 353.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9328013186056745, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7000819000819001, + "eval_f1_macro": 0.8224913728389398, + "eval_loss": 0.26793336868286133, + "eval_pr_auc": 0.6945633927009858, + "eval_precision": 0.7113848202396804, + "eval_precision_macro": 0.8267677821793697, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8183997934218701, + "eval_runtime": 0.2553, + "eval_samples_per_second": 638.431, + "eval_steps_per_second": 3.917, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9178 + }, + { + "epoch": 354.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.932783060437631, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.26802781224250793, + "eval_pr_auc": 0.694307607889245, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.039, + "eval_steps_per_second": 3.828, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9204 + }, + { + "epoch": 355.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9327856687473517, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997875469848014, + "eval_f1_macro": 0.8222422697443199, + "eval_loss": 0.2679577171802521, + "eval_pr_auc": 0.6944152708065687, + "eval_precision": 0.7094102054340623, + "eval_precision_macro": 0.8258762738882024, + "eval_pred_class_0": 16650, + "eval_pred_class_1": 3018, + "eval_predicted_binding_ratio": 0.15344722391702256, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8187429418701, + "eval_runtime": 0.1868, + "eval_samples_per_second": 872.379, + "eval_steps_per_second": 5.352, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9230 + }, + { + "epoch": 356.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9328543023299973, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7000490918016691, + "eval_f1_macro": 0.8224398957879677, + "eval_loss": 0.26801130175590515, + "eval_pr_auc": 0.6946173571124205, + "eval_precision": 0.7106312292358804, + "eval_precision_macro": 0.8264405996101362, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8186015481252511, + "eval_runtime": 0.1679, + "eval_samples_per_second": 971.014, + "eval_steps_per_second": 5.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9256 + }, + { + "epoch": 357.0, + "eval_accuracy": 0.9069554606467358, + "eval_auc": 0.9328912663610364, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002947920078612, + "eval_f1_macro": 0.8226120363891247, + "eval_loss": 0.26803824305534363, + "eval_pr_auc": 0.6948368351435844, + "eval_precision": 0.7114808652246256, + "eval_precision_macro": 0.8268440754137292, + "eval_pred_class_0": 16663, + "eval_pred_class_1": 3005, + "eval_predicted_binding_ratio": 0.15278625177954036, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8185610317320926, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.12, + "eval_steps_per_second": 3.841, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9282 + }, + { + "epoch": 358.0, + "eval_accuracy": 0.9069554606467358, + "eval_auc": 0.9328766286826047, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003929273084479, + "eval_f1_macro": 0.8226594466805872, + "eval_loss": 0.2680445909500122, + "eval_pr_auc": 0.6947543185413181, + "eval_precision": 0.711340206185567, + "eval_precision_macro": 0.8268002873554328, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8186920895630492, + "eval_runtime": 0.2229, + "eval_samples_per_second": 731.38, + "eval_steps_per_second": 4.487, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9308 + }, + { + "epoch": 359.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9328791980623294, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002617801047121, + "eval_f1_macro": 0.8225604590386311, + "eval_loss": 0.2679673135280609, + "eval_pr_auc": 0.694877850403851, + "eval_precision": 0.7107273331119229, + "eval_precision_macro": 0.8265169354519211, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187627864354736, + "eval_runtime": 0.1766, + "eval_samples_per_second": 922.941, + "eval_steps_per_second": 5.662, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9334 + }, + { + "epoch": 360.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9328811640271188, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700228832951945, + "eval_f1_macro": 0.8225089013937882, + "eval_loss": 0.2680214047431946, + "eval_pr_auc": 0.6947522638420174, + "eval_precision": 0.7099767981438515, + "eval_precision_macro": 0.8261913298268353, + "eval_pred_class_0": 16651, + "eval_pred_class_1": 3017, + "eval_predicted_binding_ratio": 0.15339637990644703, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189645411388546, + "eval_runtime": 0.2235, + "eval_samples_per_second": 729.222, + "eval_steps_per_second": 4.474, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9360 + }, + { + "epoch": 361.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.932900882070006, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7000163478829492, + "eval_f1_macro": 0.8223884382480462, + "eval_loss": 0.26796379685401917, + "eval_pr_auc": 0.6949176089109205, + "eval_precision": 0.7098806366047745, + "eval_precision_macro": 0.8261149519800236, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188033028286321, + "eval_runtime": 0.1694, + "eval_samples_per_second": 962.09, + "eval_steps_per_second": 5.902, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9386 + }, + { + "epoch": 362.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9329447561753046, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7005723630417008, + "eval_f1_macro": 0.8227283114988764, + "eval_loss": 0.26795387268066406, + "eval_pr_auc": 0.6950891938157696, + "eval_precision": 0.7106834771068348, + "eval_precision_macro": 0.8265498567232265, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190550825766527, + "eval_runtime": 0.2571, + "eval_samples_per_second": 633.968, + "eval_steps_per_second": 3.889, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9412 + }, + { + "epoch": 363.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9329731556072616, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7001636661211129, + "eval_f1_macro": 0.8225130616165066, + "eval_loss": 0.26798126101493835, + "eval_pr_auc": 0.6952376733836106, + "eval_precision": 0.7108673978065803, + "eval_precision_macro": 0.8265604171937038, + "eval_pred_class_0": 16659, + "eval_pred_class_1": 3009, + "eval_predicted_binding_ratio": 0.15298962782184258, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818631728604517, + "eval_runtime": 0.2635, + "eval_samples_per_second": 618.549, + "eval_steps_per_second": 3.795, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9438 + }, + { + "epoch": 364.0, + "eval_accuracy": 0.9070063046573114, + "eval_auc": 0.9329669754704237, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7008994276369583, + "eval_f1_macro": 0.8229219452383643, + "eval_loss": 0.26792433857917786, + "eval_pr_auc": 0.6952423959524336, + "eval_precision": 0.7110152621101526, + "eval_precision_macro": 0.8267457720422265, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.8192465013661412, + "eval_runtime": 0.2599, + "eval_samples_per_second": 627.085, + "eval_steps_per_second": 3.847, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9464 + }, + { + "epoch": 365.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9329833552661686, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003598298985934, + "eval_f1_macro": 0.8226078241660808, + "eval_loss": 0.2679993808269501, + "eval_pr_auc": 0.6952272078548911, + "eval_precision": 0.7105874543644208, + "eval_precision_macro": 0.8264735530603251, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188938442664302, + "eval_runtime": 0.2598, + "eval_samples_per_second": 627.317, + "eval_steps_per_second": 3.849, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9490 + }, + { + "epoch": 365.38461538461536, + "grad_norm": 18768.416015625, + "learning_rate": 2.0513069380006943e-07, + "loss": 0.2014, + "step": 9500 + }, + { + "epoch": 366.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9330138179879044, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003763704794632, + "eval_f1_macro": 0.822633632944773, + "eval_loss": 0.26798829436302185, + "eval_pr_auc": 0.6953770669982303, + "eval_precision": 0.7109634551495017, + "eval_precision_macro": 0.8266367281750631, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187929669147396, + "eval_runtime": 0.198, + "eval_samples_per_second": 823.426, + "eval_steps_per_second": 5.052, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9516 + }, + { + "epoch": 367.0, + "eval_accuracy": 0.9069554606467358, + "eval_auc": 0.9330281442263691, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7005890052356021, + "eval_f1_macro": 0.8227541703278901, + "eval_loss": 0.2679848372936249, + "eval_pr_auc": 0.6954512195878219, + "eval_precision": 0.7110594486881435, + "eval_precision_macro": 0.8267130106501293, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189542052249621, + "eval_runtime": 0.265, + "eval_samples_per_second": 615.184, + "eval_steps_per_second": 3.774, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9542 + }, + { + "epoch": 368.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9330502564639999, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004743988221822, + "eval_f1_macro": 0.8226809883524872, + "eval_loss": 0.2679852545261383, + "eval_pr_auc": 0.6955032656447977, + "eval_precision": 0.7108233731739708, + "eval_precision_macro": 0.8265932427829508, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189240247456961, + "eval_runtime": 0.2572, + "eval_samples_per_second": 633.651, + "eval_steps_per_second": 3.887, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9568 + }, + { + "epoch": 369.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9330669768972081, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004743988221822, + "eval_f1_macro": 0.8226809883524872, + "eval_loss": 0.2680181562900543, + "eval_pr_auc": 0.6955153529738297, + "eval_precision": 0.7108233731739708, + "eval_precision_macro": 0.8265932427829508, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189240247456961, + "eval_runtime": 0.2378, + "eval_samples_per_second": 685.421, + "eval_steps_per_second": 4.205, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9594 + }, + { + "epoch": 370.0, + "eval_accuracy": 0.9070063046573114, + "eval_auc": 0.9330907631246593, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7008015704236872, + "eval_f1_macro": 0.8228746737830142, + "eval_loss": 0.26799651980400085, + "eval_pr_auc": 0.6956635287744017, + "eval_precision": 0.7111553784860558, + "eval_precision_macro": 0.8267892646512892, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8191154435351846, + "eval_runtime": 0.1746, + "eval_samples_per_second": 933.529, + "eval_steps_per_second": 5.727, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9620 + }, + { + "epoch": 371.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9331252940309591, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6999672453324599, + "eval_f1_macro": 0.8224181697622275, + "eval_loss": 0.26801252365112305, + "eval_pr_auc": 0.6958269523241747, + "eval_precision": 0.7111480865224625, + "eval_precision_macro": 0.8266476794611952, + "eval_pred_class_0": 16663, + "eval_pred_class_1": 3005, + "eval_predicted_binding_ratio": 0.15278625177954036, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8183696129426041, + "eval_runtime": 0.2596, + "eval_samples_per_second": 627.791, + "eval_steps_per_second": 3.851, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9646 + }, + { + "epoch": 372.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9331262672808548, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7005723630417008, + "eval_f1_macro": 0.8227283114988764, + "eval_loss": 0.26803725957870483, + "eval_pr_auc": 0.6957878915651574, + "eval_precision": 0.7106834771068348, + "eval_precision_macro": 0.8265498567232265, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190550825766527, + "eval_runtime": 0.2654, + "eval_samples_per_second": 614.266, + "eval_steps_per_second": 3.769, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9672 + }, + { + "epoch": 373.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9331573528825239, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699607329842932, + "eval_f1_macro": 0.8221730364601127, + "eval_loss": 0.26807889342308044, + "eval_pr_auc": 0.6959044832644976, + "eval_precision": 0.7100631019594819, + "eval_precision_macro": 0.8261247850555049, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183799488564967, + "eval_runtime": 0.2659, + "eval_samples_per_second": 613.113, + "eval_steps_per_second": 3.761, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9698 + }, + { + "epoch": 374.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9331759224905339, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6996235063021771, + "eval_f1_macro": 0.8221986674075668, + "eval_loss": 0.2681734561920166, + "eval_pr_auc": 0.695850524773773, + "eval_precision": 0.710438829787234, + "eval_precision_macro": 0.8262878422645654, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.818279071504806, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.284, + "eval_steps_per_second": 3.842, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9724 + }, + { + "epoch": 375.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933143698186487, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700228832951945, + "eval_f1_macro": 0.8225089013937882, + "eval_loss": 0.2681812345981598, + "eval_pr_auc": 0.6956723886102156, + "eval_precision": 0.7099767981438515, + "eval_precision_macro": 0.8261913298268353, + "eval_pred_class_0": 16651, + "eval_pred_class_1": 3017, + "eval_predicted_binding_ratio": 0.15339637990644703, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189645411388546, + "eval_runtime": 0.1783, + "eval_samples_per_second": 914.221, + "eval_steps_per_second": 5.609, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9750 + }, + { + "epoch": 376.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9331405935193198, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002123835974514, + "eval_f1_macro": 0.8224831299290885, + "eval_loss": 0.2681320309638977, + "eval_pr_auc": 0.6957066245846253, + "eval_precision": 0.7096026490066225, + "eval_precision_macro": 0.8260290996114323, + "eval_pred_class_0": 16648, + "eval_pred_class_1": 3020, + "eval_predicted_binding_ratio": 0.1535489119381737, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.819065418490545, + "eval_runtime": 0.2617, + "eval_samples_per_second": 622.75, + "eval_steps_per_second": 3.821, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9776 + }, + { + "epoch": 377.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9331510656881976, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7010443864229765, + "eval_f1_macro": 0.8229384784439624, + "eval_loss": 0.2682173550128937, + "eval_pr_auc": 0.6955658216411763, + "eval_precision": 0.709613478691774, + "eval_precision_macro": 0.8261726428372638, + "eval_pred_class_0": 16641, + "eval_pred_class_1": 3027, + "eval_predicted_binding_ratio": 0.15390482001220257, + "eval_recall": 0.6926797807158981, + "eval_recall_macro": 0.8198112490831255, + "eval_runtime": 0.2446, + "eval_samples_per_second": 666.285, + "eval_steps_per_second": 4.088, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9802 + }, + { + "epoch": 378.0, + "eval_accuracy": 0.9065487085621314, + "eval_auc": 0.9331855381995037, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994767822105952, + "eval_f1_macro": 0.8220743333087894, + "eval_loss": 0.2681441009044647, + "eval_pr_auc": 0.6959256270686769, + "eval_precision": 0.709452736318408, + "eval_precision_macro": 0.8258426835378145, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8184506457289211, + "eval_runtime": 0.2608, + "eval_samples_per_second": 625.04, + "eval_steps_per_second": 3.835, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9828 + }, + { + "epoch": 379.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9331895674540718, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003267973856209, + "eval_f1_macro": 0.8225562214288413, + "eval_loss": 0.26809969544410706, + "eval_pr_auc": 0.6959934558189067, + "eval_precision": 0.7098376946008612, + "eval_precision_macro": 0.8261483505739005, + "eval_pred_class_0": 16649, + "eval_pred_class_1": 3019, + "eval_predicted_binding_ratio": 0.15349806792759813, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.8190955989698111, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.005, + "eval_steps_per_second": 3.828, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9854 + }, + { + "epoch": 380.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9332175581210724, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994928840176673, + "eval_f1_macro": 0.8220999320609059, + "eval_loss": 0.26810789108276367, + "eval_pr_auc": 0.6960299382483204, + "eval_precision": 0.7098273572377158, + "eval_precision_macro": 0.8260051771779358, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183497683772306, + "eval_runtime": 0.1782, + "eval_samples_per_second": 914.547, + "eval_steps_per_second": 5.611, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9880 + }, + { + "epoch": 381.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9332336167443518, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699607329842932, + "eval_f1_macro": 0.8221730364601127, + "eval_loss": 0.2680502235889435, + "eval_pr_auc": 0.6962912618518755, + "eval_precision": 0.7100631019594819, + "eval_precision_macro": 0.8261247850555049, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183799488564967, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.169, + "eval_steps_per_second": 3.946, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9906 + }, + { + "epoch": 382.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9332394951737218, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994106090373281, + "eval_f1_macro": 0.8220780022434744, + "eval_loss": 0.26803064346313477, + "eval_pr_auc": 0.696271279785013, + "eval_precision": 0.71034253408713, + "eval_precision_macro": 0.8262114206958068, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8181178331945835, + "eval_runtime": 0.1775, + "eval_samples_per_second": 918.497, + "eval_steps_per_second": 5.635, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9932 + }, + { + "epoch": 383.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9332396703587031, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.2680654227733612, + "eval_pr_auc": 0.6962410474404584, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.2478, + "eval_samples_per_second": 657.85, + "eval_steps_per_second": 4.036, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9958 + }, + { + "epoch": 384.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9332586000691747, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995090016366612, + "eval_f1_macro": 0.8221255355501671, + "eval_loss": 0.26804468035697937, + "eval_pr_auc": 0.6963737898708651, + "eval_precision": 0.7102027251578598, + "eval_precision_macro": 0.8261680532566416, + "eval_pred_class_0": 16659, + "eval_pred_class_1": 3009, + "eval_predicted_binding_ratio": 0.15298962782184258, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8182488910255401, + "eval_runtime": 0.1642, + "eval_samples_per_second": 992.895, + "eval_steps_per_second": 6.091, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9984 + }, + { + "epoch": 384.61538461538464, + "grad_norm": 19506.416015625, + "learning_rate": 1.5373466155541264e-07, + "loss": 0.1999, + "step": 10000 + }, + { + "epoch": 385.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9332885664334637, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6990990990990991, + "eval_f1_macro": 0.8219096951966862, + "eval_loss": 0.26817384362220764, + "eval_pr_auc": 0.6963813810367415, + "eval_precision": 0.7103861517976032, + "eval_precision_macro": 0.8261784335560267, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6881651080296678, + "eval_recall_macro": 0.8178255370534045, + "eval_runtime": 0.2609, + "eval_samples_per_second": 624.846, + "eval_steps_per_second": 3.833, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10010 + }, + { + "epoch": 386.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9332890919884074, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995251350908793, + "eval_f1_macro": 0.8221511437890823, + "eval_loss": 0.2681432366371155, + "eval_pr_auc": 0.6963686032935126, + "eval_precision": 0.7105788423153693, + "eval_precision_macro": 0.8263313128873689, + "eval_pred_class_0": 16662, + "eval_pred_class_1": 3006, + "eval_predicted_binding_ratio": 0.15283709579011592, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8181480136738496, + "eval_runtime": 0.2677, + "eval_samples_per_second": 609.003, + "eval_steps_per_second": 3.736, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10036 + }, + { + "epoch": 387.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9332816368942063, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6992961204779833, + "eval_f1_macro": 0.8220048784892098, + "eval_loss": 0.2681044936180115, + "eval_pr_auc": 0.6963340299375516, + "eval_precision": 0.7101063829787234, + "eval_precision_macro": 0.8260916068555082, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8180876527153176, + "eval_runtime": 0.2503, + "eval_samples_per_second": 651.154, + "eval_steps_per_second": 3.995, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10062 + }, + { + "epoch": 388.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9332853547088078, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699607329842932, + "eval_f1_macro": 0.8221730364601127, + "eval_loss": 0.26804089546203613, + "eval_pr_auc": 0.6964477494759991, + "eval_precision": 0.7100631019594819, + "eval_precision_macro": 0.8261247850555049, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183799488564967, + "eval_runtime": 0.2636, + "eval_samples_per_second": 618.349, + "eval_steps_per_second": 3.794, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10088 + }, + { + "epoch": 389.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9333026785569515, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997218131238749, + "eval_f1_macro": 0.8222461586311625, + "eval_loss": 0.26805874705314636, + "eval_pr_auc": 0.6965416515768459, + "eval_precision": 0.7102990033222591, + "eval_precision_macro": 0.8262444710452093, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184101293357626, + "eval_runtime": 0.1895, + "eval_samples_per_second": 860.093, + "eval_steps_per_second": 5.277, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10114 + }, + { + "epoch": 390.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9332940944928713, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26800334453582764, + "eval_pr_auc": 0.6965549091166009, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.2601, + "eval_samples_per_second": 626.7, + "eval_steps_per_second": 3.845, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10140 + }, + { + "epoch": 391.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9332915835081403, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699591169255928, + "eval_f1_macro": 0.8221474102804127, + "eval_loss": 0.26801303029060364, + "eval_pr_auc": 0.69649915263674, + "eval_precision": 0.7096881220968813, + "eval_precision_macro": 0.8259621107662262, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8184808262081871, + "eval_runtime": 0.2322, + "eval_samples_per_second": 702.068, + "eval_steps_per_second": 4.307, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10166 + }, + { + "epoch": 392.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9333394576705105, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2680598199367523, + "eval_pr_auc": 0.6966844521188784, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2619, + "eval_samples_per_second": 622.281, + "eval_steps_per_second": 3.818, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10192 + }, + { + "epoch": 393.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9333311363839021, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26812389492988586, + "eval_pr_auc": 0.696579843318821, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.2606, + "eval_samples_per_second": 625.459, + "eval_steps_per_second": 3.837, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10218 + }, + { + "epoch": 394.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9333747574442278, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6990990990990991, + "eval_f1_macro": 0.8219096951966862, + "eval_loss": 0.26815417408943176, + "eval_pr_auc": 0.6966758102563304, + "eval_precision": 0.7103861517976032, + "eval_precision_macro": 0.8261784335560267, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6881651080296678, + "eval_recall_macro": 0.8178255370534045, + "eval_runtime": 0.2489, + "eval_samples_per_second": 654.797, + "eval_steps_per_second": 4.017, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10244 + }, + { + "epoch": 395.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.93337473797923, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994106090373281, + "eval_f1_macro": 0.8220780022434744, + "eval_loss": 0.26812419295310974, + "eval_pr_auc": 0.6967071460749926, + "eval_precision": 0.71034253408713, + "eval_precision_macro": 0.8262114206958068, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8181178331945835, + "eval_runtime": 0.2546, + "eval_samples_per_second": 640.225, + "eval_steps_per_second": 3.928, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10270 + }, + { + "epoch": 396.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9333609762257046, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6966270582247817, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.1654, + "eval_samples_per_second": 985.392, + "eval_steps_per_second": 6.045, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10296 + }, + { + "epoch": 397.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9333962857319209, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997218131238749, + "eval_f1_macro": 0.8222461586311625, + "eval_loss": 0.268099308013916, + "eval_pr_auc": 0.6968255966064625, + "eval_precision": 0.7102990033222591, + "eval_precision_macro": 0.8262444710452093, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184101293357626, + "eval_runtime": 0.1974, + "eval_samples_per_second": 825.595, + "eval_steps_per_second": 5.065, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10322 + }, + { + "epoch": 398.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9334254248337986, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6987714987714988, + "eval_f1_macro": 0.8217158026492684, + "eval_loss": 0.26809167861938477, + "eval_pr_auc": 0.6970114900505864, + "eval_precision": 0.7100532623169108, + "eval_precision_macro": 0.8259819840149124, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6878426314092229, + "eval_recall_macro": 0.8176341182639161, + "eval_runtime": 0.1847, + "eval_samples_per_second": 882.621, + "eval_steps_per_second": 5.415, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10348 + }, + { + "epoch": 399.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9334154587548664, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994928840176673, + "eval_f1_macro": 0.8220999320609059, + "eval_loss": 0.26815035939216614, + "eval_pr_auc": 0.6968381832516852, + "eval_precision": 0.7098273572377158, + "eval_precision_macro": 0.8260051771779358, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183497683772306, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.281, + "eval_steps_per_second": 3.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10374 + }, + { + "epoch": 400.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9334343592678412, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6991976420501065, + "eval_f1_macro": 0.821957303073866, + "eval_loss": 0.26815110445022583, + "eval_pr_auc": 0.6969254623478301, + "eval_precision": 0.7102461743180306, + "eval_precision_macro": 0.826134970486347, + "eval_pred_class_0": 16662, + "eval_pred_class_1": 3006, + "eval_predicted_binding_ratio": 0.15283709579011592, + "eval_recall": 0.6884875846501128, + "eval_recall_macro": 0.817956594884361, + "eval_runtime": 0.2607, + "eval_samples_per_second": 625.319, + "eval_steps_per_second": 3.836, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10400 + }, + { + "epoch": 401.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9334508655860725, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6992961204779833, + "eval_f1_macro": 0.8220048784892098, + "eval_loss": 0.2681412398815155, + "eval_pr_auc": 0.6970213943546584, + "eval_precision": 0.7101063829787234, + "eval_precision_macro": 0.8260916068555082, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8180876527153176, + "eval_runtime": 0.2502, + "eval_samples_per_second": 651.598, + "eval_steps_per_second": 3.998, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10426 + }, + { + "epoch": 402.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9334578048578289, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6996235063021771, + "eval_f1_macro": 0.8221986674075668, + "eval_loss": 0.26816073060035706, + "eval_pr_auc": 0.697032431219364, + "eval_precision": 0.710438829787234, + "eval_precision_macro": 0.8262878422645654, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.818279071504806, + "eval_runtime": 0.2131, + "eval_samples_per_second": 764.767, + "eval_steps_per_second": 4.692, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10452 + }, + { + "epoch": 403.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9334644910846125, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6993945344460808, + "eval_f1_macro": 0.8220524214743572, + "eval_loss": 0.2681373655796051, + "eval_pr_auc": 0.6971117657586055, + "eval_precision": 0.7099667774086379, + "eval_precision_macro": 0.8260483424802825, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8182187105462742, + "eval_runtime": 0.1917, + "eval_samples_per_second": 850.439, + "eval_steps_per_second": 5.217, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10478 + }, + { + "epoch": 403.84615384615387, + "grad_norm": 20065.328125, + "learning_rate": 1.0857058873879127e-07, + "loss": 0.1991, + "step": 10500 + }, + { + "epoch": 404.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9334760435608745, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994106090373281, + "eval_f1_macro": 0.8220780022434744, + "eval_loss": 0.26816511154174805, + "eval_pr_auc": 0.6971952135976213, + "eval_precision": 0.71034253408713, + "eval_precision_macro": 0.8262114206958068, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8181178331945835, + "eval_runtime": 0.2491, + "eval_samples_per_second": 654.311, + "eval_steps_per_second": 4.014, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10504 + }, + { + "epoch": 405.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9334837419675497, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997218131238749, + "eval_f1_macro": 0.8222461586311625, + "eval_loss": 0.26816821098327637, + "eval_pr_auc": 0.6972179050703514, + "eval_precision": 0.7102990033222591, + "eval_precision_macro": 0.8262444710452093, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184101293357626, + "eval_runtime": 0.2552, + "eval_samples_per_second": 638.823, + "eval_steps_per_second": 3.919, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10530 + }, + { + "epoch": 406.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9334893478869489, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997218131238749, + "eval_f1_macro": 0.8222461586311625, + "eval_loss": 0.2681950330734253, + "eval_pr_auc": 0.6972103120395237, + "eval_precision": 0.7102990033222591, + "eval_precision_macro": 0.8262444710452093, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184101293357626, + "eval_runtime": 0.2003, + "eval_samples_per_second": 813.636, + "eval_steps_per_second": 4.992, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10556 + }, + { + "epoch": 407.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9334909050867821, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998363338788871, + "eval_f1_macro": 0.822319298583337, + "eval_loss": 0.2681744396686554, + "eval_pr_auc": 0.6972816477778223, + "eval_precision": 0.71053506148222, + "eval_precision_macro": 0.8263642352251727, + "eval_pred_class_0": 16659, + "eval_pred_class_1": 3009, + "eval_predicted_binding_ratio": 0.15298962782184258, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184403098150286, + "eval_runtime": 0.2497, + "eval_samples_per_second": 652.723, + "eval_steps_per_second": 4.004, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10582 + }, + { + "epoch": 408.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.933494973271346, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995090016366612, + "eval_f1_macro": 0.8221255355501671, + "eval_loss": 0.2681480348110199, + "eval_pr_auc": 0.697380910091478, + "eval_precision": 0.7102027251578598, + "eval_precision_macro": 0.8261680532566416, + "eval_pred_class_0": 16659, + "eval_pred_class_1": 3009, + "eval_predicted_binding_ratio": 0.15298962782184258, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8182488910255401, + "eval_runtime": 0.2536, + "eval_samples_per_second": 642.704, + "eval_steps_per_second": 3.943, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10608 + }, + { + "epoch": 409.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9334896009319218, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2680969536304474, + "eval_pr_auc": 0.6974108729960042, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2261, + "eval_samples_per_second": 721.003, + "eval_steps_per_second": 4.423, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10634 + }, + { + "epoch": 410.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9334926569365942, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699607329842932, + "eval_f1_macro": 0.8221730364601127, + "eval_loss": 0.26808932423591614, + "eval_pr_auc": 0.6974463532877748, + "eval_precision": 0.7100631019594819, + "eval_precision_macro": 0.8261247850555049, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183799488564967, + "eval_runtime": 0.2604, + "eval_samples_per_second": 625.923, + "eval_steps_per_second": 3.84, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10660 + }, + { + "epoch": 411.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9334912943867403, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2680690288543701, + "eval_pr_auc": 0.6974656777279113, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.181, + "eval_samples_per_second": 900.801, + "eval_steps_per_second": 5.526, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10686 + }, + { + "epoch": 412.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9334969197711376, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7000490918016691, + "eval_f1_macro": 0.8224398957879677, + "eval_loss": 0.26803234219551086, + "eval_pr_auc": 0.6975509824558107, + "eval_precision": 0.7106312292358804, + "eval_precision_macro": 0.8264405996101362, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8186015481252511, + "eval_runtime": 0.1785, + "eval_samples_per_second": 913.403, + "eval_steps_per_second": 5.604, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10712 + }, + { + "epoch": 413.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9334973090710958, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7000490918016691, + "eval_f1_macro": 0.8224398957879677, + "eval_loss": 0.26804181933403015, + "eval_pr_auc": 0.697539016898834, + "eval_precision": 0.7106312292358804, + "eval_precision_macro": 0.8264405996101362, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8186015481252511, + "eval_runtime": 0.1858, + "eval_samples_per_second": 877.163, + "eval_steps_per_second": 5.381, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10738 + }, + { + "epoch": 414.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9335163069090602, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998363338788871, + "eval_f1_macro": 0.822319298583337, + "eval_loss": 0.2680352032184601, + "eval_pr_auc": 0.697653341121327, + "eval_precision": 0.71053506148222, + "eval_precision_macro": 0.8263642352251727, + "eval_pred_class_0": 16659, + "eval_pred_class_1": 3009, + "eval_predicted_binding_ratio": 0.15298962782184258, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184403098150286, + "eval_runtime": 0.239, + "eval_samples_per_second": 682.004, + "eval_steps_per_second": 4.184, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10764 + }, + { + "epoch": 415.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9335094357647963, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004743988221822, + "eval_f1_macro": 0.8226809883524872, + "eval_loss": 0.2680439054965973, + "eval_pr_auc": 0.6975754183405896, + "eval_precision": 0.7108233731739708, + "eval_precision_macro": 0.8265932427829508, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189240247456961, + "eval_runtime": 0.2547, + "eval_samples_per_second": 640.063, + "eval_steps_per_second": 3.927, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10790 + }, + { + "epoch": 416.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9335170855089767, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004743988221822, + "eval_f1_macro": 0.8226809883524872, + "eval_loss": 0.26802244782447815, + "eval_pr_auc": 0.6976835126920541, + "eval_precision": 0.7108233731739708, + "eval_precision_macro": 0.8265932427829508, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189240247456961, + "eval_runtime": 0.1858, + "eval_samples_per_second": 877.122, + "eval_steps_per_second": 5.381, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10816 + }, + { + "epoch": 417.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9335191877287514, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004743988221822, + "eval_f1_macro": 0.8226809883524872, + "eval_loss": 0.26805901527404785, + "eval_pr_auc": 0.6976076287719296, + "eval_precision": 0.7108233731739708, + "eval_precision_macro": 0.8265932427829508, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189240247456961, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.253, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10842 + }, + { + "epoch": 418.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9335267206829443, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002617801047121, + "eval_f1_macro": 0.8225604590386311, + "eval_loss": 0.26806166768074036, + "eval_pr_auc": 0.6976721144908643, + "eval_precision": 0.7107273331119229, + "eval_precision_macro": 0.8265169354519211, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187627864354736, + "eval_runtime": 0.1992, + "eval_samples_per_second": 818.364, + "eval_steps_per_second": 5.021, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10868 + }, + { + "epoch": 419.0, + "eval_accuracy": 0.9069554606467358, + "eval_auc": 0.9335306331475249, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7006869479882237, + "eval_f1_macro": 0.8228014837466855, + "eval_loss": 0.2680812180042267, + "eval_pr_auc": 0.6976481882085733, + "eval_precision": 0.7109193494855626, + "eval_precision_macro": 0.8266695216356063, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190852630559187, + "eval_runtime": 0.2498, + "eval_samples_per_second": 652.627, + "eval_steps_per_second": 4.004, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10894 + }, + { + "epoch": 420.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9335429739562026, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26813384890556335, + "eval_pr_auc": 0.6976080200264206, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2232, + "eval_samples_per_second": 730.143, + "eval_steps_per_second": 4.479, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10920 + }, + { + "epoch": 421.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9335431686061818, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6999182338511856, + "eval_f1_macro": 0.8223410440199006, + "eval_loss": 0.2681698799133301, + "eval_pr_auc": 0.6975308484626278, + "eval_precision": 0.710019907100199, + "eval_precision_macro": 0.8261580260852261, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8186722449976755, + "eval_runtime": 0.1913, + "eval_samples_per_second": 852.153, + "eval_steps_per_second": 5.228, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10946 + }, + { + "epoch": 422.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9335561517597906, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.2681558430194855, + "eval_pr_auc": 0.6975926126749412, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.252, + "eval_samples_per_second": 646.818, + "eval_steps_per_second": 3.968, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10972 + }, + { + "epoch": 423.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9335701957557857, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6989687346537895, + "eval_f1_macro": 0.821811089570853, + "eval_loss": 0.2681851089000702, + "eval_pr_auc": 0.6976584248764129, + "eval_precision": 0.7097739361702128, + "eval_precision_macro": 0.825895371446451, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6884875846501128, + "eval_recall_macro": 0.8178962339258291, + "eval_runtime": 0.2518, + "eval_samples_per_second": 647.463, + "eval_steps_per_second": 3.972, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10998 + }, + { + "epoch": 423.0769230769231, + "grad_norm": 19880.513671875, + "learning_rate": 7.045132214180816e-08, + "loss": 0.198, + "step": 11000 + }, + { + "epoch": 424.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.933567850223537, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002452984464431, + "eval_f1_macro": 0.8225346777593885, + "eval_loss": 0.2681238353252411, + "eval_pr_auc": 0.6977041251052366, + "eval_precision": 0.7103516921035169, + "eval_precision_macro": 0.8263539414042262, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188636637871641, + "eval_runtime": 0.1802, + "eval_samples_per_second": 904.736, + "eval_steps_per_second": 5.551, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11024 + }, + { + "epoch": 425.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9335591688344673, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26810285449028015, + "eval_pr_auc": 0.6976771787125668, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2422, + "eval_samples_per_second": 673.038, + "eval_steps_per_second": 4.129, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11050 + }, + { + "epoch": 426.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9335456990559106, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700228832951945, + "eval_f1_macro": 0.8225089013937882, + "eval_loss": 0.2680869400501251, + "eval_pr_auc": 0.6976467269760291, + "eval_precision": 0.7099767981438515, + "eval_precision_macro": 0.8261913298268353, + "eval_pred_class_0": 16651, + "eval_pred_class_1": 3017, + "eval_predicted_binding_ratio": 0.15339637990644703, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189645411388546, + "eval_runtime": 0.1727, + "eval_samples_per_second": 943.897, + "eval_steps_per_second": 5.791, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11076 + }, + { + "epoch": 427.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9335531541501119, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004412485700278, + "eval_f1_macro": 0.8226293306702985, + "eval_loss": 0.26812946796417236, + "eval_pr_auc": 0.6976292182478663, + "eval_precision": 0.7100728959575878, + "eval_precision_macro": 0.8262676792100252, + "eval_pred_class_0": 16650, + "eval_pred_class_1": 3018, + "eval_predicted_binding_ratio": 0.15344722391702256, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.8191257794490772, + "eval_runtime": 0.2563, + "eval_samples_per_second": 635.952, + "eval_steps_per_second": 3.902, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11102 + }, + { + "epoch": 428.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9335772907475254, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002452984464431, + "eval_f1_macro": 0.8225346777593885, + "eval_loss": 0.26813971996307373, + "eval_pr_auc": 0.6977601654966087, + "eval_precision": 0.7103516921035169, + "eval_precision_macro": 0.8263539414042262, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188636637871641, + "eval_runtime": 0.2428, + "eval_samples_per_second": 671.434, + "eval_steps_per_second": 4.119, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11128 + }, + { + "epoch": 429.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9335880938213678, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6989687346537895, + "eval_f1_macro": 0.821811089570853, + "eval_loss": 0.26816368103027344, + "eval_pr_auc": 0.6977910817778257, + "eval_precision": 0.7097739361702128, + "eval_precision_macro": 0.825895371446451, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6884875846501128, + "eval_recall_macro": 0.8178962339258291, + "eval_runtime": 0.1821, + "eval_samples_per_second": 895.286, + "eval_steps_per_second": 5.493, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11154 + }, + { + "epoch": 430.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9335897288811925, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6993945344460808, + "eval_f1_macro": 0.8220524214743572, + "eval_loss": 0.2681582272052765, + "eval_pr_auc": 0.6978031299323104, + "eval_precision": 0.7099667774086379, + "eval_precision_macro": 0.8260483424802825, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8182187105462742, + "eval_runtime": 0.259, + "eval_samples_per_second": 629.36, + "eval_steps_per_second": 3.861, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11180 + }, + { + "epoch": 431.0, + "eval_accuracy": 0.9065487085621314, + "eval_auc": 0.9336081038392237, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6990831696136215, + "eval_f1_macro": 0.8218841874311036, + "eval_loss": 0.2681788206100464, + "eval_pr_auc": 0.6978648412555615, + "eval_precision": 0.7100099767209843, + "eval_precision_macro": 0.8260151318092648, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6884875846501128, + "eval_recall_macro": 0.8179264144050951, + "eval_runtime": 0.1932, + "eval_samples_per_second": 843.696, + "eval_steps_per_second": 5.176, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11206 + }, + { + "epoch": 432.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.933607091659332, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6989687346537895, + "eval_f1_macro": 0.821811089570853, + "eval_loss": 0.26817184686660767, + "eval_pr_auc": 0.6978628373729787, + "eval_precision": 0.7097739361702128, + "eval_precision_macro": 0.825895371446451, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6884875846501128, + "eval_recall_macro": 0.8178962339258291, + "eval_runtime": 0.2181, + "eval_samples_per_second": 747.26, + "eval_steps_per_second": 4.584, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11232 + }, + { + "epoch": 433.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336009699174881, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002452984464431, + "eval_f1_macro": 0.8225346777593885, + "eval_loss": 0.26814383268356323, + "eval_pr_auc": 0.6978726222778764, + "eval_precision": 0.7103516921035169, + "eval_precision_macro": 0.8263539414042262, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188636637871641, + "eval_runtime": 0.2581, + "eval_samples_per_second": 631.607, + "eval_steps_per_second": 3.875, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11258 + }, + { + "epoch": 434.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9335997338901205, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002452984464431, + "eval_f1_macro": 0.8225346777593885, + "eval_loss": 0.26813551783561707, + "eval_pr_auc": 0.6978611289947059, + "eval_precision": 0.7103516921035169, + "eval_precision_macro": 0.8263539414042262, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188636637871641, + "eval_runtime": 0.1828, + "eval_samples_per_second": 891.596, + "eval_steps_per_second": 5.47, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11284 + }, + { + "epoch": 435.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9335949455006336, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7001308044473512, + "eval_f1_macro": 0.8224615491231337, + "eval_loss": 0.2681216299533844, + "eval_pr_auc": 0.6978496657032133, + "eval_precision": 0.7101160862354893, + "eval_precision_macro": 0.8262344077367322, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188334833078981, + "eval_runtime": 0.2595, + "eval_samples_per_second": 628.033, + "eval_steps_per_second": 3.853, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11310 + }, + { + "epoch": 436.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933600045330087, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7001308044473512, + "eval_f1_macro": 0.8224615491231337, + "eval_loss": 0.2681162655353546, + "eval_pr_auc": 0.6978775373770459, + "eval_precision": 0.7101160862354893, + "eval_precision_macro": 0.8262344077367322, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188334833078981, + "eval_runtime": 0.2578, + "eval_samples_per_second": 632.163, + "eval_steps_per_second": 3.878, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11336 + }, + { + "epoch": 437.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9336053398095198, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.26813197135925293, + "eval_pr_auc": 0.6979041519553001, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.1888, + "eval_samples_per_second": 863.236, + "eval_steps_per_second": 5.296, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11362 + }, + { + "epoch": 438.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.933606196269428, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.26813068985939026, + "eval_pr_auc": 0.6979019651404079, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.2467, + "eval_samples_per_second": 660.854, + "eval_steps_per_second": 4.054, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11388 + }, + { + "epoch": 439.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336083958141924, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.26813721656799316, + "eval_pr_auc": 0.6979009956604231, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.1795, + "eval_samples_per_second": 908.065, + "eval_steps_per_second": 5.571, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11414 + }, + { + "epoch": 440.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336056512494865, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.2681269645690918, + "eval_pr_auc": 0.697889844022227, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.569, + "eval_steps_per_second": 3.881, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11440 + }, + { + "epoch": 441.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336110917164033, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.2681290805339813, + "eval_pr_auc": 0.6979179015521837, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.2643, + "eval_samples_per_second": 616.824, + "eval_steps_per_second": 3.784, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11466 + }, + { + "epoch": 442.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336152864234539, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.2681342363357544, + "eval_pr_auc": 0.697937589363129, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.1746, + "eval_samples_per_second": 933.511, + "eval_steps_per_second": 5.727, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11492 + }, + { + "epoch": 442.3076923076923, + "grad_norm": 19259.90625, + "learning_rate": 4.0062918659231006e-08, + "loss": 0.1984, + "step": 11500 + }, + { + "epoch": 443.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336222548927073, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6999182338511856, + "eval_f1_macro": 0.8223410440199006, + "eval_loss": 0.26813840866088867, + "eval_pr_auc": 0.6979755820701472, + "eval_precision": 0.710019907100199, + "eval_precision_macro": 0.8261580260852261, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8186722449976755, + "eval_runtime": 0.189, + "eval_samples_per_second": 862.533, + "eval_steps_per_second": 5.292, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11518 + }, + { + "epoch": 444.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336241624625029, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6999182338511856, + "eval_f1_macro": 0.8223410440199006, + "eval_loss": 0.2681417763233185, + "eval_pr_auc": 0.6980183846984902, + "eval_precision": 0.710019907100199, + "eval_precision_macro": 0.8261580260852261, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8186722449976755, + "eval_runtime": 0.1783, + "eval_samples_per_second": 913.982, + "eval_steps_per_second": 5.607, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11544 + }, + { + "epoch": 445.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.933623228142603, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.26812103390693665, + "eval_pr_auc": 0.6980203083536239, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.2463, + "eval_samples_per_second": 661.813, + "eval_steps_per_second": 4.06, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11570 + }, + { + "epoch": 446.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.933622994562628, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.26812058687210083, + "eval_pr_auc": 0.6980222288630917, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.25, + "eval_samples_per_second": 651.994, + "eval_steps_per_second": 4.0, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11596 + }, + { + "epoch": 447.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336267026447306, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.2681138813495636, + "eval_pr_auc": 0.6980542679927909, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.1865, + "eval_samples_per_second": 873.899, + "eval_steps_per_second": 5.361, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11622 + }, + { + "epoch": 448.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336341674714307, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26811888813972473, + "eval_pr_auc": 0.6980920829430033, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2363, + "eval_samples_per_second": 689.772, + "eval_steps_per_second": 4.232, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11648 + }, + { + "epoch": 449.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336351017913307, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7001472272206772, + "eval_f1_macro": 0.8224873029219602, + "eval_loss": 0.26812025904655457, + "eval_pr_auc": 0.6980982354073687, + "eval_precision": 0.7104913678618858, + "eval_precision_macro": 0.8263972209146124, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187326059562077, + "eval_runtime": 0.2566, + "eval_samples_per_second": 635.262, + "eval_steps_per_second": 3.897, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11674 + }, + { + "epoch": 450.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336350044663411, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.2681255340576172, + "eval_pr_auc": 0.69809285211945, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.1865, + "eval_samples_per_second": 874.141, + "eval_steps_per_second": 5.363, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11700 + }, + { + "epoch": 451.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9336387320134417, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.2681383192539215, + "eval_pr_auc": 0.6981051991963814, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.2522, + "eval_samples_per_second": 646.252, + "eval_steps_per_second": 3.965, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11726 + }, + { + "epoch": 452.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336409802207007, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003598298985934, + "eval_f1_macro": 0.8226078241660808, + "eval_loss": 0.26812514662742615, + "eval_pr_auc": 0.6981289571890097, + "eval_precision": 0.7105874543644208, + "eval_precision_macro": 0.8264735530603251, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188938442664302, + "eval_runtime": 0.1751, + "eval_samples_per_second": 930.856, + "eval_steps_per_second": 5.711, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11752 + }, + { + "epoch": 453.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336424400955443, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26812946796417236, + "eval_pr_auc": 0.6981354214954091, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2533, + "eval_samples_per_second": 643.502, + "eval_steps_per_second": 3.948, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11778 + }, + { + "epoch": 454.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336481628049311, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2681373655796051, + "eval_pr_auc": 0.6981529233150263, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2333, + "eval_samples_per_second": 698.566, + "eval_steps_per_second": 4.286, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11804 + }, + { + "epoch": 455.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336448537552857, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26815658807754517, + "eval_pr_auc": 0.6981319681162118, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.1829, + "eval_samples_per_second": 891.289, + "eval_steps_per_second": 5.468, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11830 + }, + { + "epoch": 456.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933646138445148, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.268153578042984, + "eval_pr_auc": 0.6981406301220767, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.693, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11856 + }, + { + "epoch": 457.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.933646138445148, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.268160343170166, + "eval_pr_auc": 0.6981424953255787, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.2524, + "eval_samples_per_second": 645.855, + "eval_steps_per_second": 3.962, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11882 + }, + { + "epoch": 458.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9336505959296704, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.26815977692604065, + "eval_pr_auc": 0.6981608628032375, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.1785, + "eval_samples_per_second": 913.407, + "eval_steps_per_second": 5.604, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11908 + }, + { + "epoch": 459.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336521336645056, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26815950870513916, + "eval_pr_auc": 0.6981611753342029, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.1894, + "eval_samples_per_second": 860.719, + "eval_steps_per_second": 5.28, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11934 + }, + { + "epoch": 460.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9336499146547433, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.2681548595428467, + "eval_pr_auc": 0.6981527417806164, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.2094, + "eval_samples_per_second": 778.376, + "eval_steps_per_second": 4.775, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11960 + }, + { + "epoch": 461.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336533599593742, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2681526839733124, + "eval_pr_auc": 0.6981686335311912, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.254, + "eval_steps_per_second": 3.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11986 + }, + { + "epoch": 461.53846153846155, + "grad_norm": 19181.365234375, + "learning_rate": 1.7952297882945e-08, + "loss": 0.1977, + "step": 12000 + }, + { + "epoch": 462.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336556178991323, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26815271377563477, + "eval_pr_auc": 0.6981807528411922, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2521, + "eval_samples_per_second": 646.614, + "eval_steps_per_second": 3.967, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12012 + }, + { + "epoch": 463.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336526397544513, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.2681511640548706, + "eval_pr_auc": 0.6981676703517014, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.2048, + "eval_samples_per_second": 795.899, + "eval_steps_per_second": 4.883, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12038 + }, + { + "epoch": 464.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336516859695536, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26814842224121094, + "eval_pr_auc": 0.698161790632896, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.2527, + "eval_samples_per_second": 645.01, + "eval_steps_per_second": 3.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12064 + }, + { + "epoch": 465.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336534183543679, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26814672350883484, + "eval_pr_auc": 0.6981725600302674, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.1868, + "eval_samples_per_second": 872.776, + "eval_steps_per_second": 5.354, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12090 + }, + { + "epoch": 466.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336528344044305, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.2681439220905304, + "eval_pr_auc": 0.6981719802867735, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.2538, + "eval_samples_per_second": 642.256, + "eval_steps_per_second": 3.94, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12116 + }, + { + "epoch": 467.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336571167039716, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26814955472946167, + "eval_pr_auc": 0.6981930050756842, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.251, + "eval_samples_per_second": 649.332, + "eval_steps_per_second": 3.984, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12142 + }, + { + "epoch": 468.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336574573414352, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26814743876457214, + "eval_pr_auc": 0.6981957415820915, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.1759, + "eval_samples_per_second": 926.564, + "eval_steps_per_second": 5.684, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12168 + }, + { + "epoch": 469.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336602894986317, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2681457996368408, + "eval_pr_auc": 0.6982076318844164, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.1997, + "eval_samples_per_second": 816.139, + "eval_steps_per_second": 5.007, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12194 + }, + { + "epoch": 470.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336571945639633, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.2681419849395752, + "eval_pr_auc": 0.6981934072595471, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.578, + "eval_steps_per_second": 3.936, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12220 + }, + { + "epoch": 471.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336578855713892, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.2681434154510498, + "eval_pr_auc": 0.6981968133129176, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.471, + "eval_steps_per_second": 3.868, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12246 + }, + { + "epoch": 472.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336576422589153, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26814115047454834, + "eval_pr_auc": 0.6981937092453367, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2611, + "eval_samples_per_second": 624.347, + "eval_steps_per_second": 3.83, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12272 + }, + { + "epoch": 473.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933659540096212, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26814183592796326, + "eval_pr_auc": 0.698207050020266, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.1807, + "eval_samples_per_second": 902.095, + "eval_steps_per_second": 5.534, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12298 + }, + { + "epoch": 474.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.933661126493542, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813554763793945, + "eval_pr_auc": 0.6982130837277154, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2369, + "eval_samples_per_second": 688.198, + "eval_steps_per_second": 4.222, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12324 + }, + { + "epoch": 475.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336606204035962, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813259720802307, + "eval_pr_auc": 0.6982118437878516, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.1655, + "eval_samples_per_second": 984.988, + "eval_steps_per_second": 6.043, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12350 + }, + { + "epoch": 476.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.933660883181068, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813384890556335, + "eval_pr_auc": 0.6982130674055568, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2472, + "eval_samples_per_second": 659.287, + "eval_steps_per_second": 4.045, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12376 + }, + { + "epoch": 477.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336603284286276, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.2681376338005066, + "eval_pr_auc": 0.6982114384730127, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.234, + "eval_samples_per_second": 696.525, + "eval_steps_per_second": 4.273, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12402 + }, + { + "epoch": 478.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336606982635879, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26814013719558716, + "eval_pr_auc": 0.6982139708732891, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2492, + "eval_samples_per_second": 654.084, + "eval_steps_per_second": 4.013, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12428 + }, + { + "epoch": 479.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336606593335921, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813676953315735, + "eval_pr_auc": 0.6982155837128797, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2089, + "eval_samples_per_second": 780.114, + "eval_steps_per_second": 4.786, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12454 + }, + { + "epoch": 480.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336606009385984, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813599467277527, + "eval_pr_auc": 0.6982237203295948, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.239, + "eval_samples_per_second": 682.114, + "eval_steps_per_second": 4.185, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12480 + }, + { + "epoch": 480.7692307692308, + "grad_norm": 19666.140625, + "learning_rate": 4.5173988392051e-09, + "loss": 0.1976, + "step": 12500 + }, + { + "epoch": 481.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336608247860743, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813769340515137, + "eval_pr_auc": 0.6982221169303999, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2452, + "eval_samples_per_second": 664.649, + "eval_steps_per_second": 4.078, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12506 + }, + { + "epoch": 482.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336611459585399, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.2681364417076111, + "eval_pr_auc": 0.6982243970162039, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2509, + "eval_samples_per_second": 649.641, + "eval_steps_per_second": 3.986, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12532 + }, + { + "epoch": 483.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336621192084356, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.2681383192539215, + "eval_pr_auc": 0.6982286439182355, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.229, + "eval_samples_per_second": 711.694, + "eval_steps_per_second": 4.366, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12558 + }, + { + "epoch": 484.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336621970684273, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.2681386172771454, + "eval_pr_auc": 0.6982328773712362, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2516, + "eval_samples_per_second": 647.789, + "eval_steps_per_second": 3.974, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12584 + }, + { + "epoch": 485.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336618856284606, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.2681376338005066, + "eval_pr_auc": 0.6982308470568848, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.4219, + "eval_samples_per_second": 386.39, + "eval_steps_per_second": 2.37, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12610 + }, + { + "epoch": 486.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336622749284189, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813745498657227, + "eval_pr_auc": 0.6982319234202713, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2599, + "eval_samples_per_second": 627.256, + "eval_steps_per_second": 3.848, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12636 + }, + { + "epoch": 487.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336626836933752, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813769340515137, + "eval_pr_auc": 0.6982353364927889, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2613, + "eval_samples_per_second": 623.782, + "eval_steps_per_second": 3.827, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12662 + }, + { + "epoch": 488.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336625766358866, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813805103302, + "eval_pr_auc": 0.6982371615828771, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2497, + "eval_samples_per_second": 652.662, + "eval_steps_per_second": 4.004, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12688 + }, + { + "epoch": 489.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336626642283772, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982364881625377, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2636, + "eval_samples_per_second": 618.289, + "eval_steps_per_second": 3.793, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12714 + }, + { + "epoch": 490.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336626642283772, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982365330396263, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2579, + "eval_samples_per_second": 632.058, + "eval_steps_per_second": 3.878, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12740 + }, + { + "epoch": 491.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933662722623371, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982369774278672, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2559, + "eval_samples_per_second": 636.959, + "eval_steps_per_second": 3.908, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12766 + }, + { + "epoch": 492.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336627420883689, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982367884435748, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.1891, + "eval_samples_per_second": 862.094, + "eval_steps_per_second": 5.289, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12792 + }, + { + "epoch": 493.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336628199483605, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982375574473259, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.1955, + "eval_samples_per_second": 833.803, + "eval_steps_per_second": 5.115, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12818 + } + ], + "logging_steps": 500, + "max_steps": 13000, + "num_input_tokens_seen": 0, + "num_train_epochs": 500, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0486892367874104e+16, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/training_args.bin b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..43cdbe5adfb2a2b6cd48f66b6b5e6b0cc84c9bc3 --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fce922531bcc60b40ec3cfe0214120623a297c18ab37c3a2e94007f715374c7 +size 5368 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/config.json b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a424582b16776120730808f810ad9081375d580e --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/config.json @@ -0,0 +1,52 @@ +{ + "architectures": [ + "GloMeModelForTokenClassification" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "bos_token_id": 28, + "cdr_weight": 0.0, + "class_weights": [ + 0.1, + 0.9 + ], + "classifier_activation": "gelu", + "classifier_bias": false, + "classifier_dropout": 0.1, + "classifier_pooling": "cls", + "cls_token_id": 28, + "compress_block_size": 16, + "compress_block_sliding_stride": 16, + "decoder_bias": true, + "dice_weight": 0.1, + "embedding_dropout": 0.1, + "eos_token_id": 29, + "hidden_activation": "gelu", + "hidden_size": 320, + "inner_rank": 32, + "intermediate_size": 1280, + "kv_heads": 10, + "mask_token_id": 31, + "mlp_bias": false, + "mlp_dropout": 0.1, + "model_size": "tiny", + "model_type": "glome", + "norm_bias": false, + "norm_eps": 1e-05, + "num_attention_heads": 20, + "num_hidden_layers": 6, + "num_selected_blocks": 8, + "num_slots": 64, + "pad_token_id": 30, + "reference_compile": null, + "selection_block_size": 16, + "sep_token_id": 29, + "sliding_window_size": 0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.52.3", + "unk_token_id": 27, + "use_glome": true, + "use_nsa": true, + "vocab_size": 36 +} diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/model.safetensors b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d3c9641a30c32b394fa5068aea711c146e2e538 --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca38aef4bec71d6d96a0e9bdfd2d7bbd821f572d5ce42d4d2f34673aec533b44 +size 61385376 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/optimizer.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c3990d4ad426a586e408f8c7e00a3499227df79 --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d33f9cb88038c34fa64d87c5ddba2ba49323b162a432983750bfc5ffba921e20 +size 122881658 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/rng_state.pth b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a0f6c865946814c37bde90c90990950e7fca5e9b --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4867af6cbff014e37687b6a83467d119b6a2f6f101834ca5a80069323c97a0b +size 14244 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/scaler.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363 +size 988 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/scheduler.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..92e000eddb475f131fcea00cb4cde9f7d5d60e98 --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10c756fdb9db8bf7a472800df8b97780cb11d7cca932ec4aba9d03b9e6436112 +size 1064 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/trainer_state.json b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e0cab5c6035a2020692bc76aabb88abe32709b51 --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/trainer_state.json @@ -0,0 +1,12185 @@ +{ + "best_global_step": 12818, + "best_metric": 0.6982375574473259, + "best_model_checkpoint": "./results/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818", + "epoch": 499.0, + "eval_steps": 500, + "global_step": 12974, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy": 0.3402481187716087, + "eval_auc": 0.3906724936824889, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25902238465052535, + "eval_f1_macro": 0.3322238022959372, + "eval_loss": 1.061540961265564, + "eval_pr_auc": 0.12123677424188789, + "eval_precision": 0.15737977933523004, + "eval_precision_macro": 0.49946219326282143, + "eval_pred_class_0": 5257, + "eval_pred_class_1": 14411, + "eval_predicted_binding_ratio": 0.7327130364043116, + "eval_recall": 0.7313769751693002, + "eval_recall_macro": 0.49920692785748166, + "eval_runtime": 0.3106, + "eval_samples_per_second": 524.711, + "eval_steps_per_second": 3.219, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 26 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.34141753101484645, + "eval_auc": 0.39093619574173194, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25910884859577876, + "eval_f1_macro": 0.33318779271412513, + "eval_loss": 1.0595855712890625, + "eval_pr_auc": 0.12129083172780017, + "eval_precision": 0.15748852732582394, + "eval_precision_macro": 0.4996674570038125, + "eval_pred_class_0": 5286, + "eval_pred_class_1": 14382, + "eval_predicted_binding_ratio": 0.7312385600976204, + "eval_recall": 0.7304095453079652, + "eval_recall_macro": 0.4995079053877304, + "eval_runtime": 0.2611, + "eval_samples_per_second": 624.188, + "eval_steps_per_second": 3.829, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 52 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.3436038234695953, + "eval_auc": 0.3913807276315981, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2594079853143644, + "eval_f1_macro": 0.3350089597864736, + "eval_loss": 1.0562976598739624, + "eval_pr_auc": 0.1213805792649038, + "eval_precision": 0.15776986951364175, + "eval_precision_macro": 0.5001890381857135, + "eval_pred_class_0": 5337, + "eval_pred_class_1": 14331, + "eval_predicted_binding_ratio": 0.7286455155582673, + "eval_recall": 0.7291196388261851, + "eval_recall_macro": 0.5002814346723429, + "eval_runtime": 0.2651, + "eval_samples_per_second": 614.877, + "eval_steps_per_second": 3.772, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.345688427903193, + "eval_auc": 0.39204411422551294, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25976416450963474, + "eval_f1_macro": 0.3367519287310599, + "eval_loss": 1.0516862869262695, + "eval_pr_auc": 0.1215177922821225, + "eval_precision": 0.15807896947633715, + "eval_precision_macro": 0.5007519661646174, + "eval_pred_class_0": 5384, + "eval_pred_class_1": 14284, + "eval_predicted_binding_ratio": 0.7262558470612162, + "eval_recall": 0.72815220896485, + "eval_recall_macro": 0.5011256608293798, + "eval_runtime": 0.2689, + "eval_samples_per_second": 606.218, + "eval_steps_per_second": 3.719, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 104 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.3489424445800285, + "eval_auc": 0.39286881698964193, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25986937171261776, + "eval_f1_macro": 0.33937421387990774, + "eval_loss": 1.0457645654678345, + "eval_pr_auc": 0.12168361829310792, + "eval_precision": 0.15830985915492957, + "eval_precision_macro": 0.5011556611063601, + "eval_pred_class_0": 5468, + "eval_pred_class_1": 14200, + "eval_predicted_binding_ratio": 0.7219849501728697, + "eval_recall": 0.7249274427603999, + "eval_recall_macro": 0.5017466331928395, + "eval_runtime": 0.2596, + "eval_samples_per_second": 627.89, + "eval_steps_per_second": 3.852, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 130 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.3526540573520439, + "eval_auc": 0.3938679358675814, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2595952547103978, + "eval_f1_macro": 0.34226375201644554, + "eval_loss": 1.0385552644729614, + "eval_pr_auc": 0.12189495582289459, + "eval_precision": 0.15835402625044342, + "eval_precision_macro": 0.5012118238196412, + "eval_pred_class_0": 5573, + "eval_pred_class_1": 14095, + "eval_predicted_binding_ratio": 0.7166463290624364, + "eval_recall": 0.7197678168332796, + "eval_recall_macro": 0.5018528828839544, + "eval_runtime": 0.2682, + "eval_samples_per_second": 607.675, + "eval_steps_per_second": 3.728, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 156 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.3575859263778727, + "eval_auc": 0.39509779283079605, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25933524825605253, + "eval_f1_macro": 0.34607916966826957, + "eval_loss": 1.0300335884094238, + "eval_pr_auc": 0.12215992714628282, + "eval_precision": 0.1584754262788365, + "eval_precision_macro": 0.5013918287261083, + "eval_pred_class_0": 5710, + "eval_pred_class_1": 13958, + "eval_predicted_binding_ratio": 0.7096806996135855, + "eval_recall": 0.7133182844243793, + "eval_recall_macro": 0.5021592327536275, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.797, + "eval_steps_per_second": 3.956, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 182 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.36261948342485256, + "eval_auc": 0.39656283563130934, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2583126257247663, + "eval_f1_macro": 0.3497589695442054, + "eval_loss": 1.0202081203460693, + "eval_pr_auc": 0.12247236024679278, + "eval_precision": 0.1581769436997319, + "eval_precision_macro": 0.5008542806107318, + "eval_pred_class_0": 5867, + "eval_pred_class_1": 13801, + "eval_predicted_binding_ratio": 0.7016981899532235, + "eval_recall": 0.7039664624314738, + "eval_recall_macro": 0.5013464231032241, + "eval_runtime": 0.2497, + "eval_samples_per_second": 652.667, + "eval_steps_per_second": 4.004, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 208 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.3690258287573724, + "eval_auc": 0.39822865015280895, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25848470363288717, + "eval_f1_macro": 0.3546848296925498, + "eval_loss": 1.0091010332107544, + "eval_pr_auc": 0.12282975659183427, + "eval_precision": 0.15863586358635864, + "eval_precision_macro": 0.5015788301853557, + "eval_pred_class_0": 6033, + "eval_pred_class_1": 13635, + "eval_predicted_binding_ratio": 0.6932580841976815, + "eval_recall": 0.6975169300225733, + "eval_recall_macro": 0.5025280068716114, + "eval_runtime": 0.2623, + "eval_samples_per_second": 621.417, + "eval_steps_per_second": 3.812, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 234 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.37553386211104334, + "eval_auc": 0.4001638991754374, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25788519637462237, + "eval_f1_macro": 0.35943500580602444, + "eval_loss": 0.9966734647750854, + "eval_pr_auc": 0.12325069957928089, + "eval_precision": 0.1586735073239646, + "eval_precision_macro": 0.5015911353953799, + "eval_pred_class_0": 6219, + "eval_pred_class_1": 13449, + "eval_predicted_binding_ratio": 0.6838010982306284, + "eval_recall": 0.6881651080296678, + "eval_recall_macro": 0.5025904311199223, + "eval_runtime": 0.2668, + "eval_samples_per_second": 610.918, + "eval_steps_per_second": 3.748, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 260 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.38382143583485867, + "eval_auc": 0.4023744221985687, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25855001529519733, + "eval_f1_macro": 0.3657153538700335, + "eval_loss": 0.9828852415084839, + "eval_pr_auc": 0.12373084625745168, + "eval_precision": 0.15954394442766537, + "eval_precision_macro": 0.5028728439448414, + "eval_pred_class_0": 6424, + "eval_pred_class_1": 13244, + "eval_predicted_binding_ratio": 0.6733780760626398, + "eval_recall": 0.6813930990003225, + "eval_recall_macro": 0.5047576347901956, + "eval_runtime": 0.2598, + "eval_samples_per_second": 627.475, + "eval_steps_per_second": 3.85, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 286 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.3912955053894651, + "eval_auc": 0.40482715792324586, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2574122317330356, + "eval_f1_macro": 0.370844480646392, + "eval_loss": 0.9678097367286682, + "eval_pr_auc": 0.12427357405982056, + "eval_precision": 0.15935796021810922, + "eval_precision_macro": 0.5025013059703454, + "eval_pred_class_0": 6647, + "eval_pred_class_1": 13021, + "eval_predicted_binding_ratio": 0.6620398617042912, + "eval_recall": 0.6691389874234118, + "eval_recall_macro": 0.5042139676659523, + "eval_runtime": 0.2363, + "eval_samples_per_second": 689.751, + "eval_steps_per_second": 4.232, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 312 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.4013117754728493, + "eval_auc": 0.40764224431659535, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2572383775941462, + "eval_f1_macro": 0.3779059068484294, + "eval_loss": 0.9513856172561646, + "eval_pr_auc": 0.12488748600523823, + "eval_precision": 0.15989648682559598, + "eval_precision_macro": 0.5031697587395765, + "eval_pred_class_0": 6916, + "eval_pred_class_1": 12752, + "eval_predicted_binding_ratio": 0.6483628228594671, + "eval_recall": 0.6575298290873912, + "eval_recall_macro": 0.5054414401669225, + "eval_runtime": 0.2536, + "eval_samples_per_second": 642.633, + "eval_steps_per_second": 3.943, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 338 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.4099552572706935, + "eval_auc": 0.4108023769954713, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25432114630855235, + "eval_f1_macro": 0.38308115532732967, + "eval_loss": 0.9335527420043945, + "eval_pr_auc": 0.12558462856716973, + "eval_precision": 0.15880276039159044, + "eval_precision_macro": 0.5015495900209409, + "eval_pred_class_0": 7206, + "eval_pred_class_1": 12462, + "eval_predicted_binding_ratio": 0.6336180597925565, + "eval_recall": 0.6381812318606901, + "eval_recall_macro": 0.5027086517847544, + "eval_runtime": 0.2682, + "eval_samples_per_second": 607.718, + "eval_steps_per_second": 3.728, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 364 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.41844620703681107, + "eval_auc": 0.4144857969457745, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2516356974613975, + "eval_f1_macro": 0.3880413644466475, + "eval_loss": 0.9142351150512695, + "eval_pr_auc": 0.1264136402678906, + "eval_precision": 0.15784289583846342, + "eval_precision_macro": 0.5002307331563727, + "eval_pred_class_0": 7485, + "eval_pred_class_1": 12183, + "eval_predicted_binding_ratio": 0.6194325808419768, + "eval_recall": 0.6201225411157691, + "eval_recall_macro": 0.5004095532886144, + "eval_runtime": 0.2527, + "eval_samples_per_second": 645.136, + "eval_steps_per_second": 3.958, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 390 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.42897091722595077, + "eval_auc": 0.41858189431685716, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24961582147390926, + "eval_f1_macro": 0.3943717007980979, + "eval_loss": 0.8934236168861389, + "eval_pr_auc": 0.12736412734017702, + "eval_precision": 0.15742457441429294, + "eval_precision_macro": 0.4996940867457263, + "eval_pred_class_0": 7802, + "eval_pred_class_1": 11866, + "eval_predicted_binding_ratio": 0.6033150294895261, + "eval_recall": 0.6023863269912931, + "eval_recall_macro": 0.4994487317940711, + "eval_runtime": 0.2416, + "eval_samples_per_second": 674.578, + "eval_steps_per_second": 4.139, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 416 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.4378177750660972, + "eval_auc": 0.42318268015385996, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24520445081575534, + "eval_f1_macro": 0.3986584493314002, + "eval_loss": 0.8710600733757019, + "eval_pr_auc": 0.12844830521974454, + "eval_precision": 0.15552476619328023, + "eval_precision_macro": 0.4974052402394973, + "eval_pred_class_0": 8120, + "eval_pred_class_1": 11548, + "eval_predicted_binding_ratio": 0.5871466341264999, + "eval_recall": 0.5791680103192518, + "eval_recall_macro": 0.4952639713574891, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.27, + "eval_steps_per_second": 3.891, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 442 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.4492576774455969, + "eval_auc": 0.4283753771124365, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.23911211014329867, + "eval_f1_macro": 0.4037791626413705, + "eval_loss": 0.8472632765769958, + "eval_pr_auc": 0.12969206942947384, + "eval_precision": 0.15285136955545578, + "eval_precision_macro": 0.4944498263457579, + "eval_pred_class_0": 8533, + "eval_pred_class_1": 11135, + "eval_predicted_binding_ratio": 0.5661480577587961, + "eval_recall": 0.5488552079974202, + "eval_recall_macro": 0.4897351430824307, + "eval_runtime": 0.281, + "eval_samples_per_second": 580.087, + "eval_steps_per_second": 3.559, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 468 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.46339231238560097, + "eval_auc": 0.4341240430739382, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.23588184187662903, + "eval_f1_macro": 0.41119432949496704, + "eval_loss": 0.8218646049499512, + "eval_pr_auc": 0.13110819406948146, + "eval_precision": 0.15208663990290355, + "eval_precision_macro": 0.4938729504080779, + "eval_pred_class_0": 8957, + "eval_pred_class_1": 10711, + "eval_predicted_binding_ratio": 0.544590197274761, + "eval_recall": 0.5253144147049339, + "eval_recall_macro": 0.4885580946585574, + "eval_runtime": 0.2447, + "eval_samples_per_second": 666.13, + "eval_steps_per_second": 4.087, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 494 + }, + { + "epoch": 19.23076923076923, + "grad_norm": 232728.109375, + "learning_rate": 3.8384615384615384e-07, + "loss": 0.99, + "step": 500 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.4798657718120805, + "eval_auc": 0.44078220133048324, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2309427153811457, + "eval_f1_macro": 0.4189975157915178, + "eval_loss": 0.7945711016654968, + "eval_pr_auc": 0.1327664236209388, + "eval_precision": 0.15057347318890305, + "eval_precision_macro": 0.49263119629657465, + "eval_pred_class_0": 9467, + "eval_pred_class_1": 10201, + "eval_predicted_binding_ratio": 0.5186597518812284, + "eval_recall": 0.49532408900354724, + "eval_recall_macro": 0.4861481916617905, + "eval_runtime": 0.249, + "eval_samples_per_second": 654.734, + "eval_steps_per_second": 4.017, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 520 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.49964409192597115, + "eval_auc": 0.4482004774880778, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22407947646455886, + "eval_f1_macro": 0.42742637388305044, + "eval_loss": 0.765658974647522, + "eval_pr_auc": 0.1347110745909903, + "eval_precision": 0.14829889375913172, + "eval_precision_macro": 0.4908656872127009, + "eval_pred_class_0": 10086, + "eval_pred_class_1": 9582, + "eval_predicted_binding_ratio": 0.48718730933496035, + "eval_recall": 0.4582392776523702, + "eval_recall_macro": 0.48281674753627146, + "eval_runtime": 0.2494, + "eval_samples_per_second": 653.647, + "eval_steps_per_second": 4.01, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 546 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.5294895261338214, + "eval_auc": 0.4563512312496838, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22025615099427032, + "eval_f1_macro": 0.44167751484473966, + "eval_loss": 0.7351489067077637, + "eval_pr_auc": 0.13684095333600696, + "eval_precision": 0.14908178396258698, + "eval_precision_macro": 0.49225486317659667, + "eval_pred_class_0": 10901, + "eval_pred_class_1": 8767, + "eval_predicted_binding_ratio": 0.4457494407158837, + "eval_recall": 0.4214769429216382, + "eval_recall_macro": 0.48559209613637894, + "eval_runtime": 0.2523, + "eval_samples_per_second": 645.993, + "eval_steps_per_second": 3.963, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 572 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.564317673378076, + "eval_auc": 0.46539531162556536, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21679919568595193, + "eval_f1_macro": 0.45751035677940843, + "eval_loss": 0.7033414244651794, + "eval_pr_auc": 0.1392772958743257, + "eval_precision": 0.15127551020408164, + "eval_precision_macro": 0.49468577674559844, + "eval_pred_class_0": 11828, + "eval_pred_class_1": 7840, + "eval_predicted_binding_ratio": 0.3986170429123449, + "eval_recall": 0.38245727184779105, + "eval_recall_macro": 0.49040772688786005, + "eval_runtime": 0.2665, + "eval_samples_per_second": 611.699, + "eval_steps_per_second": 3.753, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 598 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.6033150294895261, + "eval_auc": 0.4754341993823483, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21223747980613894, + "eval_f1_macro": 0.4735759293567254, + "eval_loss": 0.6706362962722778, + "eval_pr_auc": 0.14231930535250045, + "eval_precision": 0.1544906658826988, + "eval_precision_macro": 0.4975718001003078, + "eval_pred_class_0": 12865, + "eval_pred_class_1": 6803, + "eval_predicted_binding_ratio": 0.34589180394549524, + "eval_recall": 0.3389229280877136, + "eval_recall_macro": 0.49586334730576304, + "eval_runtime": 0.2619, + "eval_samples_per_second": 622.356, + "eval_steps_per_second": 3.818, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 624 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.6474984746796827, + "eval_auc": 0.4864891211002582, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2075665790376043, + "eval_f1_macro": 0.4904508280155492, + "eval_loss": 0.6374054551124573, + "eval_pr_auc": 0.14557281943245967, + "eval_precision": 0.16076487252124647, + "eval_precision_macro": 0.5021727358326632, + "eval_pred_class_0": 14020, + "eval_pred_class_1": 5648, + "eval_predicted_binding_ratio": 0.28716697173073014, + "eval_recall": 0.2928087713640761, + "eval_recall_macro": 0.5033489139611471, + "eval_runtime": 0.2341, + "eval_samples_per_second": 696.138, + "eval_steps_per_second": 4.271, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 650 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.6925462680496237, + "eval_auc": 0.49869307137754393, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.20360858685631503, + "eval_f1_macro": 0.5065549471155847, + "eval_loss": 0.6044979691505432, + "eval_pr_auc": 0.14935675594952297, + "eval_precision": 0.17208370436331255, + "eval_precision_macro": 0.5093417994668434, + "eval_pred_class_0": 15176, + "eval_pred_class_1": 4492, + "eval_predicted_binding_ratio": 0.22839129550538947, + "eval_recall": 0.2492744276039987, + "eval_recall_macro": 0.5123960114117054, + "eval_runtime": 0.2543, + "eval_samples_per_second": 640.94, + "eval_steps_per_second": 3.932, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 676 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.7326113483831604, + "eval_auc": 0.5121322314924708, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.1977116704805492, + "eval_f1_macro": 0.5186416868006297, + "eval_loss": 0.5727357268333435, + "eval_pr_auc": 0.15383837227298106, + "eval_precision": 0.18760856977417487, + "eval_precision_macro": 0.518159780138105, + "eval_pred_class_0": 16214, + "eval_pred_class_1": 3454, + "eval_predicted_binding_ratio": 0.1756152125279642, + "eval_recall": 0.20896485004837148, + "eval_recall_macro": 0.5197960002037596, + "eval_runtime": 0.2506, + "eval_samples_per_second": 650.309, + "eval_steps_per_second": 3.99, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 702 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.7681513117754728, + "eval_auc": 0.5270188672472933, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.18947742623533595, + "eval_f1_macro": 0.5271029967130403, + "eval_loss": 0.5426873564720154, + "eval_pr_auc": 0.1589999639181036, + "eval_precision": 0.21108910891089108, + "eval_precision_macro": 0.5306451786169109, + "eval_pred_class_0": 17143, + "eval_pred_class_1": 2525, + "eval_predicted_binding_ratio": 0.12838112670327437, + "eval_recall": 0.17188003869719445, + "eval_recall_macro": 0.5258205046507038, + "eval_runtime": 0.2527, + "eval_samples_per_second": 644.973, + "eval_steps_per_second": 3.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 728 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.7986068741102298, + "eval_auc": 0.5437619187831915, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.18076525336091004, + "eval_f1_macro": 0.5329784934669249, + "eval_loss": 0.5149086117744446, + "eval_pr_auc": 0.16564494894795073, + "eval_precision": 0.2520184544405998, + "eval_precision_macro": 0.5517368953367268, + "eval_pred_class_0": 17934, + "eval_pred_class_1": 1734, + "eval_predicted_binding_ratio": 0.08816351433801098, + "eval_recall": 0.14092228313447275, + "eval_recall_macro": 0.5313170599592205, + "eval_runtime": 0.258, + "eval_samples_per_second": 631.786, + "eval_steps_per_second": 3.876, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 754 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.8241814114297336, + "eval_auc": 0.5629712926123112, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.17548879351454458, + "eval_f1_macro": 0.5385440097559633, + "eval_loss": 0.4897482395172119, + "eval_pr_auc": 0.17432371223202417, + "eval_precision": 0.3366880146386093, + "eval_precision_macro": 0.5947773855158054, + "eval_pred_class_0": 18575, + "eval_pred_class_1": 1093, + "eval_predicted_binding_ratio": 0.05557250355908074, + "eval_recall": 0.11867139632376653, + "eval_recall_macro": 0.5374548506940254, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.589, + "eval_steps_per_second": 3.899, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 780 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.8372991661582265, + "eval_auc": 0.5843234707368185, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.16883116883116883, + "eval_f1_macro": 0.5393273806169032, + "eval_loss": 0.46770623326301575, + "eval_pr_auc": 0.18537280435724188, + "eval_precision": 0.43391188251001334, + "eval_precision_macro": 0.6435905413924347, + "eval_pred_class_0": 18919, + "eval_pred_class_1": 749, + "eval_predicted_binding_ratio": 0.038082163921090095, + "eval_recall": 0.10480490164463076, + "eval_recall_macro": 0.5396059276135269, + "eval_runtime": 0.262, + "eval_samples_per_second": 622.108, + "eval_steps_per_second": 3.817, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 806 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.8421293471629042, + "eval_auc": 0.6080693861773249, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.16012983500135244, + "eval_f1_macro": 0.5365030891665479, + "eval_loss": 0.44841739535331726, + "eval_pr_auc": 0.1997259509611161, + "eval_precision": 0.4966442953020134, + "eval_precision_macro": 0.6747850251677853, + "eval_pred_class_0": 19072, + "eval_pred_class_1": 596, + "eval_predicted_binding_ratio": 0.030303030303030304, + "eval_recall": 0.09545307965172525, + "eval_recall_macro": 0.5386723960460594, + "eval_runtime": 0.2596, + "eval_samples_per_second": 627.915, + "eval_steps_per_second": 3.852, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 832 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.8450782997762863, + "eval_auc": 0.6341019717128032, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.16497670594683475, + "eval_f1_macro": 0.5397977373430758, + "eval_loss": 0.4312308728694916, + "eval_pr_auc": 0.2181951536640109, + "eval_precision": 0.5492700729927007, + "eval_precision_macro": 0.7014132791741746, + "eval_pred_class_0": 19120, + "eval_pred_class_1": 548, + "eval_predicted_binding_ratio": 0.0278625177954037, + "eval_recall": 0.09706546275395034, + "eval_recall_macro": 0.5410781529982704, + "eval_runtime": 0.2519, + "eval_samples_per_second": 647.04, + "eval_steps_per_second": 3.97, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 858 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.8478238763473663, + "eval_auc": 0.6614343616815009, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.17932547299149987, + "eval_f1_macro": 0.5477310488609045, + "eval_loss": 0.41584137082099915, + "eval_pr_auc": 0.24110190314317137, + "eval_precision": 0.5989010989010989, + "eval_precision_macro": 0.7269162957114008, + "eval_pred_class_0": 19122, + "eval_pred_class_1": 546, + "eval_predicted_binding_ratio": 0.027760829774252593, + "eval_recall": 0.1054498548855208, + "eval_recall_macro": 0.546115402483504, + "eval_runtime": 0.2547, + "eval_samples_per_second": 640.016, + "eval_steps_per_second": 3.926, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 884 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.8509253609924751, + "eval_auc": 0.6891114086357669, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.20585048754062837, + "eval_f1_macro": 0.5617963020129356, + "eval_loss": 0.4015716016292572, + "eval_pr_auc": 0.2683830744239665, + "eval_precision": 0.6429780033840947, + "eval_precision_macro": 0.7501727569994856, + "eval_pred_class_0": 19077, + "eval_pred_class_1": 591, + "eval_predicted_binding_ratio": 0.030048810250152533, + "eval_recall": 0.12254111576910674, + "eval_recall_macro": 0.5549024767594251, + "eval_runtime": 0.2497, + "eval_samples_per_second": 652.872, + "eval_steps_per_second": 4.005, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 910 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.8537217815741306, + "eval_auc": 0.7168296727231165, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2366675510745556, + "eval_f1_macro": 0.5778889812054533, + "eval_loss": 0.3880736827850342, + "eval_pr_auc": 0.2994175694348318, + "eval_precision": 0.6676646706586826, + "eval_precision_macro": 0.7639639142767097, + "eval_pred_class_0": 19000, + "eval_pred_class_1": 668, + "eval_predicted_binding_ratio": 0.0339637990644702, + "eval_recall": 0.1438245727184779, + "eval_recall_macro": 0.5652122199621845, + "eval_runtime": 0.2693, + "eval_samples_per_second": 605.227, + "eval_steps_per_second": 3.713, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 936 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.8565690461663616, + "eval_auc": 0.743181046261935, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2768520892078954, + "eval_f1_macro": 0.59862076733571, + "eval_loss": 0.37574923038482666, + "eval_pr_auc": 0.33279138215623166, + "eval_precision": 0.675, + "eval_precision_macro": 0.7696337714649142, + "eval_pred_class_0": 18868, + "eval_pred_class_1": 800, + "eval_predicted_binding_ratio": 0.04067520846044336, + "eval_recall": 0.17413737504030957, + "eval_recall_macro": 0.5792217629109919, + "eval_runtime": 0.2774, + "eval_samples_per_second": 587.664, + "eval_steps_per_second": 3.605, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 962 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.8596705308114704, + "eval_auc": 0.7678773986205973, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.3205317577548006, + "eval_f1_macro": 0.6211435791665652, + "eval_loss": 0.36427780985832214, + "eval_pr_auc": 0.3671211589285648, + "eval_precision": 0.6774193548387096, + "eval_precision_macro": 0.7732261685723991, + "eval_pred_class_0": 18707, + "eval_pred_class_1": 961, + "eval_predicted_binding_ratio": 0.048861094163107584, + "eval_recall": 0.20993227990970656, + "eval_recall_macro": 0.5956101913823899, + "eval_runtime": 0.2611, + "eval_samples_per_second": 624.252, + "eval_steps_per_second": 3.83, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 988 + }, + { + "epoch": 38.46153846153846, + "grad_norm": 35024.03515625, + "learning_rate": 7.684615384615384e-07, + "loss": 0.5725, + "step": 1000 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.8642464917632703, + "eval_auc": 0.7904617013805764, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.3723554301833568, + "eval_f1_macro": 0.6481240123381296, + "eval_loss": 0.35397008061408997, + "eval_pr_auc": 0.40223746916130343, + "eval_precision": 0.6869037294015612, + "eval_precision_macro": 0.7810970172797707, + "eval_pred_class_0": 18515, + "eval_pred_class_1": 1153, + "eval_predicted_binding_ratio": 0.058623144193613995, + "eval_recall": 0.25540148339245405, + "eval_recall_macro": 0.6168055886811972, + "eval_runtime": 0.2572, + "eval_samples_per_second": 633.77, + "eval_steps_per_second": 3.888, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1014 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.867246288387228, + "eval_auc": 0.8102996097248453, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.4125984251968504, + "eval_f1_macro": 0.6688826868467987, + "eval_loss": 0.3446972072124481, + "eval_pr_auc": 0.43559149314237056, + "eval_precision": 0.6822916666666666, + "eval_precision_macro": 0.7815518582187295, + "eval_pred_class_0": 18324, + "eval_pred_class_1": 1344, + "eval_predicted_binding_ratio": 0.06833435021354485, + "eval_recall": 0.29571106094808125, + "eval_recall_macro": 0.634968465827454, + "eval_runtime": 0.2532, + "eval_samples_per_second": 643.748, + "eval_steps_per_second": 3.949, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1040 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.8715171852755745, + "eval_auc": 0.8272611461298317, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.4583065380493033, + "eval_f1_macro": 0.6927107089600444, + "eval_loss": 0.33654505014419556, + "eval_pr_auc": 0.4645782536288223, + "eval_precision": 0.6835038363171355, + "eval_precision_macro": 0.7856317237263981, + "eval_pred_class_0": 18104, + "eval_pred_class_1": 1564, + "eval_predicted_binding_ratio": 0.07952003254016676, + "eval_recall": 0.344727507255724, + "eval_recall_macro": 0.6574244163911867, + "eval_runtime": 0.2602, + "eval_samples_per_second": 626.484, + "eval_steps_per_second": 3.843, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1066 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.8743136058572301, + "eval_auc": 0.8416796876148132, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.4898885678910442, + "eval_f1_macro": 0.7091078096051335, + "eval_loss": 0.329649955034256, + "eval_pr_auc": 0.49067495219464874, + "eval_precision": 0.6802292263610316, + "eval_precision_macro": 0.7867195342316791, + "eval_pred_class_0": 17923, + "eval_pred_class_1": 1745, + "eval_predicted_binding_ratio": 0.08872279845434208, + "eval_recall": 0.38277974846823604, + "eval_recall_macro": 0.674549166803684, + "eval_runtime": 0.2641, + "eval_samples_per_second": 617.276, + "eval_steps_per_second": 3.787, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1092 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.8758897701850722, + "eval_auc": 0.8534597097025247, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5176842521240862, + "eval_f1_macro": 0.7232330815689724, + "eval_loss": 0.32387641072273254, + "eval_pr_auc": 0.5115876936649595, + "eval_precision": 0.6683673469387755, + "eval_precision_macro": 0.7836133097919538, + "eval_pred_class_0": 17708, + "eval_pred_class_1": 1960, + "eval_predicted_binding_ratio": 0.09965426072808622, + "eval_recall": 0.42244437278297325, + "eval_recall_macro": 0.6916048748685797, + "eval_runtime": 0.2631, + "eval_samples_per_second": 619.429, + "eval_steps_per_second": 3.8, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1118 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.878991254830181, + "eval_auc": 0.863260959032272, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5468392993145469, + "eval_f1_macro": 0.7385059071387897, + "eval_loss": 0.3189404308795929, + "eval_pr_auc": 0.5291286431025274, + "eval_precision": 0.6675964667596467, + "eval_precision_macro": 0.7862729722049646, + "eval_pred_class_0": 17517, + "eval_pred_class_1": 2151, + "eval_predicted_binding_ratio": 0.10936546674801709, + "eval_recall": 0.4630764269590455, + "eval_recall_macro": 0.7099591708043251, + "eval_runtime": 0.2583, + "eval_samples_per_second": 630.993, + "eval_steps_per_second": 3.871, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1144 + }, + { + "epoch": 45.0, + "eval_accuracy": 0.8797030709782387, + "eval_auc": 0.8710211865407248, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5603864734299517, + "eval_f1_macro": 0.7453519808982827, + "eval_loss": 0.3149340748786926, + "eval_pr_auc": 0.5420378923897758, + "eval_precision": 0.6611135466900482, + "eval_precision_macro": 0.7847466853482449, + "eval_pred_class_0": 17387, + "eval_pred_class_1": 2281, + "eval_predicted_binding_ratio": 0.11597518812283913, + "eval_recall": 0.48629474363108677, + "eval_recall_macro": 0.719817861342917, + "eval_runtime": 0.2505, + "eval_samples_per_second": 650.753, + "eval_steps_per_second": 3.992, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1170 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.8811775472849298, + "eval_auc": 0.8772876506442417, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5748590140076405, + "eval_f1_macro": 0.7528983447354317, + "eval_loss": 0.3115498721599579, + "eval_pr_auc": 0.5526462799402374, + "eval_precision": 0.659432387312187, + "eval_precision_macro": 0.7856853923591968, + "eval_pred_class_0": 17272, + "eval_pred_class_1": 2396, + "eval_predicted_binding_ratio": 0.12182224933902787, + "eval_recall": 0.509513060303128, + "eval_recall_macro": 0.7301292590704993, + "eval_runtime": 0.247, + "eval_samples_per_second": 659.808, + "eval_steps_per_second": 4.048, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1196 + }, + { + "epoch": 47.0, + "eval_accuracy": 0.8817368314012609, + "eval_auc": 0.8824923380415335, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5840486409155937, + "eval_f1_macro": 0.7575589340187262, + "eval_loss": 0.3087034523487091, + "eval_pr_auc": 0.5616002050007283, + "eval_precision": 0.6555600160578081, + "eval_precision_macro": 0.7850484483851945, + "eval_pred_class_0": 17177, + "eval_pred_class_1": 2491, + "eval_predicted_binding_ratio": 0.12665243034370552, + "eval_recall": 0.526604321186714, + "eval_recall_macro": 0.7374073093831197, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.602, + "eval_steps_per_second": 3.924, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1222 + }, + { + "epoch": 48.0, + "eval_accuracy": 0.8833638397396787, + "eval_auc": 0.8867719903429474, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5951288386869044, + "eval_f1_macro": 0.7634984852775182, + "eval_loss": 0.30617523193359375, + "eval_pr_auc": 0.5687331552143856, + "eval_precision": 0.6573099415204678, + "eval_precision_macro": 0.7872879591248483, + "eval_pred_class_0": 17103, + "eval_pred_class_1": 2565, + "eval_predicted_binding_ratio": 0.13041488712629654, + "eval_recall": 0.5436955820702999, + "eval_recall_macro": 0.7453191497603264, + "eval_runtime": 0.2573, + "eval_samples_per_second": 633.489, + "eval_steps_per_second": 3.886, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1248 + }, + { + "epoch": 49.0, + "eval_accuracy": 0.8840248118771609, + "eval_auc": 0.8901050792607902, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6022667829119442, + "eval_f1_macro": 0.7671909492667516, + "eval_loss": 0.3041446805000305, + "eval_pr_auc": 0.5742293420515451, + "eval_precision": 0.6556567957479119, + "eval_precision_macro": 0.7874972953730754, + "eval_pred_class_0": 17034, + "eval_pred_class_1": 2634, + "eval_predicted_binding_ratio": 0.13392312385600977, + "eval_recall": 0.5569171235085456, + "eval_recall_macro": 0.751084867060001, + "eval_runtime": 0.2527, + "eval_samples_per_second": 644.934, + "eval_steps_per_second": 3.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1274 + }, + { + "epoch": 50.0, + "eval_accuracy": 0.8846349400040675, + "eval_auc": 0.8931467576948593, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6055970797844603, + "eval_f1_macro": 0.7690165668701654, + "eval_loss": 0.30221912264823914, + "eval_pr_auc": 0.5797467982851593, + "eval_precision": 0.6568627450980392, + "eval_precision_macro": 0.7884983683177079, + "eval_pred_class_0": 17016, + "eval_pred_class_1": 2652, + "eval_predicted_binding_ratio": 0.13483831604636973, + "eval_recall": 0.561754272815221, + "eval_recall_macro": 0.7534129002755408, + "eval_runtime": 0.2505, + "eval_samples_per_second": 650.805, + "eval_steps_per_second": 3.993, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1300 + }, + { + "epoch": 51.0, + "eval_accuracy": 0.8854484441732764, + "eval_auc": 0.8956789398085232, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6095997227516895, + "eval_f1_macro": 0.7712381155096151, + "eval_loss": 0.30062082409858704, + "eval_pr_auc": 0.5844826815319759, + "eval_precision": 0.6588014981273408, + "eval_precision_macro": 0.7899255166833903, + "eval_pred_class_0": 16998, + "eval_pred_class_1": 2670, + "eval_predicted_binding_ratio": 0.13575350823672971, + "eval_recall": 0.5672363753627861, + "eval_recall_macro": 0.7561237710700572, + "eval_runtime": 0.2579, + "eval_samples_per_second": 631.92, + "eval_steps_per_second": 3.877, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1326 + }, + { + "epoch": 52.0, + "eval_accuracy": 0.8860077282896075, + "eval_auc": 0.8977014114868052, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6157010627356874, + "eval_f1_macro": 0.774389842453749, + "eval_loss": 0.2989857792854309, + "eval_pr_auc": 0.5879586440077966, + "eval_precision": 0.6571533113794366, + "eval_precision_macro": 0.7900469834133675, + "eval_pred_class_0": 16935, + "eval_pred_class_1": 2733, + "eval_predicted_binding_ratio": 0.13895668090298963, + "eval_recall": 0.5791680103192518, + "eval_recall_macro": 0.7613048960873738, + "eval_runtime": 0.2404, + "eval_samples_per_second": 677.963, + "eval_steps_per_second": 4.159, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1352 + }, + { + "epoch": 53.0, + "eval_accuracy": 0.8864144803742119, + "eval_auc": 0.8993954307902827, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6165465156196361, + "eval_f1_macro": 0.7749399244764847, + "eval_loss": 0.29775407910346985, + "eval_pr_auc": 0.5914083972949268, + "eval_precision": 0.6590825688073394, + "eval_precision_macro": 0.7910298047365505, + "eval_pred_class_0": 16943, + "eval_pred_class_1": 2725, + "eval_predicted_binding_ratio": 0.13854992881838518, + "eval_recall": 0.5791680103192518, + "eval_recall_macro": 0.7615463399215019, + "eval_runtime": 0.2547, + "eval_samples_per_second": 639.94, + "eval_steps_per_second": 3.926, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1378 + }, + { + "epoch": 54.0, + "eval_accuracy": 0.8866178564165141, + "eval_auc": 0.9007296980023092, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6186730506155951, + "eval_f1_macro": 0.7760410164688105, + "eval_loss": 0.296587198972702, + "eval_pr_auc": 0.59415963293408, + "eval_precision": 0.6585365853658537, + "eval_precision_macro": 0.7910908800004612, + "eval_pred_class_0": 16921, + "eval_pred_class_1": 2747, + "eval_predicted_binding_ratio": 0.1396684970510474, + "eval_recall": 0.583360206385037, + "eval_recall_macro": 0.7633708136410005, + "eval_runtime": 0.2605, + "eval_samples_per_second": 625.694, + "eval_steps_per_second": 3.839, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1404 + }, + { + "epoch": 55.0, + "eval_accuracy": 0.8872279845434208, + "eval_auc": 0.9019074471661075, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6215017064846416, + "eval_f1_macro": 0.7776226419864958, + "eval_loss": 0.2955063581466675, + "eval_pr_auc": 0.5967231989416606, + "eval_precision": 0.6600217470097861, + "eval_precision_macro": 0.7921612076464745, + "eval_pred_class_0": 16909, + "eval_pred_class_1": 2759, + "eval_predicted_binding_ratio": 0.14027862517795403, + "eval_recall": 0.5872299258303773, + "eval_recall_macro": 0.7653056733636705, + "eval_runtime": 0.2516, + "eval_samples_per_second": 647.741, + "eval_steps_per_second": 3.974, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1430 + }, + { + "epoch": 56.0, + "eval_accuracy": 0.8881431767337807, + "eval_auc": 0.9030401348597343, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6235455167693361, + "eval_f1_macro": 0.778929094226063, + "eval_loss": 0.29450690746307373, + "eval_pr_auc": 0.5995879576354929, + "eval_precision": 0.6642362376959533, + "eval_precision_macro": 0.7943337761596458, + "eval_pred_class_0": 16925, + "eval_pred_class_1": 2743, + "eval_predicted_binding_ratio": 0.13946512100874517, + "eval_recall": 0.5875524024508223, + "eval_recall_macro": 0.7659799798214153, + "eval_runtime": 0.2658, + "eval_samples_per_second": 613.135, + "eval_steps_per_second": 3.762, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1456 + }, + { + "epoch": 57.0, + "eval_accuracy": 0.8882957087655075, + "eval_auc": 0.9040587382005859, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6243802359377671, + "eval_f1_macro": 0.7793863433697854, + "eval_loss": 0.2936408519744873, + "eval_pr_auc": 0.6024898616264603, + "eval_precision": 0.6644832605531296, + "eval_precision_macro": 0.7945643253120258, + "eval_pred_class_0": 16920, + "eval_pred_class_1": 2748, + "eval_predicted_binding_ratio": 0.13971934106162295, + "eval_recall": 0.5888423089326024, + "eval_recall_macro": 0.7665947525830392, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.369, + "eval_steps_per_second": 3.898, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1482 + }, + { + "epoch": 57.69230769230769, + "grad_norm": 15613.5302734375, + "learning_rate": 9.992863736980368e-07, + "loss": 0.3115, + "step": 1500 + }, + { + "epoch": 58.0, + "eval_accuracy": 0.887888956680903, + "eval_auc": 0.9048886089216611, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6246808510638298, + "eval_f1_macro": 0.7793916194591735, + "eval_loss": 0.29281434416770935, + "eval_pr_auc": 0.603713292882509, + "eval_precision": 0.6614996395097332, + "eval_precision_macro": 0.7932808958765667, + "eval_pred_class_0": 16894, + "eval_pred_class_1": 2774, + "eval_predicted_binding_ratio": 0.14104128533658736, + "eval_recall": 0.5917445985166075, + "eval_recall_macro": 0.7675328292275196, + "eval_runtime": 0.2518, + "eval_samples_per_second": 647.428, + "eval_steps_per_second": 3.972, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1508 + }, + { + "epoch": 59.0, + "eval_accuracy": 0.887888956680903, + "eval_auc": 0.9056672866982218, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6251912289648138, + "eval_f1_macro": 0.7796389289833485, + "eval_loss": 0.2920655906200409, + "eval_pr_auc": 0.6054694565410179, + "eval_precision": 0.6610352264557872, + "eval_precision_macro": 0.7931493791878604, + "eval_pred_class_0": 16886, + "eval_pred_class_1": 2782, + "eval_predicted_binding_ratio": 0.14144803742119177, + "eval_recall": 0.5930345049983876, + "eval_recall_macro": 0.7680570605513457, + "eval_runtime": 0.235, + "eval_samples_per_second": 693.644, + "eval_steps_per_second": 4.255, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1534 + }, + { + "epoch": 60.0, + "eval_accuracy": 0.8882957087655075, + "eval_auc": 0.9063294664622661, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6254049445865303, + "eval_f1_macro": 0.7798830166450921, + "eval_loss": 0.29136955738067627, + "eval_pr_auc": 0.6071731602747702, + "eval_precision": 0.6635311143270622, + "eval_precision_macro": 0.7942892202018652, + "eval_pred_class_0": 16904, + "eval_pred_class_1": 2764, + "eval_predicted_binding_ratio": 0.14053284523083182, + "eval_recall": 0.5914221218961625, + "eval_recall_macro": 0.7676432152306912, + "eval_runtime": 0.2576, + "eval_samples_per_second": 632.702, + "eval_steps_per_second": 3.882, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1560 + }, + { + "epoch": 61.0, + "eval_accuracy": 0.8886007728289608, + "eval_auc": 0.9070085321120007, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6269368295589988, + "eval_f1_macro": 0.7807307642400976, + "eval_loss": 0.290680855512619, + "eval_pr_auc": 0.6088679721523397, + "eval_precision": 0.6641414141414141, + "eval_precision_macro": 0.7947837752525253, + "eval_pred_class_0": 16896, + "eval_pred_class_1": 2772, + "eval_predicted_binding_ratio": 0.14093959731543623, + "eval_recall": 0.5936794582392777, + "eval_recall_macro": 0.7687417029229828, + "eval_runtime": 0.2486, + "eval_samples_per_second": 655.642, + "eval_steps_per_second": 4.022, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1586 + }, + { + "epoch": 62.0, + "eval_accuracy": 0.8887533048606874, + "eval_auc": 0.9076136113046634, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6273841961852861, + "eval_f1_macro": 0.7810002501366307, + "eval_loss": 0.29004454612731934, + "eval_pr_auc": 0.6100991712198425, + "eval_precision": 0.664741970407795, + "eval_precision_macro": 0.7951158511564335, + "eval_pred_class_0": 16897, + "eval_pred_class_1": 2771, + "eval_predicted_binding_ratio": 0.1408887533048607, + "eval_recall": 0.5940019348597226, + "eval_recall_macro": 0.7689633021917374, + "eval_runtime": 0.2604, + "eval_samples_per_second": 625.871, + "eval_steps_per_second": 3.84, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1612 + }, + { + "epoch": 63.0, + "eval_accuracy": 0.8888041488712629, + "eval_auc": 0.9081136281710841, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6286296484971982, + "eval_f1_macro": 0.781621309135136, + "eval_loss": 0.28950682282447815, + "eval_pr_auc": 0.6111782063777282, + "eval_precision": 0.6639167862266858, + "eval_precision_macro": 0.794932326762632, + "eval_pred_class_0": 16880, + "eval_pred_class_1": 2788, + "eval_predicted_binding_ratio": 0.14175310148464512, + "eval_recall": 0.5969042244437278, + "eval_recall_macro": 0.7701730031496119, + "eval_runtime": 0.2489, + "eval_samples_per_second": 654.907, + "eval_steps_per_second": 4.018, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1638 + }, + { + "epoch": 64.0, + "eval_accuracy": 0.889363432987594, + "eval_auc": 0.9086206913667498, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6299319727891156, + "eval_f1_macro": 0.7824456611913058, + "eval_loss": 0.2888965606689453, + "eval_pr_auc": 0.6126297306007413, + "eval_precision": 0.6664267722202231, + "eval_precision_macro": 0.7962366556938643, + "eval_pred_class_0": 16889, + "eval_pred_class_1": 2779, + "eval_predicted_binding_ratio": 0.14129550538946511, + "eval_recall": 0.5972267010641729, + "eval_recall_macro": 0.7706360462524945, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.362, + "eval_steps_per_second": 3.935, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1664 + }, + { + "epoch": 65.0, + "eval_accuracy": 0.8900244051250763, + "eval_auc": 0.9091278518874051, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6305721605465414, + "eval_f1_macro": 0.782984177701663, + "eval_loss": 0.2884848117828369, + "eval_pr_auc": 0.6142560104629078, + "eval_precision": 0.6702977487291213, + "eval_precision_macro": 0.7980494301171916, + "eval_pred_class_0": 16914, + "eval_pred_class_1": 2754, + "eval_predicted_binding_ratio": 0.14002440512507627, + "eval_recall": 0.5952918413415027, + "eval_recall_macro": 0.7702420454972136, + "eval_runtime": 0.265, + "eval_samples_per_second": 614.992, + "eval_steps_per_second": 3.773, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1690 + }, + { + "epoch": 66.0, + "eval_accuracy": 0.8905836892414074, + "eval_auc": 0.9094545718773954, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6327645051194539, + "eval_f1_macro": 0.78423982216183, + "eval_loss": 0.2880232632160187, + "eval_pr_auc": 0.6147358252333397, + "eval_precision": 0.6719826023921711, + "eval_precision_macro": 0.7991174470355793, + "eval_pred_class_0": 16909, + "eval_pred_class_1": 2759, + "eval_predicted_binding_ratio": 0.14027862517795403, + "eval_recall": 0.5978716543050628, + "eval_recall_macro": 0.7716224934167917, + "eval_runtime": 0.2505, + "eval_samples_per_second": 650.665, + "eval_steps_per_second": 3.992, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1716 + }, + { + "epoch": 67.0, + "eval_accuracy": 0.8908887533048607, + "eval_auc": 0.9099424231201196, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6360244233378561, + "eval_f1_macro": 0.7859248910947654, + "eval_loss": 0.28752708435058594, + "eval_pr_auc": 0.6159928290925853, + "eval_precision": 0.6708407871198568, + "eval_precision_macro": 0.7990901618287602, + "eval_pred_class_0": 16873, + "eval_pred_class_1": 2795, + "eval_predicted_binding_ratio": 0.14210900955867398, + "eval_recall": 0.6046436633344082, + "eval_recall_macro": 0.7745557907424743, + "eval_runtime": 0.2471, + "eval_samples_per_second": 659.527, + "eval_steps_per_second": 4.046, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1742 + }, + { + "epoch": 68.0, + "eval_accuracy": 0.8910412853365873, + "eval_auc": 0.910346516476819, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6362247496180614, + "eval_f1_macro": 0.7860747010162366, + "eval_loss": 0.2870826721191406, + "eval_pr_auc": 0.6168347475575285, + "eval_precision": 0.6716845878136201, + "eval_precision_macro": 0.7994932004123201, + "eval_pred_class_0": 16878, + "eval_pred_class_1": 2790, + "eval_predicted_binding_ratio": 0.14185478950579622, + "eval_recall": 0.6043211867139633, + "eval_recall_macro": 0.7745152743493158, + "eval_runtime": 0.266, + "eval_samples_per_second": 612.866, + "eval_steps_per_second": 3.76, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1768 + }, + { + "epoch": 69.0, + "eval_accuracy": 0.8910921293471629, + "eval_auc": 0.9107640601470772, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6352179836512262, + "eval_f1_macro": 0.7856044496310159, + "eval_loss": 0.28665614128112793, + "eval_pr_auc": 0.6181373929491851, + "eval_precision": 0.673042223024179, + "eval_precision_macro": 0.7999465716529429, + "eval_pred_class_0": 16897, + "eval_pred_class_1": 2771, + "eval_predicted_binding_ratio": 0.1408887533048607, + "eval_recall": 0.6014188971299581, + "eval_recall_macro": 0.7733659343499732, + "eval_runtime": 0.2549, + "eval_samples_per_second": 639.359, + "eval_steps_per_second": 3.922, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1794 + }, + { + "epoch": 70.0, + "eval_accuracy": 0.8910412853365873, + "eval_auc": 0.9111344401273891, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6354822248681748, + "eval_f1_macro": 0.7857149295725039, + "eval_loss": 0.28624698519706726, + "eval_pr_auc": 0.6190938884927122, + "eval_precision": 0.6724262059035278, + "eval_precision_macro": 0.7997122148522967, + "eval_pred_class_0": 16890, + "eval_pred_class_1": 2778, + "eval_predicted_binding_ratio": 0.14124466137888958, + "eval_recall": 0.6023863269912931, + "eval_recall_macro": 0.7737289273635768, + "eval_runtime": 0.2673, + "eval_samples_per_second": 609.866, + "eval_steps_per_second": 3.742, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1820 + }, + { + "epoch": 71.0, + "eval_accuracy": 0.891193817368314, + "eval_auc": 0.9114138601724477, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.63642541624193, + "eval_f1_macro": 0.7862246662674524, + "eval_loss": 0.2858646512031555, + "eval_pr_auc": 0.6197061363545492, + "eval_precision": 0.6725314183123878, + "eval_precision_macro": 0.7998977650704271, + "eval_pred_class_0": 16883, + "eval_pred_class_1": 2785, + "eval_predicted_binding_ratio": 0.14160056945291843, + "eval_recall": 0.6039987100935182, + "eval_recall_macro": 0.7744747579561573, + "eval_runtime": 0.2386, + "eval_samples_per_second": 683.037, + "eval_steps_per_second": 4.19, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1846 + }, + { + "epoch": 72.0, + "eval_accuracy": 0.8913971934106162, + "eval_auc": 0.9118766988928523, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6373514431239389, + "eval_f1_macro": 0.7867436519572335, + "eval_loss": 0.2853938341140747, + "eval_pr_auc": 0.6212208808374569, + "eval_precision": 0.6730010756543564, + "eval_precision_macro": 0.8002424656665053, + "eval_pred_class_0": 16879, + "eval_pred_class_1": 2789, + "eval_predicted_binding_ratio": 0.14180394549522066, + "eval_recall": 0.6052886165752983, + "eval_recall_macro": 0.7751197111970474, + "eval_runtime": 0.249, + "eval_samples_per_second": 654.683, + "eval_steps_per_second": 4.016, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1872 + }, + { + "epoch": 73.0, + "eval_accuracy": 0.891193817368314, + "eval_auc": 0.9121406831945651, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.637411047102677, + "eval_f1_macro": 0.7867021736679862, + "eval_loss": 0.2850610911846161, + "eval_pr_auc": 0.6219405042066507, + "eval_precision": 0.6715458764726884, + "eval_precision_macro": 0.79960764506032, + "eval_pred_class_0": 16867, + "eval_pred_class_1": 2801, + "eval_predicted_binding_ratio": 0.14241407362212732, + "eval_recall": 0.6065785230570784, + "eval_recall_macro": 0.7755232206038093, + "eval_runtime": 0.2633, + "eval_samples_per_second": 618.95, + "eval_steps_per_second": 3.797, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1898 + }, + { + "epoch": 74.0, + "eval_accuracy": 0.8912446613788896, + "eval_auc": 0.9124869655074592, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.637026981164093, + "eval_f1_macro": 0.7865337040796394, + "eval_loss": 0.284681111574173, + "eval_pr_auc": 0.6229948438184316, + "eval_precision": 0.6722779369627507, + "eval_precision_macro": 0.7998744508231626, + "eval_pred_class_0": 16876, + "eval_pred_class_1": 2792, + "eval_predicted_binding_ratio": 0.14195647752694732, + "eval_recall": 0.6052886165752983, + "eval_recall_macro": 0.7750291697592493, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.745, + "eval_steps_per_second": 3.925, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1924 + }, + { + "epoch": 75.0, + "eval_accuracy": 0.8913971934106162, + "eval_auc": 0.9128360118500571, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6385786802030456, + "eval_f1_macro": 0.7873381643700563, + "eval_loss": 0.2842992842197418, + "eval_pr_auc": 0.6238239183047751, + "eval_precision": 0.6717693129227483, + "eval_precision_macro": 0.7998801484834395, + "eval_pred_class_0": 16859, + "eval_pred_class_1": 2809, + "eval_predicted_binding_ratio": 0.14282082570673174, + "eval_recall": 0.6085133827797484, + "eval_recall_macro": 0.7764302895066123, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.282, + "eval_steps_per_second": 3.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1950 + }, + { + "epoch": 76.0, + "eval_accuracy": 0.8913971934106162, + "eval_auc": 0.9131794425407568, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.638700947225981, + "eval_f1_macro": 0.7873973860112672, + "eval_loss": 0.2839708924293518, + "eval_pr_auc": 0.6248797725776689, + "eval_precision": 0.671647100675916, + "eval_precision_macro": 0.7998444318708524, + "eval_pred_class_0": 16857, + "eval_pred_class_1": 2811, + "eval_predicted_binding_ratio": 0.14292251372788287, + "eval_recall": 0.6088358594001935, + "eval_recall_macro": 0.7765613473375688, + "eval_runtime": 0.2644, + "eval_samples_per_second": 616.603, + "eval_steps_per_second": 3.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1976 + }, + { + "epoch": 76.92307692307692, + "grad_norm": 18483.060546875, + "learning_rate": 9.912189372587507e-07, + "loss": 0.2796, + "step": 2000 + }, + { + "epoch": 77.0, + "eval_accuracy": 0.891651413463494, + "eval_auc": 0.9134005357195656, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6389971201084195, + "eval_f1_macro": 0.7876288504858192, + "eval_loss": 0.28368592262268066, + "eval_pr_auc": 0.6256253637409228, + "eval_precision": 0.6730906495360457, + "eval_precision_macro": 0.8005261145225586, + "eval_pred_class_0": 16866, + "eval_pred_class_1": 2802, + "eval_predicted_binding_ratio": 0.14246491763270286, + "eval_recall": 0.6081909061593035, + "eval_recall_macro": 0.7764501340719858, + "eval_runtime": 0.2557, + "eval_samples_per_second": 637.408, + "eval_steps_per_second": 3.91, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2002 + }, + { + "epoch": 78.0, + "eval_accuracy": 0.8918547895057962, + "eval_auc": 0.9135784263355038, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6399187404773997, + "eval_f1_macro": 0.788145675542478, + "eval_loss": 0.283357173204422, + "eval_pr_auc": 0.6259773419133142, + "eval_precision": 0.6735566642908054, + "eval_precision_macro": 0.8008691873227245, + "eval_pred_class_0": 16862, + "eval_pred_class_1": 2806, + "eval_predicted_binding_ratio": 0.14266829367500508, + "eval_recall": 0.6094808126410836, + "eval_recall_macro": 0.7770950873128759, + "eval_runtime": 0.2408, + "eval_samples_per_second": 676.804, + "eval_steps_per_second": 4.152, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2028 + }, + { + "epoch": 79.0, + "eval_accuracy": 0.8920073215375229, + "eval_auc": 0.9139080660751812, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6396335256192739, + "eval_f1_macro": 0.788060288914535, + "eval_loss": 0.28298139572143555, + "eval_pr_auc": 0.6270064177031266, + "eval_precision": 0.6749015395631937, + "eval_precision_macro": 0.8014211401519672, + "eval_pred_class_0": 16875, + "eval_pred_class_1": 2793, + "eval_predicted_binding_ratio": 0.14200732153752288, + "eval_recall": 0.6078684295388584, + "eval_recall_macro": 0.7765303395958915, + "eval_runtime": 0.255, + "eval_samples_per_second": 639.262, + "eval_steps_per_second": 3.922, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2054 + }, + { + "epoch": 80.0, + "eval_accuracy": 0.8923123856009763, + "eval_auc": 0.9140997087121456, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6410169491525424, + "eval_f1_macro": 0.7888360257187523, + "eval_loss": 0.28268861770629883, + "eval_pr_auc": 0.6271691682976167, + "eval_precision": 0.6755984280100036, + "eval_precision_macro": 0.8019346102940528, + "eval_pred_class_0": 16869, + "eval_pred_class_1": 2799, + "eval_predicted_binding_ratio": 0.1423123856009762, + "eval_recall": 0.6098032892615285, + "eval_recall_macro": 0.7774977694572265, + "eval_runtime": 0.2121, + "eval_samples_per_second": 768.576, + "eval_steps_per_second": 4.715, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2080 + }, + { + "epoch": 81.0, + "eval_accuracy": 0.8924140736221273, + "eval_auc": 0.9143275951752264, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6415989159891599, + "eval_f1_macro": 0.7891531311221224, + "eval_loss": 0.28239867091178894, + "eval_pr_auc": 0.6278526459152028, + "eval_precision": 0.6757046022119158, + "eval_precision_macro": 0.8020681327098713, + "eval_pred_class_0": 16865, + "eval_pred_class_1": 2803, + "eval_predicted_binding_ratio": 0.14251576164327842, + "eval_recall": 0.6107707191228636, + "eval_recall_macro": 0.7779513039086281, + "eval_runtime": 0.2642, + "eval_samples_per_second": 617.069, + "eval_steps_per_second": 3.786, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2106 + }, + { + "epoch": 82.0, + "eval_accuracy": 0.8925666056538539, + "eval_auc": 0.9145931950717662, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.641317263622475, + "eval_f1_macro": 0.7890694555517069, + "eval_loss": 0.2821619510650635, + "eval_pr_auc": 0.6287354538303637, + "eval_precision": 0.6770609318996416, + "eval_precision_macro": 0.8026257379014738, + "eval_pred_class_0": 16878, + "eval_pred_class_1": 2790, + "eval_predicted_binding_ratio": 0.14185478950579622, + "eval_recall": 0.6091583360206385, + "eval_recall_macro": 0.7773865561916435, + "eval_runtime": 0.2651, + "eval_samples_per_second": 614.835, + "eval_steps_per_second": 3.772, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2132 + }, + { + "epoch": 83.0, + "eval_accuracy": 0.8929733577384584, + "eval_auc": 0.9148203126674294, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6442453946256549, + "eval_f1_macro": 0.7906286370477088, + "eval_loss": 0.2817782461643219, + "eval_pr_auc": 0.6293872239214393, + "eval_precision": 0.6768465909090909, + "eval_precision_macro": 0.8029675631972466, + "eval_pred_class_0": 16852, + "eval_pred_class_1": 2816, + "eval_predicted_binding_ratio": 0.14317673378076062, + "eval_recall": 0.6146404385682038, + "eval_recall_macro": 0.7798559831520322, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.502, + "eval_steps_per_second": 3.88, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2158 + }, + { + "epoch": 84.0, + "eval_accuracy": 0.8929225137278829, + "eval_auc": 0.9150136584917113, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6435341909275558, + "eval_f1_macro": 0.7902665569930348, + "eval_loss": 0.2815438210964203, + "eval_pr_auc": 0.6300492382313454, + "eval_precision": 0.677235482721767, + "eval_precision_macro": 0.8030326633702543, + "eval_pred_class_0": 16861, + "eval_pred_class_1": 2807, + "eval_predicted_binding_ratio": 0.14271913768558064, + "eval_recall": 0.6130280554659787, + "eval_recall_macro": 0.7791705135179836, + "eval_runtime": 0.2597, + "eval_samples_per_second": 627.685, + "eval_steps_per_second": 3.851, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2184 + }, + { + "epoch": 85.0, + "eval_accuracy": 0.8928716697173072, + "eval_auc": 0.9151760160393141, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6437869822485207, + "eval_f1_macro": 0.7903713942390684, + "eval_loss": 0.28126296401023865, + "eval_pr_auc": 0.6304146488380505, + "eval_precision": 0.6766169154228856, + "eval_precision_macro": 0.8027975997548746, + "eval_pred_class_0": 16854, + "eval_pred_class_1": 2814, + "eval_predicted_binding_ratio": 0.14307504575960953, + "eval_recall": 0.6139954853273137, + "eval_recall_macro": 0.7795335065315872, + "eval_runtime": 0.2553, + "eval_samples_per_second": 638.417, + "eval_steps_per_second": 3.917, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2210 + }, + { + "epoch": 86.0, + "eval_accuracy": 0.8930750457596095, + "eval_auc": 0.9154795142867925, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.643861134631668, + "eval_f1_macro": 0.7904777241463208, + "eval_loss": 0.2809857428073883, + "eval_pr_auc": 0.6313964494387146, + "eval_precision": 0.677960057061341, + "eval_precision_macro": 0.803401280902587, + "eval_pred_class_0": 16864, + "eval_pred_class_1": 2804, + "eval_predicted_binding_ratio": 0.14256660565385398, + "eval_recall": 0.6130280554659787, + "eval_recall_macro": 0.7792610549557817, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.65, + "eval_steps_per_second": 3.924, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2236 + }, + { + "epoch": 87.0, + "eval_accuracy": 0.8930750457596095, + "eval_auc": 0.9156233995513745, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6458999831621485, + "eval_f1_macro": 0.7914651276711422, + "eval_loss": 0.28072693943977356, + "eval_pr_auc": 0.631527672626228, + "eval_precision": 0.6758280479210712, + "eval_precision_macro": 0.8027684505796682, + "eval_pred_class_0": 16830, + "eval_pred_class_1": 2838, + "eval_predicted_binding_ratio": 0.14429530201342283, + "eval_recall": 0.618510158013544, + "eval_recall_macro": 0.7814890380820421, + "eval_runtime": 0.2629, + "eval_samples_per_second": 620.026, + "eval_steps_per_second": 3.804, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2262 + }, + { + "epoch": 88.0, + "eval_accuracy": 0.8932275777913362, + "eval_auc": 0.9158623713307676, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6449103821440649, + "eval_f1_macro": 0.7910387587819241, + "eval_loss": 0.28049618005752563, + "eval_pr_auc": 0.6324229662687507, + "eval_precision": 0.6779239246356203, + "eval_precision_macro": 0.8035422055690709, + "eval_pred_class_0": 16855, + "eval_pred_class_1": 2813, + "eval_predicted_binding_ratio": 0.14302420174903396, + "eval_recall": 0.6149629151886489, + "eval_recall_macro": 0.7801379433793187, + "eval_runtime": 0.2576, + "eval_samples_per_second": 632.715, + "eval_steps_per_second": 3.882, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2288 + }, + { + "epoch": 89.0, + "eval_accuracy": 0.893125889770185, + "eval_auc": 0.9160425393514616, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6452919338508268, + "eval_f1_macro": 0.7911883195144587, + "eval_loss": 0.28025364875793457, + "eval_pr_auc": 0.6329798450144843, + "eval_precision": 0.6768141592920354, + "eval_precision_macro": 0.8031105172758937, + "eval_pred_class_0": 16843, + "eval_pred_class_1": 2825, + "eval_predicted_binding_ratio": 0.1436343298759406, + "eval_recall": 0.6165752982908739, + "eval_recall_macro": 0.7807328715755691, + "eval_runtime": 0.2595, + "eval_samples_per_second": 628.097, + "eval_steps_per_second": 3.853, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2314 + }, + { + "epoch": 90.0, + "eval_accuracy": 0.8936343298759406, + "eval_auc": 0.9161711835226769, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6463826910074375, + "eval_f1_macro": 0.7918927219250234, + "eval_loss": 0.2800801396369934, + "eval_pr_auc": 0.6332605675535015, + "eval_precision": 0.6792184724689165, + "eval_precision_macro": 0.8043336176502299, + "eval_pred_class_0": 16853, + "eval_pred_class_1": 2815, + "eval_predicted_binding_ratio": 0.14312588977018506, + "eval_recall": 0.6165752982908739, + "eval_recall_macro": 0.7810346763682292, + "eval_runtime": 0.2564, + "eval_samples_per_second": 635.634, + "eval_steps_per_second": 3.9, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2340 + }, + { + "epoch": 91.0, + "eval_accuracy": 0.8934309538336384, + "eval_auc": 0.9163414730569294, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6463044211947351, + "eval_f1_macro": 0.7917843566614202, + "eval_loss": 0.2798333764076233, + "eval_pr_auc": 0.633657166273441, + "eval_precision": 0.6778761061946903, + "eval_precision_macro": 0.8037305484960271, + "eval_pred_class_0": 16843, + "eval_pred_class_1": 2825, + "eval_predicted_binding_ratio": 0.1436343298759406, + "eval_recall": 0.617542728152209, + "eval_recall_macro": 0.7813071279440347, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.372, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2366 + }, + { + "epoch": 92.0, + "eval_accuracy": 0.8937868619076673, + "eval_auc": 0.9165959292421633, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6469494676356261, + "eval_f1_macro": 0.7922200583338069, + "eval_loss": 0.27958908677101135, + "eval_pr_auc": 0.6345494466448222, + "eval_precision": 0.6796875, + "eval_precision_macro": 0.8046253782933777, + "eval_pred_class_0": 16852, + "eval_pred_class_1": 2816, + "eval_predicted_binding_ratio": 0.14317673378076062, + "eval_recall": 0.617220251531764, + "eval_recall_macro": 0.7813873334679403, + "eval_runtime": 0.2594, + "eval_samples_per_second": 628.29, + "eval_steps_per_second": 3.855, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2392 + }, + { + "epoch": 93.0, + "eval_accuracy": 0.8938885499288184, + "eval_auc": 0.9168139566838005, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6482386650935446, + "eval_f1_macro": 0.7928796235385993, + "eval_loss": 0.27935075759887695, + "eval_pr_auc": 0.6350396647674293, + "eval_precision": 0.6790254237288136, + "eval_precision_macro": 0.8045281549625298, + "eval_pred_class_0": 16836, + "eval_pred_class_1": 2832, + "eval_predicted_binding_ratio": 0.14399023794996949, + "eval_recall": 0.6201225411157691, + "eval_recall_macro": 0.7826272149050808, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.307, + "eval_steps_per_second": 3.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2418 + }, + { + "epoch": 94.0, + "eval_accuracy": 0.8941936139922717, + "eval_auc": 0.916949978089225, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6492499578628013, + "eval_f1_macro": 0.7934750822155368, + "eval_loss": 0.27906060218811035, + "eval_pr_auc": 0.6354743744677446, + "eval_precision": 0.6800847457627118, + "eval_precision_macro": 0.8051469107763429, + "eval_pred_class_0": 16836, + "eval_pred_class_1": 2832, + "eval_predicted_binding_ratio": 0.14399023794996949, + "eval_recall": 0.6210899709771042, + "eval_recall_macro": 0.7832014712735463, + "eval_runtime": 0.255, + "eval_samples_per_second": 639.231, + "eval_steps_per_second": 3.922, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2444 + }, + { + "epoch": 95.0, + "eval_accuracy": 0.894498678055725, + "eval_auc": 0.9171692902207247, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6496707749451291, + "eval_f1_macro": 0.7937845988573549, + "eval_loss": 0.2788851261138916, + "eval_pr_auc": 0.6362118552671664, + "eval_precision": 0.6817859673990078, + "eval_precision_macro": 0.8059588747122072, + "eval_pred_class_0": 16846, + "eval_pred_class_1": 2822, + "eval_predicted_binding_ratio": 0.14348179784421394, + "eval_recall": 0.6204450177362141, + "eval_recall_macro": 0.7831204384872295, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.948, + "eval_steps_per_second": 3.895, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2470 + }, + { + "epoch": 96.0, + "eval_accuracy": 0.8946003660768761, + "eval_auc": 0.9172832383185145, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.64977192093259, + "eval_f1_macro": 0.7938688135051675, + "eval_loss": 0.27873092889785767, + "eval_pr_auc": 0.6365172336600542, + "eval_precision": 0.6823988644428672, + "eval_precision_macro": 0.8062439426071903, + "eval_pred_class_0": 16850, + "eval_pred_class_1": 2818, + "eval_predicted_binding_ratio": 0.14327842180191175, + "eval_recall": 0.6201225411157691, + "eval_recall_macro": 0.783049741614805, + "eval_runtime": 0.2642, + "eval_samples_per_second": 617.058, + "eval_steps_per_second": 3.786, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2496 + }, + { + "epoch": 96.15384615384616, + "grad_norm": 12855.328125, + "learning_rate": 9.74310718484651e-07, + "loss": 0.268, + "step": 2500 + }, + { + "epoch": 97.0, + "eval_accuracy": 0.8948037421191783, + "eval_auc": 0.9174495472606937, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6506837751139625, + "eval_f1_macro": 0.7943808843546348, + "eval_loss": 0.27859047055244446, + "eval_pr_auc": 0.636938752781715, + "eval_precision": 0.6828490432317506, + "eval_precision_macro": 0.8065794545376371, + "eval_pred_class_0": 16846, + "eval_pred_class_1": 2822, + "eval_predicted_binding_ratio": 0.14348179784421394, + "eval_recall": 0.6214124475975492, + "eval_recall_macro": 0.783694694855695, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.744, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2522 + }, + { + "epoch": 98.0, + "eval_accuracy": 0.8951596501932072, + "eval_auc": 0.9175816853990344, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6521592442645074, + "eval_f1_macro": 0.7952187504847441, + "eval_loss": 0.2782803475856781, + "eval_pr_auc": 0.6372336473067074, + "eval_precision": 0.6837637071100107, + "eval_precision_macro": 0.8072045778587877, + "eval_pred_class_0": 16841, + "eval_pred_class_1": 2827, + "eval_predicted_binding_ratio": 0.14373601789709173, + "eval_recall": 0.6233473073202193, + "eval_recall_macro": 0.784692305196296, + "eval_runtime": 0.2192, + "eval_samples_per_second": 743.723, + "eval_steps_per_second": 4.563, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2548 + }, + { + "epoch": 99.0, + "eval_accuracy": 0.8950579621720561, + "eval_auc": 0.917689375499995, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6515867656988521, + "eval_f1_macro": 0.7949062764205981, + "eval_loss": 0.2782030701637268, + "eval_pr_auc": 0.6376582660543189, + "eval_precision": 0.683669854764435, + "eval_precision_macro": 0.8070768389286704, + "eval_pred_class_0": 16845, + "eval_pred_class_1": 2823, + "eval_predicted_binding_ratio": 0.1435326418547895, + "eval_recall": 0.6223798774588842, + "eval_recall_macro": 0.7842387707448946, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.364, + "eval_steps_per_second": 3.935, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2574 + }, + { + "epoch": 100.0, + "eval_accuracy": 0.8950579621720561, + "eval_auc": 0.917862049496492, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6513513513513514, + "eval_f1_macro": 0.7947922665303561, + "eval_loss": 0.2779688835144043, + "eval_pr_auc": 0.6381115995039711, + "eval_precision": 0.6839304717985101, + "eval_precision_macro": 0.8071560484103832, + "eval_pred_class_0": 16849, + "eval_pred_class_1": 2819, + "eval_predicted_binding_ratio": 0.14332926581248728, + "eval_recall": 0.6217349242179941, + "eval_recall_macro": 0.7839766550829814, + "eval_runtime": 0.219, + "eval_samples_per_second": 744.395, + "eval_steps_per_second": 4.567, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2600 + }, + { + "epoch": 101.0, + "eval_accuracy": 0.8951088061826317, + "eval_auc": 0.9180189763096767, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6513435862768294, + "eval_f1_macro": 0.7948061179237165, + "eval_loss": 0.27778077125549316, + "eval_pr_auc": 0.6385730633658938, + "eval_precision": 0.6843039772727273, + "eval_precision_macro": 0.8073193278245905, + "eval_pred_class_0": 16852, + "eval_pred_class_1": 2816, + "eval_predicted_binding_ratio": 0.14317673378076062, + "eval_recall": 0.6214124475975492, + "eval_recall_macro": 0.783875777731291, + "eval_runtime": 0.2529, + "eval_samples_per_second": 644.591, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2626 + }, + { + "epoch": 102.0, + "eval_accuracy": 0.895413870246085, + "eval_auc": 0.9182058500221522, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6518869521069555, + "eval_f1_macro": 0.7951749356520059, + "eval_loss": 0.2776651084423065, + "eval_pr_auc": 0.6390385071928153, + "eval_precision": 0.6858974358974359, + "eval_precision_macro": 0.8081029290993704, + "eval_pred_class_0": 16860, + "eval_pred_class_1": 2808, + "eval_predicted_binding_ratio": 0.1427699816961562, + "eval_recall": 0.6210899709771042, + "eval_recall_macro": 0.7839258027759306, + "eval_runtime": 0.2557, + "eval_samples_per_second": 637.358, + "eval_steps_per_second": 3.91, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2652 + }, + { + "epoch": 103.0, + "eval_accuracy": 0.8952104942037828, + "eval_auc": 0.9183596527031714, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6527379949452401, + "eval_f1_macro": 0.7955166277830898, + "eval_loss": 0.2773846685886383, + "eval_pr_auc": 0.6394513455183966, + "eval_precision": 0.6834862385321101, + "eval_precision_macro": 0.8071702310636076, + "eval_pred_class_0": 16834, + "eval_pred_class_1": 2834, + "eval_predicted_binding_ratio": 0.1440919259711206, + "eval_recall": 0.6246372138019993, + "eval_recall_macro": 0.785246716999388, + "eval_runtime": 0.2264, + "eval_samples_per_second": 719.926, + "eval_steps_per_second": 4.417, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2678 + }, + { + "epoch": 104.0, + "eval_accuracy": 0.8953630262355095, + "eval_auc": 0.9185293680199856, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6535353535353535, + "eval_f1_macro": 0.7959556034654849, + "eval_loss": 0.2772791385650635, + "eval_pr_auc": 0.6398373166129732, + "eval_precision": 0.6836914406481155, + "eval_precision_macro": 0.8073814027769664, + "eval_pred_class_0": 16829, + "eval_pred_class_1": 2839, + "eval_predicted_binding_ratio": 0.14434614602399837, + "eval_recall": 0.6259271202837794, + "eval_recall_macro": 0.7858614897610121, + "eval_runtime": 0.2593, + "eval_samples_per_second": 628.649, + "eval_steps_per_second": 3.857, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2704 + }, + { + "epoch": 105.0, + "eval_accuracy": 0.8953630262355095, + "eval_auc": 0.9186085224340037, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6536519690339953, + "eval_f1_macro": 0.7960120658489734, + "eval_loss": 0.27707138657569885, + "eval_pr_auc": 0.6400560131071933, + "eval_precision": 0.6835621260119676, + "eval_precision_macro": 0.8073423632971826, + "eval_pred_class_0": 16827, + "eval_pred_class_1": 2841, + "eval_predicted_binding_ratio": 0.1444478340451495, + "eval_recall": 0.6262495969042244, + "eval_recall_macro": 0.7859925475919686, + "eval_runtime": 0.2509, + "eval_samples_per_second": 649.533, + "eval_steps_per_second": 3.985, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2730 + }, + { + "epoch": 106.0, + "eval_accuracy": 0.8953630262355095, + "eval_auc": 0.9187703544266627, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6535353535353535, + "eval_f1_macro": 0.7959556034654849, + "eval_loss": 0.27683117985725403, + "eval_pr_auc": 0.6407556071793965, + "eval_precision": 0.6836914406481155, + "eval_precision_macro": 0.8073814027769664, + "eval_pred_class_0": 16829, + "eval_pred_class_1": 2839, + "eval_predicted_binding_ratio": 0.14434614602399837, + "eval_recall": 0.6259271202837794, + "eval_recall_macro": 0.7858614897610121, + "eval_runtime": 0.3724, + "eval_samples_per_second": 437.667, + "eval_steps_per_second": 2.685, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2756 + }, + { + "epoch": 107.0, + "eval_accuracy": 0.8955664022778117, + "eval_auc": 0.918983612943811, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6543251430494783, + "eval_f1_macro": 0.7964085438550979, + "eval_loss": 0.2766495645046234, + "eval_pr_auc": 0.6413517959683596, + "eval_precision": 0.6842661034846885, + "eval_precision_macro": 0.8077537803332993, + "eval_pred_class_0": 16827, + "eval_pred_class_1": 2841, + "eval_predicted_binding_ratio": 0.1444478340451495, + "eval_recall": 0.6268945501451145, + "eval_recall_macro": 0.7863753851709456, + "eval_runtime": 0.2579, + "eval_samples_per_second": 631.95, + "eval_steps_per_second": 3.877, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2782 + }, + { + "epoch": 108.0, + "eval_accuracy": 0.8957189343095383, + "eval_auc": 0.9190842761805244, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.655583543240974, + "eval_f1_macro": 0.7970707027489733, + "eval_loss": 0.27643415331840515, + "eval_pr_auc": 0.6415079341486267, + "eval_precision": 0.6839523475823406, + "eval_precision_macro": 0.8078082185158045, + "eval_pred_class_0": 16814, + "eval_pred_class_1": 2854, + "eval_predicted_binding_ratio": 0.1451088061826317, + "eval_recall": 0.6294743631086747, + "eval_recall_macro": 0.7875143892563956, + "eval_runtime": 0.2441, + "eval_samples_per_second": 667.885, + "eval_steps_per_second": 4.097, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2808 + }, + { + "epoch": 109.0, + "eval_accuracy": 0.8958206223306895, + "eval_auc": 0.9192001707781057, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6554565327055658, + "eval_f1_macro": 0.7970445082288499, + "eval_loss": 0.2763550579547882, + "eval_pr_auc": 0.6419306315808602, + "eval_precision": 0.6848208011243851, + "eval_precision_macro": 0.8081695255176081, + "eval_pred_class_0": 16822, + "eval_pred_class_1": 2846, + "eval_predicted_binding_ratio": 0.14470205409802725, + "eval_recall": 0.6285069332473395, + "eval_recall_macro": 0.7871815767220581, + "eval_runtime": 0.2526, + "eval_samples_per_second": 645.285, + "eval_steps_per_second": 3.959, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2834 + }, + { + "epoch": 110.0, + "eval_accuracy": 0.8958206223306895, + "eval_auc": 0.9193565525713485, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.655919395465995, + "eval_f1_macro": 0.7972685860227431, + "eval_loss": 0.27626872062683105, + "eval_pr_auc": 0.6423732230660918, + "eval_precision": 0.684302733006307, + "eval_precision_macro": 0.8080131483516131, + "eval_pred_class_0": 16814, + "eval_pred_class_1": 2854, + "eval_predicted_binding_ratio": 0.1451088061826317, + "eval_recall": 0.6297968397291196, + "eval_recall_macro": 0.7877058080458841, + "eval_runtime": 0.2566, + "eval_samples_per_second": 635.337, + "eval_steps_per_second": 3.898, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2860 + }, + { + "epoch": 111.0, + "eval_accuracy": 0.8959223103518406, + "eval_auc": 0.9193956188221624, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.656485987581809, + "eval_f1_macro": 0.7975781647169913, + "eval_loss": 0.2761881351470947, + "eval_pr_auc": 0.6423085789727141, + "eval_precision": 0.6843946815955213, + "eval_precision_macro": 0.8081402319339891, + "eval_pred_class_0": 16810, + "eval_pred_class_1": 2858, + "eval_predicted_binding_ratio": 0.1453121822249339, + "eval_recall": 0.6307642695904547, + "eval_recall_macro": 0.7881593424972857, + "eval_runtime": 0.2618, + "eval_samples_per_second": 622.648, + "eval_steps_per_second": 3.82, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2886 + }, + { + "epoch": 112.0, + "eval_accuracy": 0.8960239983729916, + "eval_auc": 0.9196281671522437, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6564757265244414, + "eval_f1_macro": 0.7976085010119736, + "eval_loss": 0.2759994864463806, + "eval_pr_auc": 0.6431408853405497, + "eval_precision": 0.685133239831697, + "eval_precision_macro": 0.808462195558094, + "eval_pred_class_0": 16816, + "eval_pred_class_1": 2852, + "eval_predicted_binding_ratio": 0.14500711816148057, + "eval_recall": 0.6301193163495646, + "eval_recall_macro": 0.7879575877939047, + "eval_runtime": 0.2423, + "eval_samples_per_second": 672.612, + "eval_steps_per_second": 4.126, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2912 + }, + { + "epoch": 113.0, + "eval_accuracy": 0.8960239983729916, + "eval_auc": 0.9197686265771928, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6561291407432319, + "eval_f1_macro": 0.797440712214738, + "eval_loss": 0.27580633759498596, + "eval_pr_auc": 0.6436439478836922, + "eval_precision": 0.685523541813071, + "eval_precision_macro": 0.8085803418255701, + "eval_pred_class_0": 16822, + "eval_pred_class_1": 2846, + "eval_predicted_binding_ratio": 0.14470205409802725, + "eval_recall": 0.6291518864882296, + "eval_recall_macro": 0.7875644143010352, + "eval_runtime": 0.2575, + "eval_samples_per_second": 632.943, + "eval_steps_per_second": 3.883, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2938 + }, + { + "epoch": 114.0, + "eval_accuracy": 0.8962782184258694, + "eval_auc": 0.9199272079152, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6573731944910984, + "eval_f1_macro": 0.7981312081136818, + "eval_loss": 0.27558717131614685, + "eval_pr_auc": 0.6441453864761489, + "eval_precision": 0.6859446196985629, + "eval_precision_macro": 0.8089550633431857, + "eval_pred_class_0": 16815, + "eval_pred_class_1": 2853, + "eval_predicted_binding_ratio": 0.14505796217205613, + "eval_recall": 0.6310867462108997, + "eval_recall_macro": 0.7885016636831041, + "eval_runtime": 0.259, + "eval_samples_per_second": 629.272, + "eval_steps_per_second": 3.861, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2964 + }, + { + "epoch": 115.0, + "eval_accuracy": 0.8962782184258694, + "eval_auc": 0.9200543727465736, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6571428571428571, + "eval_f1_macro": 0.7980197003020941, + "eval_loss": 0.2754935324192047, + "eval_pr_auc": 0.6445859889828064, + "eval_precision": 0.6862056862056862, + "eval_precision_macro": 0.8090342302245508, + "eval_pred_class_0": 16819, + "eval_pred_class_1": 2849, + "eval_predicted_binding_ratio": 0.1448545861297539, + "eval_recall": 0.6304417929700097, + "eval_recall_macro": 0.7882395480211912, + "eval_runtime": 0.2588, + "eval_samples_per_second": 629.903, + "eval_steps_per_second": 3.864, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2990 + }, + { + "epoch": 115.38461538461539, + "grad_norm": 13551.1435546875, + "learning_rate": 9.488660254357756e-07, + "loss": 0.2594, + "step": 3000 + }, + { + "epoch": 116.0, + "eval_accuracy": 0.8964815944681717, + "eval_auc": 0.9201159794649721, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6579301075268817, + "eval_f1_macro": 0.7984714041109127, + "eval_loss": 0.2753925323486328, + "eval_pr_auc": 0.6447630589609926, + "eval_precision": 0.6867765696246931, + "eval_precision_macro": 0.8094048157037065, + "eval_pred_class_0": 16817, + "eval_pred_class_1": 2851, + "eval_predicted_binding_ratio": 0.14495627415090503, + "eval_recall": 0.6314092228313447, + "eval_recall_macro": 0.7887534434311247, + "eval_runtime": 0.2556, + "eval_samples_per_second": 637.638, + "eval_steps_per_second": 3.912, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3016 + }, + { + "epoch": 117.0, + "eval_accuracy": 0.8964815944681717, + "eval_auc": 0.920259670079575, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6579301075268817, + "eval_f1_macro": 0.7984714041109127, + "eval_loss": 0.27515658736228943, + "eval_pr_auc": 0.6451647424161069, + "eval_precision": 0.6867765696246931, + "eval_precision_macro": 0.8094048157037065, + "eval_pred_class_0": 16817, + "eval_pred_class_1": 2851, + "eval_predicted_binding_ratio": 0.14495627415090503, + "eval_recall": 0.6314092228313447, + "eval_recall_macro": 0.7887534434311247, + "eval_runtime": 0.2523, + "eval_samples_per_second": 645.985, + "eval_steps_per_second": 3.963, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3042 + }, + { + "epoch": 118.0, + "eval_accuracy": 0.8963799064470206, + "eval_auc": 0.920327525062304, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6583975863224941, + "eval_f1_macro": 0.7986623832121911, + "eval_loss": 0.2750197649002075, + "eval_pr_auc": 0.6454382852682906, + "eval_precision": 0.6855148342059337, + "eval_precision_macro": 0.8089241730394068, + "eval_pred_class_0": 16803, + "eval_pred_class_1": 2865, + "eval_predicted_binding_ratio": 0.14566809029896277, + "eval_recall": 0.6333440825540149, + "eval_recall_macro": 0.7894794294583318, + "eval_runtime": 0.2545, + "eval_samples_per_second": 640.495, + "eval_steps_per_second": 3.929, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3068 + }, + { + "epoch": 119.0, + "eval_accuracy": 0.8963799064470206, + "eval_auc": 0.9205032745284716, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6585120643431636, + "eval_f1_macro": 0.7987177919414212, + "eval_loss": 0.27491119503974915, + "eval_pr_auc": 0.6460050422984182, + "eval_precision": 0.6853854202999651, + "eval_precision_macro": 0.8088851986923312, + "eval_pred_class_0": 16801, + "eval_pred_class_1": 2867, + "eval_predicted_binding_ratio": 0.1457697783201139, + "eval_recall": 0.6336665591744598, + "eval_recall_macro": 0.7896104872892882, + "eval_runtime": 0.2295, + "eval_samples_per_second": 710.293, + "eval_steps_per_second": 4.358, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3094 + }, + { + "epoch": 120.0, + "eval_accuracy": 0.8962782184258694, + "eval_auc": 0.9205394599595942, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6587487453997992, + "eval_f1_macro": 0.7987969999557303, + "eval_loss": 0.2747833728790283, + "eval_pr_auc": 0.6459576422229408, + "eval_precision": 0.6843934654153633, + "eval_precision_macro": 0.8084881983738124, + "eval_pred_class_0": 16791, + "eval_pred_class_1": 2877, + "eval_predicted_binding_ratio": 0.14627821842586944, + "eval_recall": 0.63495646565624, + "eval_recall_macro": 0.7900743576545822, + "eval_runtime": 0.2633, + "eval_samples_per_second": 619.079, + "eval_steps_per_second": 3.798, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3120 + }, + { + "epoch": 121.0, + "eval_accuracy": 0.8966341264998983, + "eval_auc": 0.9206969317927203, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6591785414920369, + "eval_f1_macro": 0.7991286912009045, + "eval_loss": 0.2746541202068329, + "eval_pr_auc": 0.6466379382676535, + "eval_precision": 0.6864525139664804, + "eval_precision_macro": 0.8094545359644352, + "eval_pred_class_0": 16804, + "eval_pred_class_1": 2864, + "eval_predicted_binding_ratio": 0.14561724628838724, + "eval_recall": 0.6339890357949048, + "eval_recall_macro": 0.7898924475165747, + "eval_runtime": 0.2553, + "eval_samples_per_second": 638.354, + "eval_steps_per_second": 3.916, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3146 + }, + { + "epoch": 122.0, + "eval_accuracy": 0.8966341264998983, + "eval_auc": 0.9208301697034432, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6592927769398358, + "eval_f1_macro": 0.7991839832435101, + "eval_loss": 0.2745382785797119, + "eval_pr_auc": 0.6470920919458031, + "eval_precision": 0.6863224005582693, + "eval_precision_macro": 0.809415217658018, + "eval_pred_class_0": 16802, + "eval_pred_class_1": 2866, + "eval_predicted_binding_ratio": 0.14571893430953833, + "eval_recall": 0.6343115124153499, + "eval_recall_macro": 0.7900235053475313, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.372, + "eval_steps_per_second": 3.898, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3172 + }, + { + "epoch": 123.0, + "eval_accuracy": 0.8967358145210494, + "eval_auc": 0.9209352222971863, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6597420003350645, + "eval_f1_macro": 0.7994367387715422, + "eval_loss": 0.2744734585285187, + "eval_pr_auc": 0.6474021950727136, + "eval_precision": 0.6865411436541143, + "eval_precision_macro": 0.809580095636581, + "eval_pred_class_0": 16800, + "eval_pred_class_1": 2868, + "eval_predicted_binding_ratio": 0.14582062233068943, + "eval_recall": 0.63495646565624, + "eval_recall_macro": 0.7903459819679763, + "eval_runtime": 0.2539, + "eval_samples_per_second": 641.926, + "eval_steps_per_second": 3.938, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3198 + }, + { + "epoch": 124.0, + "eval_accuracy": 0.896786658531625, + "eval_auc": 0.9210344159265571, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6599664991624791, + "eval_f1_macro": 0.7995630613656908, + "eval_loss": 0.2742863893508911, + "eval_pr_auc": 0.6479185906925482, + "eval_precision": 0.6866504008365284, + "eval_precision_macro": 0.8096624823993346, + "eval_pred_class_0": 16799, + "eval_pred_class_1": 2869, + "eval_predicted_binding_ratio": 0.145871466341265, + "eval_recall": 0.6352789422766849, + "eval_recall_macro": 0.7905072202781989, + "eval_runtime": 0.2336, + "eval_samples_per_second": 697.863, + "eval_steps_per_second": 4.281, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3224 + }, + { + "epoch": 125.0, + "eval_accuracy": 0.896888346552776, + "eval_auc": 0.9211382130279347, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6603015075376885, + "eval_f1_macro": 0.7997605361820792, + "eval_loss": 0.27425193786621094, + "eval_pr_auc": 0.6481518613470144, + "eval_precision": 0.6869989543394911, + "eval_precision_macro": 0.8098665228272252, + "eval_pred_class_0": 16799, + "eval_pred_class_1": 2869, + "eval_predicted_binding_ratio": 0.145871466341265, + "eval_recall": 0.63560141889713, + "eval_recall_macro": 0.7906986390676873, + "eval_runtime": 0.2655, + "eval_samples_per_second": 613.835, + "eval_steps_per_second": 3.766, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3250 + }, + { + "epoch": 126.0, + "eval_accuracy": 0.896888346552776, + "eval_auc": 0.9212811737051158, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6598456893659845, + "eval_f1_macro": 0.7995399118610351, + "eval_loss": 0.2741680145263672, + "eval_pr_auc": 0.6487724280436702, + "eval_precision": 0.6875218455085634, + "eval_precision_macro": 0.8100249793973471, + "eval_pred_class_0": 16807, + "eval_pred_class_1": 2861, + "eval_predicted_binding_ratio": 0.14546471425666058, + "eval_recall": 0.6343115124153499, + "eval_recall_macro": 0.7901744077438613, + "eval_runtime": 0.2775, + "eval_samples_per_second": 587.468, + "eval_steps_per_second": 3.604, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3276 + }, + { + "epoch": 127.0, + "eval_accuracy": 0.8966849705104739, + "eval_auc": 0.9212962201485035, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6608811748998665, + "eval_f1_macro": 0.7999703379297798, + "eval_loss": 0.2739817500114441, + "eval_pr_auc": 0.648761125236648, + "eval_precision": 0.6848841231407817, + "eval_precision_macro": 0.809033228048307, + "eval_pred_class_0": 16777, + "eval_pred_class_1": 2891, + "eval_predicted_binding_ratio": 0.1469900345739272, + "eval_recall": 0.6385037084811351, + "eval_recall_macro": 0.7917574376292318, + "eval_runtime": 0.2571, + "eval_samples_per_second": 634.003, + "eval_steps_per_second": 3.89, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3302 + }, + { + "epoch": 128.0, + "eval_accuracy": 0.8971425666056538, + "eval_auc": 0.9213870048987755, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6608549874266555, + "eval_f1_macro": 0.8001167448595325, + "eval_loss": 0.2739529609680176, + "eval_pr_auc": 0.6489216955933252, + "eval_precision": 0.6881983240223464, + "eval_precision_macro": 0.8104762150937725, + "eval_pred_class_0": 16804, + "eval_pred_class_1": 2864, + "eval_predicted_binding_ratio": 0.14561724628838724, + "eval_recall": 0.63560141889713, + "eval_recall_macro": 0.7908495414640173, + "eval_runtime": 0.26, + "eval_samples_per_second": 626.828, + "eval_steps_per_second": 3.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3328 + }, + { + "epoch": 129.0, + "eval_accuracy": 0.8970408785845028, + "eval_auc": 0.9215601460552225, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6604058359885964, + "eval_f1_macro": 0.7998640212813866, + "eval_loss": 0.27383002638816833, + "eval_pr_auc": 0.6495277773578755, + "eval_precision": 0.6879804332634522, + "eval_precision_macro": 0.8103117684584547, + "eval_pred_class_0": 16806, + "eval_pred_class_1": 2862, + "eval_predicted_binding_ratio": 0.1455155582672361, + "eval_recall": 0.63495646565624, + "eval_recall_macro": 0.7905270648435724, + "eval_runtime": 0.2665, + "eval_samples_per_second": 611.602, + "eval_steps_per_second": 3.752, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3354 + }, + { + "epoch": 130.0, + "eval_accuracy": 0.8969900345739272, + "eval_auc": 0.9216559138449605, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6610906657745065, + "eval_f1_macro": 0.8001778048579948, + "eval_loss": 0.27363157272338867, + "eval_pr_auc": 0.6497825902519792, + "eval_precision": 0.6868265554396942, + "eval_precision_macro": 0.8099131883862756, + "eval_pred_class_0": 16791, + "eval_pred_class_1": 2877, + "eval_predicted_binding_ratio": 0.14627821842586944, + "eval_recall": 0.6372138019993551, + "eval_recall_macro": 0.7914142891810019, + "eval_runtime": 0.2147, + "eval_samples_per_second": 759.086, + "eval_steps_per_second": 4.657, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3380 + }, + { + "epoch": 131.0, + "eval_accuracy": 0.8970917225950783, + "eval_auc": 0.9217394576160085, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6619906479625919, + "eval_f1_macro": 0.800648676506185, + "eval_loss": 0.27347350120544434, + "eval_pr_auc": 0.6501280761818352, + "eval_precision": 0.686525805334257, + "eval_precision_macro": 0.8099216238398834, + "eval_pred_class_0": 16781, + "eval_pred_class_1": 2887, + "eval_predicted_binding_ratio": 0.14678665853162498, + "eval_recall": 0.6391486617220251, + "eval_recall_macro": 0.7922609971252728, + "eval_runtime": 0.2669, + "eval_samples_per_second": 610.642, + "eval_steps_per_second": 3.746, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3406 + }, + { + "epoch": 132.0, + "eval_accuracy": 0.8970917225950783, + "eval_auc": 0.9218276924515536, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6609715242881072, + "eval_f1_macro": 0.8001554858148563, + "eval_loss": 0.2734222412109375, + "eval_pr_auc": 0.6505561229387223, + "eval_precision": 0.6876960613454165, + "eval_precision_macro": 0.8102746036830064, + "eval_pred_class_0": 16799, + "eval_pred_class_1": 2869, + "eval_predicted_binding_ratio": 0.145871466341265, + "eval_recall": 0.63624637213802, + "eval_recall_macro": 0.7910814766466644, + "eval_runtime": 0.2569, + "eval_samples_per_second": 634.527, + "eval_steps_per_second": 3.893, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3432 + }, + { + "epoch": 133.0, + "eval_accuracy": 0.8976001627008339, + "eval_auc": 0.9219583415175538, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6620805369127517, + "eval_f1_macro": 0.8008688878235859, + "eval_loss": 0.273334801197052, + "eval_pr_auc": 0.6510448354697362, + "eval_precision": 0.6901014340678558, + "eval_precision_macro": 0.8114972635268781, + "eval_pred_class_0": 16809, + "eval_pred_class_1": 2859, + "eval_predicted_binding_ratio": 0.14536302623550945, + "eval_recall": 0.63624637213802, + "eval_recall_macro": 0.7913832814393245, + "eval_runtime": 0.2542, + "eval_samples_per_second": 641.276, + "eval_steps_per_second": 3.934, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3458 + }, + { + "epoch": 134.0, + "eval_accuracy": 0.8976001627008339, + "eval_auc": 0.92201595791138, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6632107023411371, + "eval_f1_macro": 0.8014158800109571, + "eval_loss": 0.2731546461582184, + "eval_pr_auc": 0.6511084632800272, + "eval_precision": 0.6887808266759291, + "eval_precision_macro": 0.8110948031169865, + "eval_pred_class_0": 16789, + "eval_pred_class_1": 2879, + "eval_predicted_binding_ratio": 0.14637990644702054, + "eval_recall": 0.6394711383424702, + "eval_recall_macro": 0.7926938597488895, + "eval_runtime": 0.2551, + "eval_samples_per_second": 638.927, + "eval_steps_per_second": 3.92, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3484 + }, + { + "epoch": 134.6153846153846, + "grad_norm": 16295.5498046875, + "learning_rate": 9.153428025759045e-07, + "loss": 0.2515, + "step": 3500 + }, + { + "epoch": 135.0, + "eval_accuracy": 0.8977526947325605, + "eval_auc": 0.9221653809678686, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.663543583737661, + "eval_f1_macro": 0.8016300010477627, + "eval_loss": 0.2730526030063629, + "eval_pr_auc": 0.6517474669432921, + "eval_precision": 0.6894993045897079, + "eval_precision_macro": 0.8114599905511665, + "eval_pred_class_0": 16792, + "eval_pred_class_1": 2876, + "eval_predicted_binding_ratio": 0.14622737441529388, + "eval_recall": 0.6394711383424702, + "eval_recall_macro": 0.7927844011866875, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.318, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3510 + }, + { + "epoch": 136.0, + "eval_accuracy": 0.8976510067114094, + "eval_auc": 0.9222144814251072, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6634342083263668, + "eval_f1_macro": 0.8015417181640828, + "eval_loss": 0.27299538254737854, + "eval_pr_auc": 0.6516584418617962, + "eval_precision": 0.6888888888888889, + "eval_precision_macro": 0.8111766341037249, + "eval_pred_class_0": 16788, + "eval_pred_class_1": 2880, + "eval_predicted_binding_ratio": 0.1464307504575961, + "eval_recall": 0.6397936149629152, + "eval_recall_macro": 0.792855098059112, + "eval_runtime": 0.2563, + "eval_samples_per_second": 635.888, + "eval_steps_per_second": 3.901, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3536 + }, + { + "epoch": 137.0, + "eval_accuracy": 0.8978543827537117, + "eval_auc": 0.922406970789481, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6637656903765691, + "eval_f1_macro": 0.8017728364954996, + "eval_loss": 0.27294018864631653, + "eval_pr_auc": 0.6525345183514315, + "eval_precision": 0.6899791231732777, + "eval_precision_macro": 0.8117038643138033, + "eval_pred_class_0": 16794, + "eval_pred_class_1": 2874, + "eval_predicted_binding_ratio": 0.14612568639414278, + "eval_recall": 0.6394711383424702, + "eval_recall_macro": 0.7928447621452195, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.531, + "eval_steps_per_second": 3.936, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3562 + }, + { + "epoch": 138.0, + "eval_accuracy": 0.8979560707748627, + "eval_auc": 0.9224206449505158, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6643251379829402, + "eval_f1_macro": 0.8020789283763069, + "eval_loss": 0.272890567779541, + "eval_pr_auc": 0.652455646685698, + "eval_precision": 0.6900625434329395, + "eval_precision_macro": 0.8118269834496443, + "eval_pred_class_0": 16790, + "eval_pred_class_1": 2878, + "eval_predicted_binding_ratio": 0.14632906243644497, + "eval_recall": 0.6404385682038052, + "eval_recall_macro": 0.793298296596621, + "eval_runtime": 0.2465, + "eval_samples_per_second": 661.298, + "eval_steps_per_second": 4.057, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3588 + }, + { + "epoch": 139.0, + "eval_accuracy": 0.8980069147854383, + "eval_auc": 0.9225580970332872, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6647727272727273, + "eval_f1_macro": 0.802313204605421, + "eval_loss": 0.2726689577102661, + "eval_pr_auc": 0.6528952820360587, + "eval_precision": 0.6899063475546305, + "eval_precision_macro": 0.8118283599554506, + "eval_pred_class_0": 16785, + "eval_pred_class_1": 2883, + "eval_predicted_binding_ratio": 0.14658328248932276, + "eval_recall": 0.6414059980651403, + "eval_recall_macro": 0.7937216505687565, + "eval_runtime": 0.1981, + "eval_samples_per_second": 822.798, + "eval_steps_per_second": 5.048, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3614 + }, + { + "epoch": 140.0, + "eval_accuracy": 0.898159446817165, + "eval_auc": 0.9226611836622408, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6642078792958928, + "eval_f1_macro": 0.8020928521767887, + "eval_loss": 0.2726409435272217, + "eval_pr_auc": 0.6533512747607447, + "eval_precision": 0.6916899441340782, + "eval_precision_macro": 0.8125195733524473, + "eval_pred_class_0": 16804, + "eval_pred_class_1": 2864, + "eval_predicted_binding_ratio": 0.14561724628838724, + "eval_recall": 0.6388261851015802, + "eval_recall_macro": 0.7927637293589026, + "eval_runtime": 0.2516, + "eval_samples_per_second": 647.759, + "eval_steps_per_second": 3.974, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3640 + }, + { + "epoch": 141.0, + "eval_accuracy": 0.8983119788488916, + "eval_auc": 0.9227806014244446, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.666110183639399, + "eval_f1_macro": 0.8030664874893451, + "eval_loss": 0.2725253105163574, + "eval_pr_auc": 0.6539162157851042, + "eval_precision": 0.6905503634475597, + "eval_precision_macro": 0.8123173177271173, + "eval_pred_class_0": 16779, + "eval_pred_class_1": 2889, + "eval_predicted_binding_ratio": 0.14688834655277608, + "eval_recall": 0.6433408577878104, + "eval_recall_macro": 0.7946890804300916, + "eval_runtime": 0.2669, + "eval_samples_per_second": 610.673, + "eval_steps_per_second": 3.746, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3666 + }, + { + "epoch": 142.0, + "eval_accuracy": 0.898159446817165, + "eval_auc": 0.9228251470721713, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6665556850341269, + "eval_f1_macro": 0.8032289361592369, + "eval_loss": 0.27245020866394043, + "eval_pr_auc": 0.6537875546315605, + "eval_precision": 0.6889194769442533, + "eval_precision_macro": 0.8116772542816959, + "eval_pred_class_0": 16762, + "eval_pred_class_1": 2906, + "eval_predicted_binding_ratio": 0.1477526947325605, + "eval_recall": 0.6455981941309256, + "eval_recall_macro": 0.795515943808989, + "eval_runtime": 0.2562, + "eval_samples_per_second": 636.157, + "eval_steps_per_second": 3.903, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3692 + }, + { + "epoch": 143.0, + "eval_accuracy": 0.8984645108806183, + "eval_auc": 0.9229446329618678, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6663324979114453, + "eval_f1_macro": 0.8032271166958205, + "eval_loss": 0.2723686695098877, + "eval_pr_auc": 0.6543603067705216, + "eval_precision": 0.6914008321775312, + "eval_precision_macro": 0.8127225800544472, + "eval_pred_class_0": 16784, + "eval_pred_class_1": 2884, + "eval_predicted_binding_ratio": 0.14663412649989832, + "eval_recall": 0.6430183811673653, + "eval_recall_macro": 0.7946485640369331, + "eval_runtime": 0.2531, + "eval_samples_per_second": 644.012, + "eval_steps_per_second": 3.951, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3718 + }, + { + "epoch": 144.0, + "eval_accuracy": 0.898159446817165, + "eval_auc": 0.9229539761608667, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6658882402001668, + "eval_f1_macro": 0.8029060288610683, + "eval_loss": 0.2724270820617676, + "eval_pr_auc": 0.6540924505284794, + "eval_precision": 0.6897028334485141, + "eval_precision_macro": 0.8119135366717949, + "eval_pred_class_0": 16774, + "eval_pred_class_1": 2894, + "eval_predicted_binding_ratio": 0.14714256660565386, + "eval_recall": 0.6436633344082554, + "eval_recall_macro": 0.7947295968232501, + "eval_runtime": 0.2578, + "eval_samples_per_second": 632.29, + "eval_steps_per_second": 3.879, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3744 + }, + { + "epoch": 145.0, + "eval_accuracy": 0.8983119788488916, + "eval_auc": 0.923075982767793, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6671105193075899, + "eval_f1_macro": 0.8035504588856721, + "eval_loss": 0.27218085527420044, + "eval_pr_auc": 0.6548951387334131, + "eval_precision": 0.6893704850361198, + "eval_precision_macro": 0.8119604647601695, + "eval_pred_class_0": 16761, + "eval_pred_class_1": 2907, + "eval_predicted_binding_ratio": 0.14780353874313606, + "eval_recall": 0.6462431473718155, + "eval_recall_macro": 0.7958686009087, + "eval_runtime": 0.2047, + "eval_samples_per_second": 796.353, + "eval_steps_per_second": 4.886, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3770 + }, + { + "epoch": 146.0, + "eval_accuracy": 0.8984645108806183, + "eval_auc": 0.9231917800403848, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6671111851975329, + "eval_f1_macro": 0.8036038872863508, + "eval_loss": 0.27211907505989075, + "eval_pr_auc": 0.6553363499797747, + "eval_precision": 0.6904761904761905, + "eval_precision_macro": 0.8124414345344577, + "eval_pred_class_0": 16770, + "eval_pred_class_1": 2898, + "eval_predicted_binding_ratio": 0.14734594264795606, + "eval_recall": 0.6452757175104805, + "eval_recall_macro": 0.7955659688536286, + "eval_runtime": 0.2587, + "eval_samples_per_second": 629.971, + "eval_steps_per_second": 3.865, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3796 + }, + { + "epoch": 147.0, + "eval_accuracy": 0.8985661989017694, + "eval_auc": 0.9232496689441817, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6676661669165417, + "eval_f1_macro": 0.8039077842052783, + "eval_loss": 0.2721000015735626, + "eval_pr_auc": 0.655340056976625, + "eval_precision": 0.6905582356995176, + "eval_precision_macro": 0.812564099359958, + "eval_pred_class_0": 16766, + "eval_pred_class_1": 2902, + "eval_predicted_binding_ratio": 0.14754931869025828, + "eval_recall": 0.6462431473718155, + "eval_recall_macro": 0.7960195033050301, + "eval_runtime": 0.2516, + "eval_samples_per_second": 647.767, + "eval_steps_per_second": 3.974, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3822 + }, + { + "epoch": 148.0, + "eval_accuracy": 0.8985661989017694, + "eval_auc": 0.9233701767462686, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6673336668334167, + "eval_f1_macro": 0.8037469198020228, + "eval_loss": 0.2719952464103699, + "eval_pr_auc": 0.6558934902075464, + "eval_precision": 0.6909530386740331, + "eval_precision_macro": 0.8126837695158862, + "eval_pred_class_0": 16772, + "eval_pred_class_1": 2896, + "eval_predicted_binding_ratio": 0.14724425462680496, + "eval_recall": 0.6452757175104805, + "eval_recall_macro": 0.7956263298121606, + "eval_runtime": 0.2562, + "eval_samples_per_second": 636.275, + "eval_steps_per_second": 3.904, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3848 + }, + { + "epoch": 149.0, + "eval_accuracy": 0.8983628228594671, + "eval_auc": 0.9234171749837325, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6676641729010806, + "eval_f1_macro": 0.8038359878940744, + "eval_loss": 0.27190613746643066, + "eval_pr_auc": 0.6558672612955618, + "eval_precision": 0.6890871654083733, + "eval_precision_macro": 0.8119245066626444, + "eval_pred_class_0": 16754, + "eval_pred_class_1": 2914, + "eval_predicted_binding_ratio": 0.14815944681716495, + "eval_recall": 0.6475330538535956, + "eval_recall_macro": 0.796423012711792, + "eval_runtime": 0.1964, + "eval_samples_per_second": 830.09, + "eval_steps_per_second": 5.093, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3874 + }, + { + "epoch": 150.0, + "eval_accuracy": 0.8986678869229204, + "eval_auc": 0.9235316972989609, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6682204095222241, + "eval_f1_macro": 0.8042113179058208, + "eval_loss": 0.2718164622783661, + "eval_pr_auc": 0.6562427373314145, + "eval_precision": 0.6906400550584997, + "eval_precision_macro": 0.8126866902186664, + "eval_pred_class_0": 16762, + "eval_pred_class_1": 2906, + "eval_predicted_binding_ratio": 0.1477526947325605, + "eval_recall": 0.6472105772331506, + "eval_recall_macro": 0.7964730377564316, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.207, + "eval_steps_per_second": 3.842, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3900 + }, + { + "epoch": 151.0, + "eval_accuracy": 0.8984645108806183, + "eval_auc": 0.923607970893288, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.66744379683597, + "eval_f1_macro": 0.8037648014211952, + "eval_loss": 0.27192452549934387, + "eval_pr_auc": 0.6563118529429329, + "eval_precision": 0.6900826446280992, + "eval_precision_macro": 0.8123224008156005, + "eval_pred_class_0": 16764, + "eval_pred_class_1": 2904, + "eval_predicted_binding_ratio": 0.1476510067114094, + "eval_recall": 0.6462431473718155, + "eval_recall_macro": 0.7959591423464981, + "eval_runtime": 0.1771, + "eval_samples_per_second": 920.181, + "eval_steps_per_second": 5.645, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3926 + }, + { + "epoch": 152.0, + "eval_accuracy": 0.8986170429123449, + "eval_auc": 0.9236869111923289, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6687707641196013, + "eval_f1_macro": 0.8044598207679289, + "eval_loss": 0.2716231048107147, + "eval_pr_auc": 0.6568319718650024, + "eval_precision": 0.6896197327852004, + "eval_precision_macro": 0.812330315374629, + "eval_pred_class_0": 16749, + "eval_pred_class_1": 2919, + "eval_predicted_binding_ratio": 0.1484136668700427, + "eval_recall": 0.6491454369558207, + "eval_recall_macro": 0.7972292042629046, + "eval_runtime": 0.241, + "eval_samples_per_second": 676.369, + "eval_steps_per_second": 4.15, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3952 + }, + { + "epoch": 153.0, + "eval_accuracy": 0.8986678869229204, + "eval_auc": 0.9238590985638783, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6671120761650242, + "eval_f1_macro": 0.8036750821318089, + "eval_loss": 0.2716236114501953, + "eval_pr_auc": 0.657752547087354, + "eval_precision": 0.691961191961192, + "eval_precision_macro": 0.8130882112827054, + "eval_pred_class_0": 16782, + "eval_pred_class_1": 2886, + "eval_predicted_binding_ratio": 0.14673581452104942, + "eval_recall": 0.6439858110287005, + "eval_recall_macro": 0.7951624594468667, + "eval_runtime": 0.2469, + "eval_samples_per_second": 660.315, + "eval_steps_per_second": 4.051, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3978 + }, + { + "epoch": 153.84615384615384, + "grad_norm": 13863.017578125, + "learning_rate": 8.743443888522679e-07, + "loss": 0.244, + "step": 4000 + }, + { + "epoch": 154.0, + "eval_accuracy": 0.8986170429123449, + "eval_auc": 0.9239073328287097, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6674449633088726, + "eval_f1_macro": 0.8038184624582756, + "eval_loss": 0.27161940932273865, + "eval_pr_auc": 0.6577434889769634, + "eval_precision": 0.6911917098445596, + "eval_precision_macro": 0.8128050601926549, + "eval_pred_class_0": 16773, + "eval_pred_class_1": 2895, + "eval_predicted_binding_ratio": 0.1471934106162294, + "eval_recall": 0.6452757175104805, + "eval_recall_macro": 0.7956565102914266, + "eval_runtime": 0.2311, + "eval_samples_per_second": 705.281, + "eval_steps_per_second": 4.327, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4004 + }, + { + "epoch": 155.0, + "eval_accuracy": 0.8983119788488916, + "eval_auc": 0.9240325316952941, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6666666666666666, + "eval_f1_macro": 0.8033357331413487, + "eval_loss": 0.2715211510658264, + "eval_pr_auc": 0.6581299979478261, + "eval_precision": 0.689893066574681, + "eval_precision_macro": 0.812118099868532, + "eval_pred_class_0": 16769, + "eval_pred_class_1": 2899, + "eval_predicted_binding_ratio": 0.14739678665853162, + "eval_recall": 0.6449532408900355, + "eval_recall_macro": 0.7953443695848741, + "eval_runtime": 0.2265, + "eval_samples_per_second": 719.735, + "eval_steps_per_second": 4.416, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4030 + }, + { + "epoch": 156.0, + "eval_accuracy": 0.8987187309334961, + "eval_auc": 0.9240861966945435, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6688829787234043, + "eval_f1_macro": 0.8045495325789891, + "eval_loss": 0.27123013138771057, + "eval_pr_auc": 0.6585643355556502, + "eval_precision": 0.6902229845626072, + "eval_precision_macro": 0.8126098507842583, + "eval_pred_class_0": 16753, + "eval_pred_class_1": 2915, + "eval_predicted_binding_ratio": 0.14821029082774048, + "eval_recall": 0.6488229603353757, + "eval_recall_macro": 0.7971585073904801, + "eval_runtime": 0.2445, + "eval_samples_per_second": 666.601, + "eval_steps_per_second": 4.09, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4056 + }, + { + "epoch": 157.0, + "eval_accuracy": 0.8987695749440716, + "eval_auc": 0.9242226073999265, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.668111351891982, + "eval_f1_macro": 0.8041939607346642, + "eval_loss": 0.2712385952472687, + "eval_pr_auc": 0.6591839305732792, + "eval_precision": 0.6915113871635611, + "eval_precision_macro": 0.8130484783164258, + "eval_pred_class_0": 16770, + "eval_pred_class_1": 2898, + "eval_predicted_binding_ratio": 0.14734594264795606, + "eval_recall": 0.6462431473718155, + "eval_recall_macro": 0.7961402252220942, + "eval_runtime": 0.2571, + "eval_samples_per_second": 634.046, + "eval_steps_per_second": 3.89, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4082 + }, + { + "epoch": 158.0, + "eval_accuracy": 0.8989729509863738, + "eval_auc": 0.9241500613527003, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6699883740242485, + "eval_f1_macro": 0.8051727852411502, + "eval_loss": 0.27118799090385437, + "eval_pr_auc": 0.6587023192472763, + "eval_precision": 0.6907534246575342, + "eval_precision_macro": 0.8130146392454138, + "eval_pred_class_0": 16748, + "eval_pred_class_1": 2920, + "eval_predicted_binding_ratio": 0.14846451088061827, + "eval_recall": 0.6504353434376008, + "eval_recall_macro": 0.7979646989415927, + "eval_runtime": 0.2165, + "eval_samples_per_second": 752.927, + "eval_steps_per_second": 4.619, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4108 + }, + { + "epoch": 159.0, + "eval_accuracy": 0.8990237949969494, + "eval_auc": 0.9243365652302152, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6693306693306693, + "eval_f1_macro": 0.8048723553674049, + "eval_loss": 0.2712218463420868, + "eval_pr_auc": 0.6594296109425748, + "eval_precision": 0.6919104991394148, + "eval_precision_macro": 0.8134133417966358, + "eval_pred_class_0": 16763, + "eval_pred_class_1": 2905, + "eval_predicted_binding_ratio": 0.14770185072198494, + "eval_recall": 0.6481780070944857, + "eval_recall_macro": 0.7970774746041632, + "eval_runtime": 0.256, + "eval_samples_per_second": 636.627, + "eval_steps_per_second": 3.906, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4134 + }, + { + "epoch": 160.0, + "eval_accuracy": 0.8990746390075249, + "eval_auc": 0.9243664342695147, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6709762970329852, + "eval_f1_macro": 0.805686028587357, + "eval_loss": 0.2712063789367676, + "eval_pr_auc": 0.6596905428752633, + "eval_precision": 0.6903137789904502, + "eval_precision_macro": 0.8129807422676916, + "eval_pred_class_0": 16736, + "eval_pred_class_1": 2932, + "eval_predicted_binding_ratio": 0.1490746390075249, + "eval_recall": 0.6526926797807159, + "eval_recall_macro": 0.7989424647168202, + "eval_runtime": 0.1787, + "eval_samples_per_second": 912.327, + "eval_steps_per_second": 5.597, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4160 + }, + { + "epoch": 161.0, + "eval_accuracy": 0.8989729509863738, + "eval_auc": 0.9244822704721024, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6700979578283247, + "eval_f1_macro": 0.8052257867669526, + "eval_loss": 0.2710643708705902, + "eval_pr_auc": 0.6603246471492675, + "eval_precision": 0.6906228610540726, + "eval_precision_macro": 0.8129753502690642, + "eval_pred_class_0": 16746, + "eval_pred_class_1": 2922, + "eval_predicted_binding_ratio": 0.14856619890176936, + "eval_recall": 0.6507578200580458, + "eval_recall_macro": 0.7980957567725492, + "eval_runtime": 0.2165, + "eval_samples_per_second": 752.853, + "eval_steps_per_second": 4.619, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4186 + }, + { + "epoch": 162.0, + "eval_accuracy": 0.8989729509863738, + "eval_auc": 0.9245231275027241, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6700979578283247, + "eval_f1_macro": 0.8052257867669526, + "eval_loss": 0.2710554301738739, + "eval_pr_auc": 0.6603878428843051, + "eval_precision": 0.6906228610540726, + "eval_precision_macro": 0.8129753502690642, + "eval_pred_class_0": 16746, + "eval_pred_class_1": 2922, + "eval_predicted_binding_ratio": 0.14856619890176936, + "eval_recall": 0.6507578200580458, + "eval_recall_macro": 0.7980957567725492, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.523, + "eval_steps_per_second": 3.936, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4212 + }, + { + "epoch": 163.0, + "eval_accuracy": 0.8993288590604027, + "eval_auc": 0.9246038585815736, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6719681908548708, + "eval_f1_macro": 0.8062543656977057, + "eval_loss": 0.27094030380249023, + "eval_pr_auc": 0.6606722300563197, + "eval_precision": 0.6909710391822828, + "eval_precision_macro": 0.8134231279100321, + "eval_pred_class_0": 16733, + "eval_pred_class_1": 2935, + "eval_predicted_binding_ratio": 0.14922717103925157, + "eval_recall": 0.653982586262496, + "eval_recall_macro": 0.7996175984369762, + "eval_runtime": 0.2218, + "eval_samples_per_second": 735.026, + "eval_steps_per_second": 4.509, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4238 + }, + { + "epoch": 164.0, + "eval_accuracy": 0.8992271710392515, + "eval_auc": 0.9247262642209572, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6713101160862355, + "eval_f1_macro": 0.8059006594362601, + "eval_loss": 0.2709755003452301, + "eval_pr_auc": 0.6610856256039915, + "eval_precision": 0.691020826220553, + "eval_precision_macro": 0.8133400325618567, + "eval_pred_class_0": 16739, + "eval_pred_class_1": 2929, + "eval_predicted_binding_ratio": 0.14892210697579825, + "eval_recall": 0.6526926797807159, + "eval_recall_macro": 0.7990330061546183, + "eval_runtime": 0.2493, + "eval_samples_per_second": 653.878, + "eval_steps_per_second": 4.012, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4264 + }, + { + "epoch": 165.0, + "eval_accuracy": 0.8998881431767338, + "eval_auc": 0.9247699826062725, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6738446248136492, + "eval_f1_macro": 0.8073568599908361, + "eval_loss": 0.2707850933074951, + "eval_pr_auc": 0.6613417671448518, + "eval_precision": 0.6927792915531336, + "eval_precision_macro": 0.8145046350187375, + "eval_pred_class_0": 16732, + "eval_pred_class_1": 2936, + "eval_predicted_binding_ratio": 0.14927801504982713, + "eval_recall": 0.6559174459851661, + "eval_recall_macro": 0.8007359306946413, + "eval_runtime": 0.1724, + "eval_samples_per_second": 945.745, + "eval_steps_per_second": 5.802, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4290 + }, + { + "epoch": 166.0, + "eval_accuracy": 0.9001423632296115, + "eval_auc": 0.9248060123174118, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6753719008264463, + "eval_f1_macro": 0.8081840577256068, + "eval_loss": 0.2708885669708252, + "eval_pr_auc": 0.6612557604235284, + "eval_precision": 0.6927772126144456, + "eval_precision_macro": 0.8147479579430862, + "eval_pred_class_0": 16719, + "eval_pred_class_1": 2949, + "eval_predicted_binding_ratio": 0.14993898718730933, + "eval_recall": 0.6588197355691713, + "eval_recall_macro": 0.8020663535695799, + "eval_runtime": 0.2656, + "eval_samples_per_second": 613.671, + "eval_steps_per_second": 3.765, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4316 + }, + { + "epoch": 167.0, + "eval_accuracy": 0.8999389871873094, + "eval_auc": 0.9248204358808662, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6741721854304635, + "eval_f1_macro": 0.8075329932438238, + "eval_loss": 0.270906925201416, + "eval_pr_auc": 0.6612342465257918, + "eval_precision": 0.692752636951344, + "eval_precision_macro": 0.8145453662370445, + "eval_pred_class_0": 16729, + "eval_pred_class_1": 2939, + "eval_predicted_binding_ratio": 0.1494305470815538, + "eval_recall": 0.6565623992260561, + "eval_recall_macro": 0.8010282268358203, + "eval_runtime": 0.2263, + "eval_samples_per_second": 720.252, + "eval_steps_per_second": 4.419, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4342 + }, + { + "epoch": 168.0, + "eval_accuracy": 0.900091519219036, + "eval_auc": 0.9249249239896699, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6750454770960806, + "eval_f1_macro": 0.8080084845902765, + "eval_loss": 0.2706829011440277, + "eval_pr_auc": 0.6618605064537387, + "eval_precision": 0.6928038017651053, + "eval_precision_macro": 0.8147071275300828, + "eval_pred_class_0": 16722, + "eval_pred_class_1": 2946, + "eval_predicted_binding_ratio": 0.14978645515558267, + "eval_recall": 0.6581747823282812, + "eval_recall_macro": 0.8017740574284009, + "eval_runtime": 0.2625, + "eval_samples_per_second": 620.89, + "eval_steps_per_second": 3.809, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4368 + }, + { + "epoch": 169.0, + "eval_accuracy": 0.8999898311978849, + "eval_auc": 0.9250021027063997, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6742838218248054, + "eval_f1_macro": 0.8076047153692607, + "eval_loss": 0.2705872058868408, + "eval_pr_auc": 0.6621173041985378, + "eval_precision": 0.6929884275017019, + "eval_precision_macro": 0.8146651641393745, + "eval_pred_class_0": 16730, + "eval_pred_class_1": 2938, + "eval_predicted_binding_ratio": 0.14937970307097823, + "eval_recall": 0.6565623992260561, + "eval_recall_macro": 0.8010584073150864, + "eval_runtime": 0.2622, + "eval_samples_per_second": 621.716, + "eval_steps_per_second": 3.814, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4394 + }, + { + "epoch": 170.0, + "eval_accuracy": 0.900091519219036, + "eval_auc": 0.9250495097088196, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6750454770960806, + "eval_f1_macro": 0.8080084845902765, + "eval_loss": 0.2706546485424042, + "eval_pr_auc": 0.6620136434657915, + "eval_precision": 0.6928038017651053, + "eval_precision_macro": 0.8147071275300828, + "eval_pred_class_0": 16722, + "eval_pred_class_1": 2946, + "eval_predicted_binding_ratio": 0.14978645515558267, + "eval_recall": 0.6581747823282812, + "eval_recall_macro": 0.8017740574284009, + "eval_runtime": 0.2696, + "eval_samples_per_second": 604.539, + "eval_steps_per_second": 3.709, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4420 + }, + { + "epoch": 171.0, + "eval_accuracy": 0.8999389871873094, + "eval_auc": 0.9250856659424456, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6748182419035029, + "eval_f1_macro": 0.8078453665953039, + "eval_loss": 0.27060601115226746, + "eval_pr_auc": 0.6621340116082275, + "eval_precision": 0.6919688241274145, + "eval_precision_macro": 0.814310068581025, + "eval_pred_class_0": 16717, + "eval_pred_class_1": 2951, + "eval_predicted_binding_ratio": 0.15004067520846046, + "eval_recall": 0.6584972589487262, + "eval_recall_macro": 0.8018145738215594, + "eval_runtime": 0.2527, + "eval_samples_per_second": 645.143, + "eval_steps_per_second": 3.958, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4446 + }, + { + "epoch": 172.0, + "eval_accuracy": 0.9000406752084604, + "eval_auc": 0.9252334636716082, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6741796486576069, + "eval_f1_macro": 0.8075720776469225, + "eval_loss": 0.27041611075401306, + "eval_pr_auc": 0.6630610344326174, + "eval_precision": 0.6934878963518581, + "eval_precision_macro": 0.8148646532849819, + "eval_pred_class_0": 16735, + "eval_pred_class_1": 2933, + "eval_predicted_binding_ratio": 0.14912548301810047, + "eval_recall": 0.6559174459851661, + "eval_recall_macro": 0.8008264721324394, + "eval_runtime": 0.2332, + "eval_samples_per_second": 698.832, + "eval_steps_per_second": 4.287, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4472 + }, + { + "epoch": 173.0, + "eval_accuracy": 0.8998372991661582, + "eval_auc": 0.9253198298673536, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6740569159497022, + "eval_f1_macro": 0.8074417704823604, + "eval_loss": 0.2705075442790985, + "eval_pr_auc": 0.662984991174244, + "eval_precision": 0.6921508664627931, + "eval_precision_macro": 0.8142667635751932, + "eval_pred_class_0": 16725, + "eval_pred_class_1": 2943, + "eval_predicted_binding_ratio": 0.149633923123856, + "eval_recall": 0.6568848758465011, + "eval_recall_macro": 0.8010989237082449, + "eval_runtime": 0.2134, + "eval_samples_per_second": 763.793, + "eval_steps_per_second": 4.686, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4498 + }, + { + "epoch": 173.07692307692307, + "grad_norm": 15784.1748046875, + "learning_rate": 8.266086590174684e-07, + "loss": 0.2376, + "step": 4500 + }, + { + "epoch": 174.0, + "eval_accuracy": 0.8999898311978849, + "eval_auc": 0.9254431016991443, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6740679370339685, + "eval_f1_macro": 0.8075003208787752, + "eval_loss": 0.2703414559364319, + "eval_pr_auc": 0.6637127837233647, + "eval_precision": 0.6932515337423313, + "eval_precision_macro": 0.8147445669189726, + "eval_pred_class_0": 16734, + "eval_pred_class_1": 2934, + "eval_predicted_binding_ratio": 0.14917632702867603, + "eval_recall": 0.6559174459851661, + "eval_recall_macro": 0.8007962916531735, + "eval_runtime": 0.1979, + "eval_samples_per_second": 823.53, + "eval_steps_per_second": 5.052, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4524 + }, + { + "epoch": 175.0, + "eval_accuracy": 0.9001423632296115, + "eval_auc": 0.9255452150782025, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6746189529489728, + "eval_f1_macro": 0.8078199869849969, + "eval_loss": 0.27038928866386414, + "eval_pr_auc": 0.6639609479782242, + "eval_precision": 0.6936967632027258, + "eval_precision_macro": 0.8150250385068789, + "eval_pred_class_0": 16733, + "eval_pred_class_1": 2935, + "eval_predicted_binding_ratio": 0.14922717103925157, + "eval_recall": 0.6565623992260561, + "eval_recall_macro": 0.8011489487528844, + "eval_runtime": 0.2587, + "eval_samples_per_second": 630.051, + "eval_steps_per_second": 3.865, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4550 + }, + { + "epoch": 176.0, + "eval_accuracy": 0.8999898311978849, + "eval_auc": 0.9255934298780361, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6742838218248054, + "eval_f1_macro": 0.8076047153692607, + "eval_loss": 0.2702932059764862, + "eval_pr_auc": 0.6640830183725597, + "eval_precision": 0.6929884275017019, + "eval_precision_macro": 0.8146651641393745, + "eval_pred_class_0": 16730, + "eval_pred_class_1": 2938, + "eval_predicted_binding_ratio": 0.14937970307097823, + "eval_recall": 0.6565623992260561, + "eval_recall_macro": 0.8010584073150864, + "eval_runtime": 0.1918, + "eval_samples_per_second": 849.843, + "eval_steps_per_second": 5.214, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4576 + }, + { + "epoch": 177.0, + "eval_accuracy": 0.8998881431767338, + "eval_auc": 0.9255964274877148, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6745992397950752, + "eval_f1_macro": 0.8077217319600283, + "eval_loss": 0.27021023631095886, + "eval_pr_auc": 0.6640237853140233, + "eval_precision": 0.691864406779661, + "eval_precision_macro": 0.8142298466485935, + "eval_pred_class_0": 16718, + "eval_pred_class_1": 2950, + "eval_predicted_binding_ratio": 0.1499898311978849, + "eval_recall": 0.6581747823282812, + "eval_recall_macro": 0.8016533355113369, + "eval_runtime": 0.2437, + "eval_samples_per_second": 668.962, + "eval_steps_per_second": 4.104, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4602 + }, + { + "epoch": 178.0, + "eval_accuracy": 0.900549115314216, + "eval_auc": 0.9257395146873824, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6756218905472637, + "eval_f1_macro": 0.8084468667292255, + "eval_loss": 0.27011793851852417, + "eval_pr_auc": 0.6647736112265655, + "eval_precision": 0.695459201092523, + "eval_precision_macro": 0.8159475347119822, + "eval_pred_class_0": 16739, + "eval_pred_class_1": 2929, + "eval_predicted_binding_ratio": 0.14892210697579825, + "eval_recall": 0.6568848758465011, + "eval_recall_macro": 0.801521450417969, + "eval_runtime": 0.2294, + "eval_samples_per_second": 710.491, + "eval_steps_per_second": 4.359, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4628 + }, + { + "epoch": 179.0, + "eval_accuracy": 0.9003965832824893, + "eval_auc": 0.9258087711499611, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6755010766937221, + "eval_f1_macro": 0.8083352405901716, + "eval_loss": 0.2701837122440338, + "eval_pr_auc": 0.6649310182754444, + "eval_precision": 0.6944822888283378, + "eval_precision_macro": 0.8155055479522995, + "eval_pred_class_0": 16732, + "eval_pred_class_1": 2936, + "eval_predicted_binding_ratio": 0.14927801504982713, + "eval_recall": 0.6575298290873912, + "eval_recall_macro": 0.8016930246420839, + "eval_runtime": 0.1963, + "eval_samples_per_second": 830.526, + "eval_steps_per_second": 5.095, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4654 + }, + { + "epoch": 180.0, + "eval_accuracy": 0.9004474272930649, + "eval_auc": 0.9258260560681089, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6762566137566137, + "eval_f1_macro": 0.8087183092815753, + "eval_loss": 0.27005937695503235, + "eval_pr_auc": 0.6649666141685525, + "eval_precision": 0.6939260264675942, + "eval_precision_macro": 0.8153859544454471, + "eval_pred_class_0": 16721, + "eval_pred_class_1": 2947, + "eval_predicted_binding_ratio": 0.14983729916615823, + "eval_recall": 0.6594646888100613, + "eval_recall_macro": 0.802509552107089, + "eval_runtime": 0.2598, + "eval_samples_per_second": 627.51, + "eval_steps_per_second": 3.85, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4680 + }, + { + "epoch": 181.0, + "eval_accuracy": 0.9005999593247915, + "eval_auc": 0.9259436830505047, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6764851894754261, + "eval_f1_macro": 0.8088820685009666, + "eval_loss": 0.2699625492095947, + "eval_pr_auc": 0.6656768815498022, + "eval_precision": 0.6947654656696125, + "eval_precision_macro": 0.8157852199805673, + "eval_pred_class_0": 16726, + "eval_pred_class_1": 2942, + "eval_predicted_binding_ratio": 0.14958307911328045, + "eval_recall": 0.6591422121896162, + "eval_recall_macro": 0.8024690357139305, + "eval_runtime": 0.1825, + "eval_samples_per_second": 893.316, + "eval_steps_per_second": 5.48, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4706 + }, + { + "epoch": 182.0, + "eval_accuracy": 0.9011083994305471, + "eval_auc": 0.9259346220939755, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6787778695293146, + "eval_f1_macro": 0.8101680579881181, + "eval_loss": 0.2698967456817627, + "eval_pr_auc": 0.6657214869012321, + "eval_precision": 0.6956668923493569, + "eval_precision_macro": 0.8165423129929146, + "eval_pred_class_0": 16714, + "eval_pred_class_1": 2954, + "eval_predicted_binding_ratio": 0.15019320724018712, + "eval_recall": 0.6626894550145115, + "eval_recall_macro": 0.8042124766471122, + "eval_runtime": 0.2487, + "eval_samples_per_second": 655.302, + "eval_steps_per_second": 4.02, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4732 + }, + { + "epoch": 183.0, + "eval_accuracy": 0.9010575554199716, + "eval_auc": 0.9260415822575143, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6782407407407407, + "eval_f1_macro": 0.8098906179070202, + "eval_loss": 0.26987963914871216, + "eval_pr_auc": 0.6659658035928079, + "eval_precision": 0.6959619952494062, + "eval_precision_macro": 0.8165833539431051, + "eval_pred_class_0": 16721, + "eval_pred_class_1": 2947, + "eval_predicted_binding_ratio": 0.14983729916615823, + "eval_recall": 0.6613995485327314, + "eval_recall_macro": 0.8036580648440201, + "eval_runtime": 0.2595, + "eval_samples_per_second": 628.055, + "eval_steps_per_second": 3.853, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4758 + }, + { + "epoch": 184.0, + "eval_accuracy": 0.9010575554199716, + "eval_auc": 0.9260998701937684, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6781343036718491, + "eval_f1_macro": 0.8098391554406106, + "eval_loss": 0.26986023783683777, + "eval_pr_auc": 0.6661468987350531, + "eval_precision": 0.6960950764006791, + "eval_precision_macro": 0.8166237506024205, + "eval_pred_class_0": 16723, + "eval_pred_class_1": 2945, + "eval_predicted_binding_ratio": 0.1497356111450071, + "eval_recall": 0.6610770719122864, + "eval_recall_macro": 0.8035270070130636, + "eval_runtime": 0.237, + "eval_samples_per_second": 687.681, + "eval_steps_per_second": 4.219, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4784 + }, + { + "epoch": 185.0, + "eval_accuracy": 0.9012609314622737, + "eval_auc": 0.9262103340569317, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6787958981144558, + "eval_f1_macro": 0.810230030763446, + "eval_loss": 0.26986950635910034, + "eval_pr_auc": 0.6665273243194801, + "eval_precision": 0.6967741935483871, + "eval_precision_macro": 0.8170231070594294, + "eval_pred_class_0": 16723, + "eval_pred_class_1": 2945, + "eval_predicted_binding_ratio": 0.1497356111450071, + "eval_recall": 0.6617220251531764, + "eval_recall_macro": 0.8039098445920405, + "eval_runtime": 0.2365, + "eval_samples_per_second": 689.338, + "eval_steps_per_second": 4.229, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4810 + }, + { + "epoch": 186.0, + "eval_accuracy": 0.9014134634940004, + "eval_auc": 0.9262454683781669, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6797687861271676, + "eval_f1_macro": 0.8107536578092345, + "eval_loss": 0.2697572410106659, + "eval_pr_auc": 0.666716804071224, + "eval_precision": 0.6966824644549763, + "eval_precision_macro": 0.8171398441695726, + "eval_pred_class_0": 16714, + "eval_pred_class_1": 2954, + "eval_predicted_binding_ratio": 0.15019320724018712, + "eval_recall": 0.6636568848758465, + "eval_recall_macro": 0.8047867330155776, + "eval_runtime": 0.1794, + "eval_samples_per_second": 908.74, + "eval_steps_per_second": 5.575, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4836 + }, + { + "epoch": 187.0, + "eval_accuracy": 0.9011083994305471, + "eval_auc": 0.9263210315000697, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6784592494627211, + "eval_f1_macro": 0.810014015033881, + "eval_loss": 0.26983824372291565, + "eval_pr_auc": 0.6669884807739552, + "eval_precision": 0.6960651289009498, + "eval_precision_macro": 0.8166629472255945, + "eval_pred_class_0": 16720, + "eval_pred_class_1": 2948, + "eval_predicted_binding_ratio": 0.14988814317673377, + "eval_recall": 0.6617220251531764, + "eval_recall_macro": 0.8038193031542425, + "eval_runtime": 0.1795, + "eval_samples_per_second": 908.044, + "eval_steps_per_second": 5.571, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4862 + }, + { + "epoch": 188.0, + "eval_accuracy": 0.9012100874516982, + "eval_auc": 0.9264191350895575, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6786836447825368, + "eval_f1_macro": 0.8101580079180191, + "eval_loss": 0.26967287063598633, + "eval_pr_auc": 0.667515048707415, + "eval_precision": 0.6965376782077393, + "eval_precision_macro": 0.8169029737767557, + "eval_pred_class_0": 16722, + "eval_pred_class_1": 2946, + "eval_predicted_binding_ratio": 0.14978645515558267, + "eval_recall": 0.6617220251531764, + "eval_recall_macro": 0.8038796641127746, + "eval_runtime": 0.1851, + "eval_samples_per_second": 880.782, + "eval_steps_per_second": 5.404, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4888 + }, + { + "epoch": 189.0, + "eval_accuracy": 0.9013626194834249, + "eval_auc": 0.9265750497228504, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6782752902155887, + "eval_f1_macro": 0.8100137635248964, + "eval_loss": 0.2697126567363739, + "eval_pr_auc": 0.6680073523436961, + "eval_precision": 0.698190508706043, + "eval_precision_macro": 0.8175521514197519, + "eval_pred_class_0": 16739, + "eval_pred_class_1": 2929, + "eval_predicted_binding_ratio": 0.14892210697579825, + "eval_recall": 0.6594646888100613, + "eval_recall_macro": 0.8030528007338771, + "eval_runtime": 0.2626, + "eval_samples_per_second": 620.766, + "eval_steps_per_second": 3.808, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4914 + }, + { + "epoch": 190.0, + "eval_accuracy": 0.9017185275574537, + "eval_auc": 0.9265310393625665, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6808651147432723, + "eval_f1_macro": 0.8113902183590456, + "eval_loss": 0.26952171325683594, + "eval_pr_auc": 0.6676611850618266, + "eval_precision": 0.6975642760487145, + "eval_precision_macro": 0.8176966904417818, + "eval_pred_class_0": 16712, + "eval_pred_class_1": 2956, + "eval_predicted_binding_ratio": 0.1502948952613382, + "eval_recall": 0.6649467913576266, + "eval_recall_macro": 0.8054920472149997, + "eval_runtime": 0.2625, + "eval_samples_per_second": 620.883, + "eval_steps_per_second": 3.809, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4940 + }, + { + "epoch": 191.0, + "eval_accuracy": 0.901921903599756, + "eval_auc": 0.9266825451738318, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6812097174020822, + "eval_f1_macro": 0.8116278420268636, + "eval_loss": 0.2694892883300781, + "eval_pr_auc": 0.668268169384226, + "eval_precision": 0.6986440677966101, + "eval_precision_macro": 0.8182178348314311, + "eval_pred_class_0": 16718, + "eval_pred_class_1": 2950, + "eval_predicted_binding_ratio": 0.1499898311978849, + "eval_recall": 0.6646243147371815, + "eval_recall_macro": 0.8054817113011072, + "eval_runtime": 0.2617, + "eval_samples_per_second": 622.763, + "eval_steps_per_second": 3.821, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4966 + }, + { + "epoch": 192.0, + "eval_accuracy": 0.9018710595891803, + "eval_auc": 0.9266840147811743, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6814130075932651, + "eval_f1_macro": 0.8117083668893665, + "eval_loss": 0.26940062642097473, + "eval_pr_auc": 0.6682450098479531, + "eval_precision": 0.6980047345282381, + "eval_precision_macro": 0.817974900326174, + "eval_pred_class_0": 16711, + "eval_pred_class_1": 2957, + "eval_predicted_binding_ratio": 0.15034573927191378, + "eval_recall": 0.6655917445985166, + "eval_recall_macro": 0.8058447043147107, + "eval_runtime": 0.2532, + "eval_samples_per_second": 643.715, + "eval_steps_per_second": 3.949, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4992 + }, + { + "epoch": 192.30769230769232, + "grad_norm": 15858.0107421875, + "learning_rate": 7.72994743624204e-07, + "loss": 0.2316, + "step": 5000 + }, + { + "epoch": 193.0, + "eval_accuracy": 0.9021761236526337, + "eval_auc": 0.9268377687996988, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6816677696889477, + "eval_f1_macro": 0.8119380540142443, + "eval_loss": 0.26932862401008606, + "eval_pr_auc": 0.6692251134414691, + "eval_precision": 0.6999660210669385, + "eval_precision_macro": 0.8188619343002854, + "eval_pred_class_0": 16725, + "eval_pred_class_1": 2943, + "eval_predicted_binding_ratio": 0.149633923123856, + "eval_recall": 0.6643018381167365, + "eval_recall_macro": 0.8055015558664808, + "eval_runtime": 0.1767, + "eval_samples_per_second": 922.594, + "eval_steps_per_second": 5.66, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5018 + }, + { + "epoch": 194.0, + "eval_accuracy": 0.9022269676632093, + "eval_auc": 0.92693509378927, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6815697963238947, + "eval_f1_macro": 0.8119084228038069, + "eval_loss": 0.2693846523761749, + "eval_pr_auc": 0.6695232673057094, + "eval_precision": 0.7004765146358066, + "eval_precision_macro": 0.8190667092007484, + "eval_pred_class_0": 16730, + "eval_pred_class_1": 2938, + "eval_predicted_binding_ratio": 0.14937970307097823, + "eval_recall": 0.6636568848758465, + "eval_recall_macro": 0.8052696206838338, + "eval_runtime": 0.2611, + "eval_samples_per_second": 624.258, + "eval_steps_per_second": 3.83, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5044 + }, + { + "epoch": 195.0, + "eval_accuracy": 0.9022778116737848, + "eval_auc": 0.926939395553809, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6825239511067063, + "eval_f1_macro": 0.8123875088746679, + "eval_loss": 0.269380122423172, + "eval_pr_auc": 0.6693235837806535, + "eval_precision": 0.6996274974602099, + "eval_precision_macro": 0.8188535333546936, + "eval_pred_class_0": 16715, + "eval_pred_class_1": 2953, + "eval_predicted_binding_ratio": 0.15014236322961155, + "eval_recall": 0.6662366978394066, + "eval_recall_macro": 0.8063482638107518, + "eval_runtime": 0.2536, + "eval_samples_per_second": 642.818, + "eval_steps_per_second": 3.944, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5070 + }, + { + "epoch": 196.0, + "eval_accuracy": 0.9018202155786048, + "eval_auc": 0.9269277165550606, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6819304892110031, + "eval_f1_macro": 0.8119407443800393, + "eval_loss": 0.2693455219268799, + "eval_pr_auc": 0.6690854783865479, + "eval_precision": 0.696969696969697, + "eval_precision_macro": 0.8176128877709905, + "eval_pred_class_0": 16698, + "eval_pred_class_1": 2970, + "eval_predicted_binding_ratio": 0.15100671140939598, + "eval_recall": 0.6675266043211867, + "eval_recall_macro": 0.8066008708211837, + "eval_runtime": 0.2679, + "eval_samples_per_second": 608.505, + "eval_steps_per_second": 3.733, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5096 + }, + { + "epoch": 197.0, + "eval_accuracy": 0.9018202155786048, + "eval_auc": 0.926915803976337, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6826622843056697, + "eval_f1_macro": 0.8122944214527055, + "eval_loss": 0.2691311538219452, + "eval_pr_auc": 0.6692617138980786, + "eval_precision": 0.6960455764075067, + "eval_precision_macro": 0.8173347038115213, + "eval_pred_class_0": 16684, + "eval_pred_class_1": 2984, + "eval_predicted_binding_ratio": 0.15171852755745374, + "eval_recall": 0.6697839406643018, + "eval_recall_macro": 0.8075182756378791, + "eval_runtime": 0.261, + "eval_samples_per_second": 624.485, + "eval_steps_per_second": 3.831, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5122 + }, + { + "epoch": 198.0, + "eval_accuracy": 0.9023794996949359, + "eval_auc": 0.9270834170733764, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6822244289970208, + "eval_f1_macro": 0.8122781903500151, + "eval_loss": 0.26926350593566895, + "eval_pr_auc": 0.6700425139918407, + "eval_precision": 0.7007820469228153, + "eval_precision_macro": 0.8193035600788525, + "eval_pred_class_0": 16727, + "eval_pred_class_1": 2941, + "eval_predicted_binding_ratio": 0.1495322351027049, + "eval_recall": 0.6646243147371815, + "eval_recall_macro": 0.8057533356145012, + "eval_runtime": 0.1796, + "eval_samples_per_second": 907.821, + "eval_steps_per_second": 5.569, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5148 + }, + { + "epoch": 199.0, + "eval_accuracy": 0.9023286556843604, + "eval_auc": 0.9271362061477199, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6822167080231596, + "eval_f1_macro": 0.812256712426767, + "eval_loss": 0.26931333541870117, + "eval_pr_auc": 0.6701662107301889, + "eval_precision": 0.7004076086956522, + "eval_precision_macro": 0.8191406615590195, + "eval_pred_class_0": 16724, + "eval_pred_class_1": 2944, + "eval_predicted_binding_ratio": 0.14968476713443157, + "eval_recall": 0.6649467913576266, + "eval_recall_macro": 0.8058542129661919, + "eval_runtime": 0.2544, + "eval_samples_per_second": 640.682, + "eval_steps_per_second": 3.931, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5174 + }, + { + "epoch": 200.0, + "eval_accuracy": 0.9019727476103315, + "eval_auc": 0.9271006630615285, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6823723228995058, + "eval_f1_macro": 0.8122076248057319, + "eval_loss": 0.26923447847366333, + "eval_pr_auc": 0.6700054234044599, + "eval_precision": 0.6975412596833951, + "eval_precision_macro": 0.8179304597716335, + "eval_pred_class_0": 16699, + "eval_pred_class_1": 2969, + "eval_predicted_binding_ratio": 0.15095586739882041, + "eval_recall": 0.6678490809416318, + "eval_recall_macro": 0.8068224700899382, + "eval_runtime": 0.2001, + "eval_samples_per_second": 814.446, + "eval_steps_per_second": 4.997, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5200 + }, + { + "epoch": 201.0, + "eval_accuracy": 0.9022269676632093, + "eval_auc": 0.9272154481542286, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6827256228345158, + "eval_f1_macro": 0.8124672441745833, + "eval_loss": 0.2691180408000946, + "eval_pr_auc": 0.6705128874375396, + "eval_precision": 0.6989864864864865, + "eval_precision_macro": 0.8186098340979236, + "eval_pred_class_0": 16708, + "eval_pred_class_1": 2960, + "eval_predicted_binding_ratio": 0.15049827130364044, + "eval_recall": 0.6672041277007417, + "eval_recall_macro": 0.8067112568243553, + "eval_runtime": 0.2326, + "eval_samples_per_second": 700.909, + "eval_steps_per_second": 4.3, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5226 + }, + { + "epoch": 202.0, + "eval_accuracy": 0.9027354077689648, + "eval_auc": 0.9272916730860608, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6847915636842973, + "eval_f1_macro": 0.8136435649304945, + "eval_loss": 0.2689346969127655, + "eval_pr_auc": 0.6709998700001464, + "eval_precision": 0.7001347708894878, + "eval_precision_macro": 0.8194386429297739, + "eval_pred_class_0": 16700, + "eval_pred_class_1": 2968, + "eval_predicted_binding_ratio": 0.15090502338824485, + "eval_recall": 0.6701064172847468, + "eval_recall_macro": 0.8081925820956238, + "eval_runtime": 0.1664, + "eval_samples_per_second": 979.381, + "eval_steps_per_second": 6.008, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5252 + }, + { + "epoch": 203.0, + "eval_accuracy": 0.9025828757372382, + "eval_auc": 0.9274415730349983, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6833057851239669, + "eval_f1_macro": 0.8128720237282395, + "eval_loss": 0.269077867269516, + "eval_pr_auc": 0.6716085952371595, + "eval_precision": 0.7009155645981688, + "eval_precision_macro": 0.819534880211639, + "eval_pred_class_0": 16719, + "eval_pred_class_1": 2949, + "eval_predicted_binding_ratio": 0.14993898718730933, + "eval_recall": 0.6665591744598517, + "eval_recall_macro": 0.8066604045173044, + "eval_runtime": 0.2556, + "eval_samples_per_second": 637.756, + "eval_steps_per_second": 3.913, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5278 + }, + { + "epoch": 204.0, + "eval_accuracy": 0.902837095790116, + "eval_auc": 0.9274026625041677, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6848095002474023, + "eval_f1_macro": 0.8136877723940104, + "eval_loss": 0.2691201865673065, + "eval_pr_auc": 0.6711363619469519, + "eval_precision": 0.700877785280216, + "eval_precision_macro": 0.8197612917781423, + "eval_pred_class_0": 16706, + "eval_pred_class_1": 2962, + "eval_predicted_binding_ratio": 0.15059995932479153, + "eval_recall": 0.6694614640438569, + "eval_recall_macro": 0.8079908273922429, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.544, + "eval_steps_per_second": 3.868, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5304 + }, + { + "epoch": 205.0, + "eval_accuracy": 0.9029896278218426, + "eval_auc": 0.9274924350745481, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6850445691647409, + "eval_f1_macro": 0.8138546963859644, + "eval_loss": 0.2691231071949005, + "eval_pr_auc": 0.6713794419677425, + "eval_precision": 0.7017247210010146, + "eval_precision_macro": 0.8201640180913157, + "eval_pred_class_0": 16711, + "eval_pred_class_1": 2957, + "eval_predicted_binding_ratio": 0.15034573927191378, + "eval_recall": 0.6691389874234118, + "eval_recall_macro": 0.8079503109990844, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.331, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5330 + }, + { + "epoch": 206.0, + "eval_accuracy": 0.9031421598535693, + "eval_auc": 0.9275284258556915, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6860062633921213, + "eval_f1_macro": 0.8143728753012186, + "eval_loss": 0.26890990138053894, + "eval_pr_auc": 0.6717808840440014, + "eval_precision": 0.7016183412002697, + "eval_precision_macro": 0.8202739053624388, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.6710738471460819, + "eval_recall_macro": 0.8088271994226215, + "eval_runtime": 0.2637, + "eval_samples_per_second": 618.148, + "eval_steps_per_second": 3.792, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5356 + }, + { + "epoch": 207.0, + "eval_accuracy": 0.9030913158429937, + "eval_auc": 0.9275997261430513, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6854785478547855, + "eval_f1_macro": 0.8141000144010073, + "eval_loss": 0.2689792513847351, + "eval_pr_auc": 0.6718662790463032, + "eval_precision": 0.7019263264616424, + "eval_precision_macro": 0.8203209943398044, + "eval_pred_class_0": 16709, + "eval_pred_class_1": 2959, + "eval_predicted_binding_ratio": 0.15044742729306487, + "eval_recall": 0.6697839406643018, + "eval_recall_macro": 0.8082727876195295, + "eval_runtime": 0.195, + "eval_samples_per_second": 835.813, + "eval_steps_per_second": 5.128, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5382 + }, + { + "epoch": 208.0, + "eval_accuracy": 0.9032946918852959, + "eval_auc": 0.9276906860783045, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6862421643022105, + "eval_f1_macro": 0.8145402081954642, + "eval_loss": 0.2688303291797638, + "eval_pr_auc": 0.6723694322774509, + "eval_precision": 0.7024653833164471, + "eval_precision_macro": 0.8206766373097469, + "eval_pred_class_0": 16707, + "eval_pred_class_1": 2961, + "eval_predicted_binding_ratio": 0.150549115314216, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.808786683029463, + "eval_runtime": 0.2599, + "eval_samples_per_second": 627.22, + "eval_steps_per_second": 3.848, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5408 + }, + { + "epoch": 209.0, + "eval_accuracy": 0.9031421598535693, + "eval_auc": 0.9277029684919884, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6851760039662865, + "eval_f1_macro": 0.8139715080669648, + "eval_loss": 0.2689387798309326, + "eval_pr_auc": 0.6722283267888528, + "eval_precision": 0.7027118644067797, + "eval_precision_macro": 0.8206106277411336, + "eval_pred_class_0": 16718, + "eval_pred_class_1": 2950, + "eval_predicted_binding_ratio": 0.1499898311978849, + "eval_recall": 0.6684940341825217, + "eval_recall_macro": 0.8077787367749694, + "eval_runtime": 0.2435, + "eval_samples_per_second": 669.46, + "eval_steps_per_second": 4.107, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5434 + }, + { + "epoch": 210.0, + "eval_accuracy": 0.9030404718324181, + "eval_auc": 0.9277600593308708, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6846370100876468, + "eval_f1_macro": 0.8136754097270521, + "eval_loss": 0.26892563700675964, + "eval_pr_auc": 0.6725269853749476, + "eval_precision": 0.7026476578411406, + "eval_precision_macro": 0.8204961767258567, + "eval_pred_class_0": 16722, + "eval_pred_class_1": 2946, + "eval_predicted_binding_ratio": 0.14978645515558267, + "eval_recall": 0.6675266043211867, + "eval_recall_macro": 0.807325202323568, + "eval_runtime": 0.2601, + "eval_samples_per_second": 626.763, + "eval_steps_per_second": 3.845, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5460 + }, + { + "epoch": 211.0, + "eval_accuracy": 0.9032438478747203, + "eval_auc": 0.9278453744167288, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.686128979053274, + "eval_f1_macro": 0.8144677293907912, + "eval_loss": 0.26878559589385986, + "eval_pr_auc": 0.6729846306066621, + "eval_precision": 0.7022282241728561, + "eval_precision_macro": 0.8205562286912407, + "eval_pred_class_0": 16706, + "eval_pred_class_1": 2962, + "eval_predicted_binding_ratio": 0.15059995932479153, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.808756502550197, + "eval_runtime": 0.2433, + "eval_samples_per_second": 669.879, + "eval_steps_per_second": 4.11, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5486 + }, + { + "epoch": 211.53846153846155, + "grad_norm": 16655.041015625, + "learning_rate": 7.144675667015729e-07, + "loss": 0.2259, + "step": 5500 + }, + { + "epoch": 212.0, + "eval_accuracy": 0.9028879398006915, + "eval_auc": 0.9279623006591996, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6845061116617113, + "eval_f1_macro": 0.8135588667797169, + "eval_loss": 0.26880088448524475, + "eval_pr_auc": 0.6734939312101108, + "eval_precision": 0.7016593294954284, + "eval_precision_macro": 0.8200489288817256, + "eval_pred_class_0": 16715, + "eval_pred_class_1": 2953, + "eval_predicted_binding_ratio": 0.15014236322961155, + "eval_recall": 0.6681715575620768, + "eval_recall_macro": 0.8074967765476829, + "eval_runtime": 0.2166, + "eval_samples_per_second": 752.433, + "eval_steps_per_second": 4.616, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5512 + }, + { + "epoch": 213.0, + "eval_accuracy": 0.9032438478747203, + "eval_auc": 0.9279055601902797, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6863359156090325, + "eval_f1_macro": 0.8145677594216373, + "eval_loss": 0.2687283456325531, + "eval_pr_auc": 0.6734074239428265, + "eval_precision": 0.7019554956169926, + "eval_precision_macro": 0.820472419105347, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.6713963237665269, + "eval_recall_macro": 0.80901861821211, + "eval_runtime": 0.2336, + "eval_samples_per_second": 697.742, + "eval_steps_per_second": 4.281, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5538 + }, + { + "epoch": 214.0, + "eval_accuracy": 0.9032438478747203, + "eval_auc": 0.9279595755594916, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6864392815949909, + "eval_f1_macro": 0.8146177229810407, + "eval_loss": 0.2687055766582489, + "eval_pr_auc": 0.6734235549479375, + "eval_precision": 0.7018194070080862, + "eval_precision_macro": 0.8204306615878754, + "eval_pred_class_0": 16700, + "eval_pred_class_1": 2968, + "eval_predicted_binding_ratio": 0.15090502338824485, + "eval_recall": 0.671718800386972, + "eval_recall_macro": 0.8091496760430665, + "eval_runtime": 0.225, + "eval_samples_per_second": 724.337, + "eval_steps_per_second": 4.444, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5564 + }, + { + "epoch": 215.0, + "eval_accuracy": 0.9031421598535693, + "eval_auc": 0.9280330753916157, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.685383980181668, + "eval_f1_macro": 0.8140720567955604, + "eval_loss": 0.2687467932701111, + "eval_pr_auc": 0.6737104569152422, + "eval_precision": 0.7024373730534867, + "eval_precision_macro": 0.8205258541706347, + "eval_pred_class_0": 16714, + "eval_pred_class_1": 2954, + "eval_predicted_binding_ratio": 0.15019320724018712, + "eval_recall": 0.6691389874234118, + "eval_recall_macro": 0.8080408524368825, + "eval_runtime": 0.1833, + "eval_samples_per_second": 889.114, + "eval_steps_per_second": 5.455, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5590 + }, + { + "epoch": 216.0, + "eval_accuracy": 0.9031930038641448, + "eval_auc": 0.9281060496687963, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6860158311345647, + "eval_f1_macro": 0.8143952682963037, + "eval_loss": 0.26888203620910645, + "eval_pr_auc": 0.6738155361634312, + "eval_precision": 0.7019912251096861, + "eval_precision_macro": 0.8204358998939631, + "eval_pred_class_0": 16705, + "eval_pred_class_1": 2963, + "eval_predicted_binding_ratio": 0.1506508033353671, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.808726322070931, + "eval_runtime": 0.1764, + "eval_samples_per_second": 923.938, + "eval_steps_per_second": 5.668, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5616 + }, + { + "epoch": 217.0, + "eval_accuracy": 0.9032438478747203, + "eval_auc": 0.9281046287239484, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6865425794761983, + "eval_f1_macro": 0.8146676522813128, + "eval_loss": 0.2686736285686493, + "eval_pr_auc": 0.6740322097472393, + "eval_precision": 0.7016835016835017, + "eval_precision_macro": 0.8203890020095554, + "eval_pred_class_0": 16698, + "eval_pred_class_1": 2970, + "eval_predicted_binding_ratio": 0.15100671140939598, + "eval_recall": 0.672041277007417, + "eval_recall_macro": 0.809280733874023, + "eval_runtime": 0.2537, + "eval_samples_per_second": 642.37, + "eval_steps_per_second": 3.941, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5642 + }, + { + "epoch": 218.0, + "eval_accuracy": 0.9033963799064471, + "eval_auc": 0.9282425576991689, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6862615587846763, + "eval_f1_macro": 0.814585106315415, + "eval_loss": 0.26872488856315613, + "eval_pr_auc": 0.6745604450622946, + "eval_precision": 0.7032148900169205, + "eval_precision_macro": 0.8210025266814094, + "eval_pred_class_0": 16713, + "eval_pred_class_1": 2955, + "eval_predicted_binding_ratio": 0.15024405125076265, + "eval_recall": 0.6701064172847468, + "eval_recall_macro": 0.808584928326082, + "eval_runtime": 0.2515, + "eval_samples_per_second": 648.077, + "eval_steps_per_second": 3.976, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5668 + }, + { + "epoch": 219.0, + "eval_accuracy": 0.9032946918852959, + "eval_auc": 0.9283728174652107, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6862421643022105, + "eval_f1_macro": 0.8145402081954642, + "eval_loss": 0.26861947774887085, + "eval_pr_auc": 0.675175157595335, + "eval_precision": 0.7024653833164471, + "eval_precision_macro": 0.8206766373097469, + "eval_pred_class_0": 16707, + "eval_pred_class_1": 2961, + "eval_predicted_binding_ratio": 0.150549115314216, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.808786683029463, + "eval_runtime": 0.269, + "eval_samples_per_second": 605.917, + "eval_steps_per_second": 3.717, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5694 + }, + { + "epoch": 220.0, + "eval_accuracy": 0.9035997559487492, + "eval_auc": 0.9283705984554486, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6875411997363217, + "eval_f1_macro": 0.8152747479984963, + "eval_loss": 0.2684967517852783, + "eval_pr_auc": 0.6752603675091132, + "eval_precision": 0.703067071115605, + "eval_precision_macro": 0.8211461336058236, + "eval_pred_class_0": 16701, + "eval_pred_class_1": 2967, + "eval_predicted_binding_ratio": 0.15085417937766932, + "eval_recall": 0.672686230248307, + "eval_recall_macro": 0.809754112890798, + "eval_runtime": 0.2663, + "eval_samples_per_second": 612.018, + "eval_steps_per_second": 3.755, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5720 + }, + { + "epoch": 221.0, + "eval_accuracy": 0.9037014439699004, + "eval_auc": 0.9285080992007146, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6871489924017179, + "eval_f1_macro": 0.8151206773197821, + "eval_loss": 0.2685548961162567, + "eval_pr_auc": 0.6758164431668767, + "eval_precision": 0.7043684388757196, + "eval_precision_macro": 0.8216427895844347, + "eval_pred_class_0": 16715, + "eval_pred_class_1": 2953, + "eval_predicted_binding_ratio": 0.15014236322961155, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.809028126863591, + "eval_runtime": 0.1986, + "eval_samples_per_second": 820.602, + "eval_steps_per_second": 5.034, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5746 + }, + { + "epoch": 222.0, + "eval_accuracy": 0.9034980679275981, + "eval_auc": 0.9285004105265384, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6870052770448549, + "eval_f1_macro": 0.8149801571567146, + "eval_loss": 0.2685752809047699, + "eval_pr_auc": 0.6755687553750968, + "eval_precision": 0.7030037124535943, + "eval_precision_macro": 0.8210319370409247, + "eval_pred_class_0": 16705, + "eval_pred_class_1": 2963, + "eval_predicted_binding_ratio": 0.1506508033353671, + "eval_recall": 0.671718800386972, + "eval_recall_macro": 0.8093005784393965, + "eval_runtime": 0.2128, + "eval_samples_per_second": 765.976, + "eval_steps_per_second": 4.699, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5772 + }, + { + "epoch": 223.0, + "eval_accuracy": 0.9033963799064471, + "eval_auc": 0.9285113303903685, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6871913072110636, + "eval_f1_macro": 0.815034532807023, + "eval_loss": 0.2685534358024597, + "eval_pr_auc": 0.675465436437485, + "eval_precision": 0.7019845274133871, + "eval_precision_macro": 0.8206238899420935, + "eval_pred_class_0": 16695, + "eval_pred_class_1": 2973, + "eval_predicted_binding_ratio": 0.15115924344112264, + "eval_recall": 0.673008706868752, + "eval_recall_macro": 0.8097644488046905, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.434, + "eval_steps_per_second": 3.898, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5798 + }, + { + "epoch": 224.0, + "eval_accuracy": 0.9033963799064471, + "eval_auc": 0.9286790602773953, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.685742639761826, + "eval_f1_macro": 0.814334221653217, + "eval_loss": 0.2686038315296173, + "eval_pr_auc": 0.6763975611431872, + "eval_precision": 0.7039049235993209, + "eval_precision_macro": 0.8212163498580232, + "eval_pred_class_0": 16723, + "eval_pred_class_1": 2945, + "eval_predicted_binding_ratio": 0.1497356111450071, + "eval_recall": 0.6684940341825217, + "eval_recall_macro": 0.8079296391712996, + "eval_runtime": 0.2587, + "eval_samples_per_second": 630.051, + "eval_steps_per_second": 3.865, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5824 + }, + { + "epoch": 225.0, + "eval_accuracy": 0.9030404718324181, + "eval_auc": 0.9286844910118134, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6857801944307135, + "eval_f1_macro": 0.8142280597608222, + "eval_loss": 0.268373966217041, + "eval_pr_auc": 0.6766849702960268, + "eval_precision": 0.7011455525606469, + "eval_precision_macro": 0.8200338541246348, + "eval_pred_class_0": 16700, + "eval_pred_class_1": 2968, + "eval_predicted_binding_ratio": 0.15090502338824485, + "eval_recall": 0.6710738471460819, + "eval_recall_macro": 0.8087668384640894, + "eval_runtime": 0.2618, + "eval_samples_per_second": 622.527, + "eval_steps_per_second": 3.819, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5850 + }, + { + "epoch": 226.0, + "eval_accuracy": 0.9031421598535693, + "eval_auc": 0.9286265437130228, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6868321551865856, + "eval_f1_macro": 0.8147720454758898, + "eval_loss": 0.268480122089386, + "eval_pr_auc": 0.6762864798501788, + "eval_precision": 0.7005365526492288, + "eval_precision_macro": 0.8199434531195322, + "eval_pred_class_0": 16686, + "eval_pred_class_1": 2982, + "eval_predicted_binding_ratio": 0.15161683953630262, + "eval_recall": 0.673653660109642, + "eval_recall_macro": 0.8098756620702734, + "eval_runtime": 0.1801, + "eval_samples_per_second": 905.249, + "eval_steps_per_second": 5.554, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5876 + }, + { + "epoch": 227.0, + "eval_accuracy": 0.9034472239170226, + "eval_auc": 0.9286911091111043, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6878185106033208, + "eval_f1_macro": 0.8153554406082493, + "eval_loss": 0.2684793770313263, + "eval_pr_auc": 0.6763719243742072, + "eval_precision": 0.7015425888665325, + "eval_precision_macro": 0.8205363669491479, + "eval_pred_class_0": 16686, + "eval_pred_class_1": 2982, + "eval_predicted_binding_ratio": 0.15161683953630262, + "eval_recall": 0.6746210899709771, + "eval_recall_macro": 0.810449918438739, + "eval_runtime": 0.2526, + "eval_samples_per_second": 645.327, + "eval_steps_per_second": 3.959, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5902 + }, + { + "epoch": 228.0, + "eval_accuracy": 0.9032438478747203, + "eval_auc": 0.9287447935753516, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6870580496628844, + "eval_f1_macro": 0.8149167860703537, + "eval_loss": 0.2684626877307892, + "eval_pr_auc": 0.6765555444970285, + "eval_precision": 0.701006711409396, + "eval_precision_macro": 0.8201821668264622, + "eval_pred_class_0": 16688, + "eval_pred_class_1": 2980, + "eval_predicted_binding_ratio": 0.15151515151515152, + "eval_recall": 0.673653660109642, + "eval_recall_macro": 0.8099360230288054, + "eval_runtime": 0.253, + "eval_samples_per_second": 644.184, + "eval_steps_per_second": 3.952, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5928 + }, + { + "epoch": 229.0, + "eval_accuracy": 0.9037522879804759, + "eval_auc": 0.92885044958403, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6877783275606135, + "eval_f1_macro": 0.815442675636767, + "eval_loss": 0.2684047222137451, + "eval_pr_auc": 0.6771563538797724, + "eval_precision": 0.7039162727886563, + "eval_precision_macro": 0.8215498998326138, + "eval_pred_class_0": 16706, + "eval_pred_class_1": 2962, + "eval_predicted_binding_ratio": 0.15059995932479153, + "eval_recall": 0.6723637536278619, + "eval_recall_macro": 0.8097135964976395, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.352, + "eval_steps_per_second": 3.886, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5954 + }, + { + "epoch": 230.0, + "eval_accuracy": 0.9031930038641448, + "eval_auc": 0.9288052907888691, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6866359447004609, + "eval_f1_macro": 0.814695001815053, + "eval_loss": 0.2683703601360321, + "eval_pr_auc": 0.6770692308922137, + "eval_precision": 0.7011764705882353, + "eval_precision_macro": 0.8201862703986524, + "eval_pred_class_0": 16693, + "eval_pred_class_1": 2975, + "eval_predicted_binding_ratio": 0.15126093146227373, + "eval_recall": 0.672686230248307, + "eval_recall_macro": 0.80951266905667, + "eval_runtime": 0.1803, + "eval_samples_per_second": 903.896, + "eval_steps_per_second": 5.545, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5980 + }, + { + "epoch": 230.76923076923077, + "grad_norm": 18226.349609375, + "learning_rate": 6.520804793983146e-07, + "loss": 0.2213, + "step": 6000 + }, + { + "epoch": 231.0, + "eval_accuracy": 0.9036505999593248, + "eval_auc": 0.9289028104284194, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6881685041961494, + "eval_f1_macro": 0.8155957226774667, + "eval_loss": 0.26838722825050354, + "eval_pr_auc": 0.677382862396771, + "eval_precision": 0.7026209677419355, + "eval_precision_macro": 0.821056469972094, + "eval_pred_class_0": 16692, + "eval_pred_class_1": 2976, + "eval_predicted_binding_ratio": 0.1513117754728493, + "eval_recall": 0.6742986133505321, + "eval_recall_macro": 0.8104395825248465, + "eval_runtime": 0.2471, + "eval_samples_per_second": 659.676, + "eval_steps_per_second": 4.047, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6006 + }, + { + "epoch": 232.0, + "eval_accuracy": 0.9036505999593248, + "eval_auc": 0.9289413705892875, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6882710972199375, + "eval_f1_macro": 0.8156453059542872, + "eval_loss": 0.2681979238986969, + "eval_pr_auc": 0.6777962434595076, + "eval_precision": 0.7024848891873741, + "eval_precision_macro": 0.8210147633474318, + "eval_pred_class_0": 16690, + "eval_pred_class_1": 2978, + "eval_predicted_binding_ratio": 0.1514134634940004, + "eval_recall": 0.6746210899709771, + "eval_recall_macro": 0.8105706403558031, + "eval_runtime": 0.2338, + "eval_samples_per_second": 697.187, + "eval_steps_per_second": 4.277, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6032 + }, + { + "epoch": 233.0, + "eval_accuracy": 0.9039556640227782, + "eval_auc": 0.9290857424788171, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6879233437964646, + "eval_f1_macro": 0.8155838213438953, + "eval_loss": 0.2683457136154175, + "eval_pr_auc": 0.6782535995592411, + "eval_precision": 0.7052845528455285, + "eval_precision_macro": 0.8221624965711252, + "eval_pred_class_0": 16716, + "eval_pred_class_1": 2952, + "eval_predicted_binding_ratio": 0.150091519219036, + "eval_recall": 0.6713963237665269, + "eval_recall_macro": 0.8094411449218342, + "eval_runtime": 0.2259, + "eval_samples_per_second": 721.682, + "eval_steps_per_second": 4.427, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6058 + }, + { + "epoch": 234.0, + "eval_accuracy": 0.9040065080333537, + "eval_auc": 0.9291390181781085, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6881400726792204, + "eval_f1_macro": 0.8157063562723066, + "eval_loss": 0.26850852370262146, + "eval_pr_auc": 0.678201898193761, + "eval_precision": 0.7053843548933288, + "eval_precision_macro": 0.8222404873479507, + "eval_pred_class_0": 16715, + "eval_pred_class_1": 2953, + "eval_predicted_binding_ratio": 0.15014236322961155, + "eval_recall": 0.671718800386972, + "eval_recall_macro": 0.8096023832320567, + "eval_runtime": 0.2554, + "eval_samples_per_second": 638.187, + "eval_steps_per_second": 3.915, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6084 + }, + { + "epoch": 235.0, + "eval_accuracy": 0.9037014439699004, + "eval_auc": 0.9291422688327602, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.687767886580943, + "eval_f1_macro": 0.8154198615351363, + "eval_loss": 0.26855188608169556, + "eval_pr_auc": 0.6780321638936206, + "eval_precision": 0.7035413153456999, + "eval_precision_macro": 0.8213868942770528, + "eval_pred_class_0": 16703, + "eval_pred_class_1": 2965, + "eval_predicted_binding_ratio": 0.1507524913565182, + "eval_recall": 0.672686230248307, + "eval_recall_macro": 0.80981447384933, + "eval_runtime": 0.1711, + "eval_samples_per_second": 952.835, + "eval_steps_per_second": 5.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6110 + }, + { + "epoch": 236.0, + "eval_accuracy": 0.903853976001627, + "eval_auc": 0.9292183185796111, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6881081972620815, + "eval_f1_macro": 0.8156376648859622, + "eval_loss": 0.2684246003627777, + "eval_pr_auc": 0.6783782374353945, + "eval_precision": 0.7042538825118163, + "eval_precision_macro": 0.8217486340608884, + "eval_pred_class_0": 16706, + "eval_pred_class_1": 2962, + "eval_predicted_binding_ratio": 0.15059995932479153, + "eval_recall": 0.672686230248307, + "eval_recall_macro": 0.8099050152871281, + "eval_runtime": 0.2571, + "eval_samples_per_second": 633.985, + "eval_steps_per_second": 3.889, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6136 + }, + { + "epoch": 237.0, + "eval_accuracy": 0.9039048200122025, + "eval_auc": 0.9291723227895398, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6891447368421053, + "eval_f1_macro": 0.8161564434751782, + "eval_loss": 0.2682003080844879, + "eval_pr_auc": 0.678474943261759, + "eval_precision": 0.7032561262168513, + "eval_precision_macro": 0.8214884501897367, + "eval_pred_class_0": 16689, + "eval_pred_class_1": 2979, + "eval_predicted_binding_ratio": 0.15146430750457596, + "eval_recall": 0.6755885198323122, + "eval_recall_macro": 0.8111147162450025, + "eval_runtime": 0.2533, + "eval_samples_per_second": 643.44, + "eval_steps_per_second": 3.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6162 + }, + { + "epoch": 238.0, + "eval_accuracy": 0.9041081960545048, + "eval_auc": 0.9292599542101496, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6891891891891891, + "eval_f1_macro": 0.8162490373023017, + "eval_loss": 0.2683660686016083, + "eval_pr_auc": 0.6787382085049865, + "eval_precision": 0.704752275025278, + "eval_precision_macro": 0.8221384271958916, + "eval_pred_class_0": 16701, + "eval_pred_class_1": 2967, + "eval_predicted_binding_ratio": 0.15085417937766932, + "eval_recall": 0.6742986133505321, + "eval_recall_macro": 0.8107112068382407, + "eval_runtime": 0.251, + "eval_samples_per_second": 649.396, + "eval_steps_per_second": 3.984, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6188 + }, + { + "epoch": 239.0, + "eval_accuracy": 0.9039556640227782, + "eval_auc": 0.9292687329242089, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6886434811274106, + "eval_f1_macro": 0.8159319482645679, + "eval_loss": 0.2683408558368683, + "eval_pr_auc": 0.6788700814485856, + "eval_precision": 0.7043155765340526, + "eval_precision_macro": 0.8218620153057044, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.673653660109642, + "eval_recall_macro": 0.8103585497385295, + "eval_runtime": 0.2614, + "eval_samples_per_second": 623.65, + "eval_steps_per_second": 3.826, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6214 + }, + { + "epoch": 240.0, + "eval_accuracy": 0.9038031319910514, + "eval_auc": 0.9293738244479479, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6886109282422647, + "eval_f1_macro": 0.8158628904590758, + "eval_loss": 0.26828694343566895, + "eval_pr_auc": 0.6792947923537326, + "eval_precision": 0.7031932773109244, + "eval_precision_macro": 0.8213743898086402, + "eval_pred_class_0": 16693, + "eval_pred_class_1": 2975, + "eval_predicted_binding_ratio": 0.15126093146227373, + "eval_recall": 0.6746210899709771, + "eval_recall_macro": 0.8106611817936011, + "eval_runtime": 0.1692, + "eval_samples_per_second": 963.308, + "eval_steps_per_second": 5.91, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6240 + }, + { + "epoch": 241.0, + "eval_accuracy": 0.9039048200122025, + "eval_auc": 0.929388987681323, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6891447368421053, + "eval_f1_macro": 0.8161564434751782, + "eval_loss": 0.26832982897758484, + "eval_pr_auc": 0.679377997729221, + "eval_precision": 0.7032561262168513, + "eval_precision_macro": 0.8214884501897367, + "eval_pred_class_0": 16689, + "eval_pred_class_1": 2979, + "eval_predicted_binding_ratio": 0.15146430750457596, + "eval_recall": 0.6755885198323122, + "eval_recall_macro": 0.8111147162450025, + "eval_runtime": 0.2624, + "eval_samples_per_second": 621.159, + "eval_steps_per_second": 3.811, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6266 + }, + { + "epoch": 242.0, + "eval_accuracy": 0.9040065080333537, + "eval_auc": 0.9294373582011398, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6891669410602568, + "eval_f1_macro": 0.8162027357577154, + "eval_loss": 0.26806166768074036, + "eval_pr_auc": 0.6798375856033828, + "eval_precision": 0.7040026908846283, + "eval_precision_macro": 0.8218126661970311, + "eval_pred_class_0": 16695, + "eval_pred_class_1": 2973, + "eval_predicted_binding_ratio": 0.15115924344112264, + "eval_recall": 0.6749435665914221, + "eval_recall_macro": 0.8109129615416215, + "eval_runtime": 0.2508, + "eval_samples_per_second": 649.948, + "eval_steps_per_second": 3.987, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6292 + }, + { + "epoch": 243.0, + "eval_accuracy": 0.9040065080333537, + "eval_auc": 0.929443917905437, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6897798225435425, + "eval_f1_macro": 0.8164989338281623, + "eval_loss": 0.26814863085746765, + "eval_pr_auc": 0.6797263827568155, + "eval_precision": 0.7031825795644892, + "eval_precision_macro": 0.8215607197408852, + "eval_pred_class_0": 16683, + "eval_pred_class_1": 2985, + "eval_predicted_binding_ratio": 0.15176937156802928, + "eval_recall": 0.6768784263140922, + "eval_recall_macro": 0.8116993085273606, + "eval_runtime": 0.2535, + "eval_samples_per_second": 642.959, + "eval_steps_per_second": 3.945, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6318 + }, + { + "epoch": 244.0, + "eval_accuracy": 0.903853976001627, + "eval_auc": 0.9295684160320964, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6882110469909316, + "eval_f1_macro": 0.8156873815700654, + "eval_loss": 0.26816216111183167, + "eval_pr_auc": 0.6803755613590039, + "eval_precision": 0.7041160593792173, + "eval_precision_macro": 0.8217060181953557, + "eval_pred_class_0": 16704, + "eval_pred_class_1": 2964, + "eval_predicted_binding_ratio": 0.15070164734594266, + "eval_recall": 0.673008706868752, + "eval_recall_macro": 0.8100360731180846, + "eval_runtime": 0.2375, + "eval_samples_per_second": 686.328, + "eval_steps_per_second": 4.211, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6344 + }, + { + "epoch": 245.0, + "eval_accuracy": 0.9041081960545048, + "eval_auc": 0.9295755402213329, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6888815572418344, + "eval_f1_macro": 0.8161003326270482, + "eval_loss": 0.2682454288005829, + "eval_pr_auc": 0.6803390373338819, + "eval_precision": 0.7051671732522796, + "eval_precision_macro": 0.8222669528798059, + "eval_pred_class_0": 16707, + "eval_pred_class_1": 2961, + "eval_predicted_binding_ratio": 0.150549115314216, + "eval_recall": 0.6733311834891971, + "eval_recall_macro": 0.8103180333453712, + "eval_runtime": 0.2519, + "eval_samples_per_second": 647.048, + "eval_steps_per_second": 3.97, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6370 + }, + { + "epoch": 246.0, + "eval_accuracy": 0.903853976001627, + "eval_auc": 0.9296204070415253, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6882110469909316, + "eval_f1_macro": 0.8156873815700654, + "eval_loss": 0.2682053744792938, + "eval_pr_auc": 0.6804463740899893, + "eval_precision": 0.7041160593792173, + "eval_precision_macro": 0.8217060181953557, + "eval_pred_class_0": 16704, + "eval_pred_class_1": 2964, + "eval_predicted_binding_ratio": 0.15070164734594266, + "eval_recall": 0.673008706868752, + "eval_recall_macro": 0.8100360731180846, + "eval_runtime": 0.2411, + "eval_samples_per_second": 676.2, + "eval_steps_per_second": 4.148, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6396 + }, + { + "epoch": 247.0, + "eval_accuracy": 0.9045657921496848, + "eval_auc": 0.9296705294111545, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6909270541742137, + "eval_f1_macro": 0.817248994423186, + "eval_loss": 0.2681281566619873, + "eval_pr_auc": 0.6807214505356617, + "eval_precision": 0.7059219380888291, + "eval_precision_macro": 0.8229238344013863, + "eval_pred_class_0": 16696, + "eval_pred_class_1": 2972, + "eval_predicted_binding_ratio": 0.15110839943054707, + "eval_recall": 0.6765559496936472, + "eval_recall_macro": 0.8119002359683303, + "eval_runtime": 0.1963, + "eval_samples_per_second": 830.371, + "eval_steps_per_second": 5.094, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6422 + }, + { + "epoch": 248.0, + "eval_accuracy": 0.9046166361602603, + "eval_auc": 0.9296871135893773, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6912442396313364, + "eval_f1_macro": 0.8174200753177728, + "eval_loss": 0.26824310421943665, + "eval_pr_auc": 0.6805326752113899, + "eval_precision": 0.7058823529411765, + "eval_precision_macro": 0.8229585490219571, + "eval_pred_class_0": 16693, + "eval_pred_class_1": 2975, + "eval_predicted_binding_ratio": 0.15126093146227373, + "eval_recall": 0.6772009029345373, + "eval_recall_macro": 0.8121925321095093, + "eval_runtime": 0.2645, + "eval_samples_per_second": 616.162, + "eval_steps_per_second": 3.78, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6448 + }, + { + "epoch": 249.0, + "eval_accuracy": 0.9047691681919869, + "eval_auc": 0.929768866580617, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6914841047603361, + "eval_f1_macro": 0.8175893393183914, + "eval_loss": 0.2681940495967865, + "eval_pr_auc": 0.6810052289277716, + "eval_precision": 0.7067340067340068, + "eval_precision_macro": 0.8233634101223034, + "eval_pred_class_0": 16698, + "eval_pred_class_1": 2970, + "eval_predicted_binding_ratio": 0.15100671140939598, + "eval_recall": 0.6768784263140922, + "eval_recall_macro": 0.8121520157163508, + "eval_runtime": 0.2396, + "eval_samples_per_second": 680.397, + "eval_steps_per_second": 4.174, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6474 + }, + { + "epoch": 250.0, + "grad_norm": 35924.55078125, + "learning_rate": 5.869563021464528e-07, + "loss": 0.2171, + "step": 6500 + }, + { + "epoch": 250.0, + "eval_accuracy": 0.9048200122025626, + "eval_auc": 0.9298367215633461, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6912928759894459, + "eval_f1_macro": 0.817514675551828, + "eval_loss": 0.2681121826171875, + "eval_pr_auc": 0.681415067318076, + "eval_precision": 0.7073911576105298, + "eval_precision_macro": 0.8236147646777582, + "eval_pred_class_0": 16705, + "eval_pred_class_1": 2963, + "eval_predicted_binding_ratio": 0.1506508033353671, + "eval_recall": 0.6759109964527572, + "eval_recall_macro": 0.8117890227027473, + "eval_runtime": 0.2606, + "eval_samples_per_second": 625.453, + "eval_steps_per_second": 3.837, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6500 + }, + { + "epoch": 251.0, + "eval_accuracy": 0.9046674801708359, + "eval_auc": 0.9298684105799504, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6909510466457887, + "eval_f1_macro": 0.8172961371074986, + "eval_loss": 0.2681705951690674, + "eval_pr_auc": 0.6814913977659953, + "eval_precision": 0.7066756574511126, + "eval_precision_macro": 0.8232516115060616, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.6759109964527572, + "eval_recall_macro": 0.8116984812649493, + "eval_runtime": 0.271, + "eval_samples_per_second": 601.566, + "eval_steps_per_second": 3.691, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6526 + }, + { + "epoch": 252.0, + "eval_accuracy": 0.9045657921496848, + "eval_auc": 0.9300244614682288, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6893926857521099, + "eval_f1_macro": 0.8165072340543806, + "eval_loss": 0.2683667540550232, + "eval_pr_auc": 0.6820860933741758, + "eval_precision": 0.7080217539089055, + "eval_precision_macro": 0.8235792136757251, + "eval_pred_class_0": 16726, + "eval_pred_class_1": 2942, + "eval_predicted_binding_ratio": 0.14958307911328045, + "eval_recall": 0.671718800386972, + "eval_recall_macro": 0.8099343685039828, + "eval_runtime": 0.2316, + "eval_samples_per_second": 703.782, + "eval_steps_per_second": 4.318, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6552 + }, + { + "epoch": 253.0, + "eval_accuracy": 0.9046166361602603, + "eval_auc": 0.9299541441632636, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6910408432147562, + "eval_f1_macro": 0.8173217684087248, + "eval_loss": 0.26824095845222473, + "eval_pr_auc": 0.6816317339832768, + "eval_precision": 0.7061595422416694, + "eval_precision_macro": 0.8230444354317887, + "eval_pred_class_0": 16697, + "eval_pred_class_1": 2971, + "eval_predicted_binding_ratio": 0.15105755541997154, + "eval_recall": 0.6765559496936472, + "eval_recall_macro": 0.8119304164475962, + "eval_runtime": 0.2652, + "eval_samples_per_second": 614.632, + "eval_steps_per_second": 3.771, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6578 + }, + { + "epoch": 254.0, + "eval_accuracy": 0.9044641041285336, + "eval_auc": 0.9299257349988078, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6913093477903729, + "eval_f1_macro": 0.8173981849782266, + "eval_loss": 0.2680823504924774, + "eval_pr_auc": 0.6815799614400636, + "eval_precision": 0.7046215673141326, + "eval_precision_macro": 0.8224282755645115, + "eval_pred_class_0": 16682, + "eval_pred_class_1": 2986, + "eval_predicted_binding_ratio": 0.15182021557860484, + "eval_recall": 0.6784908094163173, + "eval_recall_macro": 0.8126262219955371, + "eval_runtime": 0.1721, + "eval_samples_per_second": 947.131, + "eval_steps_per_second": 5.811, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6604 + }, + { + "epoch": 255.0, + "eval_accuracy": 0.9045657921496848, + "eval_auc": 0.9300487927156216, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6902129064202014, + "eval_f1_macro": 0.8169037907104764, + "eval_loss": 0.2682023346424103, + "eval_pr_auc": 0.6819970017971004, + "eval_precision": 0.7068965517241379, + "eval_precision_macro": 0.8232268515652408, + "eval_pred_class_0": 16710, + "eval_pred_class_1": 2958, + "eval_predicted_binding_ratio": 0.1503965832824893, + "eval_recall": 0.6742986133505321, + "eval_recall_macro": 0.8109828311516347, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.732, + "eval_steps_per_second": 3.925, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6630 + }, + { + "epoch": 256.0, + "eval_accuracy": 0.9046166361602603, + "eval_auc": 0.9300701263533355, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6908371786420567, + "eval_f1_macro": 0.8172233266061071, + "eval_loss": 0.26801708340644836, + "eval_pr_auc": 0.6822973012887885, + "eval_precision": 0.7064374789349511, + "eval_precision_macro": 0.8231307207859595, + "eval_pred_class_0": 16701, + "eval_pred_class_1": 2967, + "eval_predicted_binding_ratio": 0.15085417937766932, + "eval_recall": 0.6759109964527572, + "eval_recall_macro": 0.8116683007856833, + "eval_runtime": 0.2302, + "eval_samples_per_second": 708.13, + "eval_steps_per_second": 4.344, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6656 + }, + { + "epoch": 257.0, + "eval_accuracy": 0.9048708562131381, + "eval_auc": 0.9301923470752391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6911012052171042, + "eval_f1_macro": 0.8174397819709127, + "eval_loss": 0.26805296540260315, + "eval_pr_auc": 0.6828138126269635, + "eval_precision": 0.7080514208389715, + "eval_precision_macro": 0.8238677400987582, + "eval_pred_class_0": 16712, + "eval_pred_class_1": 2956, + "eval_predicted_binding_ratio": 0.1502948952613382, + "eval_recall": 0.6749435665914221, + "eval_recall_macro": 0.8114260296891438, + "eval_runtime": 0.2566, + "eval_samples_per_second": 635.189, + "eval_steps_per_second": 3.897, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6682 + }, + { + "epoch": 258.0, + "eval_accuracy": 0.9047691681919869, + "eval_auc": 0.9301935928351055, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6912806988626998, + "eval_f1_macro": 0.8174910212279173, + "eval_loss": 0.26811105012893677, + "eval_pr_auc": 0.6827059930276215, + "eval_precision": 0.7070128118678355, + "eval_precision_macro": 0.8234501252489699, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.6762334730732021, + "eval_recall_macro": 0.8118899000544377, + "eval_runtime": 0.2455, + "eval_samples_per_second": 663.913, + "eval_steps_per_second": 4.073, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6708 + }, + { + "epoch": 259.0, + "eval_accuracy": 0.9049217002237137, + "eval_auc": 0.9302954726341887, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6913172664245626, + "eval_f1_macro": 0.817561992789179, + "eval_loss": 0.2680983245372772, + "eval_pr_auc": 0.6832649652047296, + "eval_precision": 0.7081501521812648, + "eval_precision_macro": 0.8239452215038332, + "eval_pred_class_0": 16711, + "eval_pred_class_1": 2957, + "eval_predicted_binding_ratio": 0.15034573927191378, + "eval_recall": 0.6752660432118671, + "eval_recall_macro": 0.8115872679993663, + "eval_runtime": 0.2456, + "eval_samples_per_second": 663.63, + "eval_steps_per_second": 4.071, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6734 + }, + { + "epoch": 260.0, + "eval_accuracy": 0.9048200122025626, + "eval_auc": 0.9302687082620565, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6920039486673247, + "eval_f1_macro": 0.8178583697873878, + "eval_loss": 0.2680213451385498, + "eval_pr_auc": 0.6832868902825516, + "eval_precision": 0.7064158548874706, + "eval_precision_macro": 0.8233115761166728, + "eval_pred_class_0": 16691, + "eval_pred_class_1": 2977, + "eval_predicted_binding_ratio": 0.15136261948342486, + "eval_recall": 0.6781683327958723, + "eval_recall_macro": 0.8127064275194428, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.797, + "eval_steps_per_second": 3.87, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6760 + }, + { + "epoch": 261.0, + "eval_accuracy": 0.9049725442342892, + "eval_auc": 0.9303376240871719, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.691635043722158, + "eval_f1_macro": 0.817733374603378, + "eval_loss": 0.2681059241294861, + "eval_pr_auc": 0.6834436649713198, + "eval_precision": 0.7081081081081081, + "eval_precision_macro": 0.8239786410782342, + "eval_pred_class_0": 16708, + "eval_pred_class_1": 2960, + "eval_predicted_binding_ratio": 0.15049827130364044, + "eval_recall": 0.6759109964527572, + "eval_recall_macro": 0.8118795641405453, + "eval_runtime": 0.1861, + "eval_samples_per_second": 875.956, + "eval_steps_per_second": 5.374, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6786 + }, + { + "epoch": 262.0, + "eval_accuracy": 0.9049725442342892, + "eval_auc": 0.9303520963131211, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6920415224913494, + "eval_f1_macro": 0.8179298603528982, + "eval_loss": 0.2680259048938751, + "eval_pr_auc": 0.683712511498021, + "eval_precision": 0.7075471698113207, + "eval_precision_macro": 0.8238035250254208, + "eval_pred_class_0": 16700, + "eval_pred_class_1": 2968, + "eval_predicted_binding_ratio": 0.15090502338824485, + "eval_recall": 0.6772009029345373, + "eval_recall_macro": 0.8124037954643712, + "eval_runtime": 0.2544, + "eval_samples_per_second": 640.771, + "eval_steps_per_second": 3.931, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6812 + }, + { + "epoch": 263.0, + "eval_accuracy": 0.9051759202765914, + "eval_auc": 0.9303956005834594, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.692700609655627, + "eval_f1_macro": 0.8183195235731167, + "eval_loss": 0.2679544985294342, + "eval_pr_auc": 0.6840791766505604, + "eval_precision": 0.7082210242587601, + "eval_precision_macro": 0.8242003324886615, + "eval_pred_class_0": 16700, + "eval_pred_class_1": 2968, + "eval_predicted_binding_ratio": 0.15090502338824485, + "eval_recall": 0.6778458561754273, + "eval_recall_macro": 0.8127866330433483, + "eval_runtime": 0.224, + "eval_samples_per_second": 727.59, + "eval_steps_per_second": 4.464, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6838 + }, + { + "epoch": 264.0, + "eval_accuracy": 0.905328452308318, + "eval_auc": 0.9304257713302264, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6933465085638999, + "eval_f1_macro": 0.8186850387937344, + "eval_loss": 0.26790833473205566, + "eval_pr_auc": 0.6843453300290927, + "eval_precision": 0.70851565129586, + "eval_precision_macro": 0.8244321084532245, + "eval_pred_class_0": 16697, + "eval_pred_class_1": 2971, + "eval_predicted_binding_ratio": 0.15105755541997154, + "eval_recall": 0.6788132860367624, + "eval_recall_macro": 0.8132703479740159, + "eval_runtime": 0.2587, + "eval_samples_per_second": 630.122, + "eval_steps_per_second": 3.866, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6864 + }, + { + "epoch": 265.0, + "eval_accuracy": 0.905328452308318, + "eval_auc": 0.9304709495903853, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6934474810668423, + "eval_f1_macro": 0.8187338421508825, + "eval_loss": 0.26806485652923584, + "eval_pr_auc": 0.684236710788699, + "eval_precision": 0.7083753784056509, + "eval_precision_macro": 0.8243883480827296, + "eval_pred_class_0": 16695, + "eval_pred_class_1": 2973, + "eval_predicted_binding_ratio": 0.15115924344112264, + "eval_recall": 0.6791357626572073, + "eval_recall_macro": 0.8134014058049723, + "eval_runtime": 0.2519, + "eval_samples_per_second": 646.986, + "eval_steps_per_second": 3.969, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6890 + }, + { + "epoch": 266.0, + "eval_accuracy": 0.9054301403294692, + "eval_auc": 0.9305072226139984, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6937767533750412, + "eval_f1_macro": 0.8189285426426647, + "eval_loss": 0.2682030200958252, + "eval_pr_auc": 0.6841419811140891, + "eval_precision": 0.708711738984191, + "eval_precision_macro": 0.8245864774585525, + "eval_pred_class_0": 16695, + "eval_pred_class_1": 2973, + "eval_predicted_binding_ratio": 0.15115924344112264, + "eval_recall": 0.6794582392776524, + "eval_recall_macro": 0.8135928245944608, + "eval_runtime": 0.1928, + "eval_samples_per_second": 845.449, + "eval_steps_per_second": 5.187, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6916 + }, + { + "epoch": 267.0, + "eval_accuracy": 0.9054809843400448, + "eval_auc": 0.9304825799266392, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6942937016938003, + "eval_f1_macro": 0.8191961667392471, + "eval_loss": 0.2681148052215576, + "eval_pr_auc": 0.6840537331785722, + "eval_precision": 0.7083892617449664, + "eval_precision_macro": 0.824532598274209, + "eval_pred_class_0": 16688, + "eval_pred_class_1": 2980, + "eval_predicted_binding_ratio": 0.15151515151515152, + "eval_recall": 0.6807481457594324, + "eval_recall_macro": 0.8141472363975528, + "eval_runtime": 0.2454, + "eval_samples_per_second": 664.157, + "eval_steps_per_second": 4.075, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6942 + }, + { + "epoch": 268.0, + "eval_accuracy": 0.9057860484034981, + "eval_auc": 0.930527787384295, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6951801283105774, + "eval_f1_macro": 0.8197312675109731, + "eval_loss": 0.2679014503955841, + "eval_pr_auc": 0.6848281199608002, + "eval_precision": 0.7095366017461383, + "eval_precision_macro": 0.8251697388598875, + "eval_pred_class_0": 16690, + "eval_pred_class_1": 2978, + "eval_predicted_binding_ratio": 0.1514134634940004, + "eval_recall": 0.6813930990003225, + "eval_recall_macro": 0.8145904349350619, + "eval_runtime": 0.2174, + "eval_samples_per_second": 749.726, + "eval_steps_per_second": 4.6, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6968 + }, + { + "epoch": 269.0, + "eval_accuracy": 0.9056335163717714, + "eval_auc": 0.9305513205667733, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6949375410913873, + "eval_f1_macro": 0.8195606747920547, + "eval_loss": 0.26782992482185364, + "eval_pr_auc": 0.6850252128050689, + "eval_precision": 0.7086825343613812, + "eval_precision_macro": 0.8247638023919581, + "eval_pred_class_0": 16685, + "eval_pred_class_1": 2983, + "eval_predicted_binding_ratio": 0.15166768354687818, + "eval_recall": 0.6817155756207675, + "eval_recall_macro": 0.8146309513282204, + "eval_runtime": 0.2304, + "eval_samples_per_second": 707.619, + "eval_steps_per_second": 4.341, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6994 + }, + { + "epoch": 269.2307692307692, + "grad_norm": 17604.1328125, + "learning_rate": 5.202671165416819e-07, + "loss": 0.2132, + "step": 7000 + }, + { + "epoch": 270.0, + "eval_accuracy": 0.9056335163717714, + "eval_auc": 0.9305936180072407, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6950377916529741, + "eval_f1_macro": 0.8196091213903969, + "eval_loss": 0.2680298984050751, + "eval_pr_auc": 0.6846741481205671, + "eval_precision": 0.7085427135678392, + "eval_precision_macro": 0.8247203168031008, + "eval_pred_class_0": 16683, + "eval_pred_class_1": 2985, + "eval_predicted_binding_ratio": 0.15176937156802928, + "eval_recall": 0.6820380522412125, + "eval_recall_macro": 0.814762009159177, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.148, + "eval_steps_per_second": 3.829, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7020 + }, + { + "epoch": 271.0, + "eval_accuracy": 0.9054809843400448, + "eval_auc": 0.9306496966662317, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6938909929194796, + "eval_f1_macro": 0.8190015347004277, + "eval_loss": 0.2681294083595276, + "eval_pr_auc": 0.6847333752342389, + "eval_precision": 0.7089502018842531, + "eval_precision_macro": 0.8247074919339809, + "eval_pred_class_0": 16696, + "eval_pred_class_1": 2972, + "eval_predicted_binding_ratio": 0.15110839943054707, + "eval_recall": 0.6794582392776524, + "eval_recall_macro": 0.8136230050737269, + "eval_runtime": 0.1821, + "eval_samples_per_second": 895.335, + "eval_steps_per_second": 5.493, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7046 + }, + { + "epoch": 272.0, + "eval_accuracy": 0.9053792963188937, + "eval_auc": 0.9306815900653141, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6932586121641668, + "eval_f1_macro": 0.8186603259504293, + "eval_loss": 0.26823949813842773, + "eval_pr_auc": 0.6847385438827486, + "eval_precision": 0.7090357383681726, + "eval_precision_macro": 0.8246412077064189, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.6781683327958723, + "eval_recall_macro": 0.8130384127913689, + "eval_runtime": 0.1793, + "eval_samples_per_second": 909.129, + "eval_steps_per_second": 5.577, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7072 + }, + { + "epoch": 273.0, + "eval_accuracy": 0.905328452308318, + "eval_auc": 0.9307212597310633, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6931443638760711, + "eval_f1_macro": 0.8185873316314347, + "eval_loss": 0.2681174576282501, + "eval_pr_auc": 0.6852124592316542, + "eval_precision": 0.7087967644084934, + "eval_precision_macro": 0.8245199318120546, + "eval_pred_class_0": 16701, + "eval_pred_class_1": 2967, + "eval_predicted_binding_ratio": 0.15085417937766932, + "eval_recall": 0.6781683327958723, + "eval_recall_macro": 0.8130082323121028, + "eval_runtime": 0.1693, + "eval_samples_per_second": 962.904, + "eval_steps_per_second": 5.907, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7098 + }, + { + "epoch": 274.0, + "eval_accuracy": 0.9056335163717714, + "eval_auc": 0.9307878592214269, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6947368421052632, + "eval_f1_macro": 0.8194636820581644, + "eval_loss": 0.2679351270198822, + "eval_pr_auc": 0.685659065605018, + "eval_precision": 0.7089627391742196, + "eval_precision_macro": 0.8248510741829513, + "eval_pred_class_0": 16689, + "eval_pred_class_1": 2979, + "eval_predicted_binding_ratio": 0.15146430750457596, + "eval_recall": 0.6810706223798775, + "eval_recall_macro": 0.8143688356663075, + "eval_runtime": 0.2474, + "eval_samples_per_second": 658.933, + "eval_steps_per_second": 4.043, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7124 + }, + { + "epoch": 275.0, + "eval_accuracy": 0.905684360382347, + "eval_auc": 0.9308354511413273, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6945496459739832, + "eval_f1_macro": 0.8193909880953703, + "eval_loss": 0.2679973542690277, + "eval_pr_auc": 0.6858486839853987, + "eval_precision": 0.7096231493943472, + "eval_precision_macro": 0.8251038602745574, + "eval_pred_class_0": 16696, + "eval_pred_class_1": 2972, + "eval_predicted_binding_ratio": 0.15110839943054707, + "eval_recall": 0.6801031925185425, + "eval_recall_macro": 0.8140058426527039, + "eval_runtime": 0.1976, + "eval_samples_per_second": 825.097, + "eval_steps_per_second": 5.062, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7150 + }, + { + "epoch": 276.0, + "eval_accuracy": 0.9055826723611958, + "eval_auc": 0.9308506727696961, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.694320987654321, + "eval_f1_macro": 0.819244917025501, + "eval_loss": 0.26807495951652527, + "eval_pr_auc": 0.6856600891550617, + "eval_precision": 0.7091459314055144, + "eval_precision_macro": 0.8248616921913159, + "eval_pred_class_0": 16694, + "eval_pred_class_1": 2974, + "eval_predicted_binding_ratio": 0.1512100874516982, + "eval_recall": 0.6801031925185425, + "eval_recall_macro": 0.8139454816941719, + "eval_runtime": 0.2211, + "eval_samples_per_second": 737.174, + "eval_steps_per_second": 4.523, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7176 + }, + { + "epoch": 277.0, + "eval_accuracy": 0.9057352043929225, + "eval_auc": 0.9308575828439557, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6946640316205533, + "eval_f1_macro": 0.8194640504423113, + "eval_loss": 0.26810142397880554, + "eval_pr_auc": 0.6855507532370865, + "eval_precision": 0.709861999326826, + "eval_precision_macro": 0.8252250644654733, + "eval_pred_class_0": 16697, + "eval_pred_class_1": 2971, + "eval_predicted_binding_ratio": 0.15105755541997154, + "eval_recall": 0.6801031925185425, + "eval_recall_macro": 0.81403602313197, + "eval_runtime": 0.2072, + "eval_samples_per_second": 786.583, + "eval_steps_per_second": 4.826, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7202 + }, + { + "epoch": 278.0, + "eval_accuracy": 0.905684360382347, + "eval_auc": 0.9309145374278527, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6941467436108821, + "eval_f1_macro": 0.8191962415719043, + "eval_loss": 0.26818612217903137, + "eval_pr_auc": 0.6856281742604067, + "eval_precision": 0.7101889338731444, + "eval_precision_macro": 0.8252812485457677, + "eval_pred_class_0": 16704, + "eval_pred_class_1": 2964, + "eval_predicted_binding_ratio": 0.15070164734594266, + "eval_recall": 0.6788132860367624, + "eval_recall_macro": 0.813481611328878, + "eval_runtime": 0.1872, + "eval_samples_per_second": 870.561, + "eval_steps_per_second": 5.341, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7228 + }, + { + "epoch": 279.0, + "eval_accuracy": 0.9056335163717714, + "eval_auc": 0.9309855846702396, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.69433465085639, + "eval_f1_macro": 0.8192692975301671, + "eval_loss": 0.2680346667766571, + "eval_pr_auc": 0.686232632159634, + "eval_precision": 0.7095254123190845, + "eval_precision_macro": 0.825026825462411, + "eval_pred_class_0": 16697, + "eval_pred_class_1": 2971, + "eval_predicted_binding_ratio": 0.15105755541997154, + "eval_recall": 0.6797807158980974, + "eval_recall_macro": 0.8138446043424814, + "eval_runtime": 0.241, + "eval_samples_per_second": 676.212, + "eval_steps_per_second": 4.149, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7254 + }, + { + "epoch": 280.0, + "eval_accuracy": 0.9056335163717714, + "eval_auc": 0.930922634866985, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6957377049180328, + "eval_f1_macro": 0.8199473215888755, + "eval_loss": 0.26806843280792236, + "eval_pr_auc": 0.6857403383581059, + "eval_precision": 0.70756918972991, + "eval_precision_macro": 0.8244187060893835, + "eval_pred_class_0": 16669, + "eval_pred_class_1": 2999, + "eval_predicted_binding_ratio": 0.15248118771608704, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8156794139758723, + "eval_runtime": 0.1979, + "eval_samples_per_second": 823.751, + "eval_steps_per_second": 5.054, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7280 + }, + { + "epoch": 281.0, + "eval_accuracy": 0.9055826723611958, + "eval_auc": 0.9309180411274773, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6959227116423776, + "eval_f1_macro": 0.8200188959217034, + "eval_loss": 0.2680058181285858, + "eval_pr_auc": 0.6857513786045211, + "eval_precision": 0.7069194943446441, + "eval_precision_macro": 0.8241715464761271, + "eval_pred_class_0": 16662, + "eval_pred_class_1": 3006, + "eval_predicted_binding_ratio": 0.15283709579011592, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.8160424069894758, + "eval_runtime": 0.2526, + "eval_samples_per_second": 645.248, + "eval_steps_per_second": 3.959, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7306 + }, + { + "epoch": 282.0, + "eval_accuracy": 0.9055318283506203, + "eval_auc": 0.930996368279084, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6950098489822718, + "eval_f1_macro": 0.8195600321797414, + "eval_loss": 0.2679016888141632, + "eval_pr_auc": 0.6864832067617813, + "eval_precision": 0.7077900367769977, + "eval_precision_macro": 0.824393309448042, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6826830054821026, + "eval_recall_macro": 0.814963763862558, + "eval_runtime": 0.2643, + "eval_samples_per_second": 616.664, + "eval_steps_per_second": 3.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7332 + }, + { + "epoch": 283.0, + "eval_accuracy": 0.905684360382347, + "eval_auc": 0.9310520771031147, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.695152013147083, + "eval_f1_macro": 0.8196821086456596, + "eval_loss": 0.2679460644721985, + "eval_pr_auc": 0.6867185372000254, + "eval_precision": 0.7087801608579088, + "eval_precision_macro": 0.8248408116684653, + "eval_pred_class_0": 16684, + "eval_pred_class_1": 2984, + "eval_predicted_binding_ratio": 0.15171852755745374, + "eval_recall": 0.6820380522412125, + "eval_recall_macro": 0.8147921896384429, + "eval_runtime": 0.2003, + "eval_samples_per_second": 813.949, + "eval_steps_per_second": 4.994, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7358 + }, + { + "epoch": 284.0, + "eval_accuracy": 0.905684360382347, + "eval_auc": 0.931065877786636, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.695152013147083, + "eval_f1_macro": 0.8196821086456596, + "eval_loss": 0.26795604825019836, + "eval_pr_auc": 0.6868264245068394, + "eval_precision": 0.7087801608579088, + "eval_precision_macro": 0.8248408116684653, + "eval_pred_class_0": 16684, + "eval_pred_class_1": 2984, + "eval_predicted_binding_ratio": 0.15171852755745374, + "eval_recall": 0.6820380522412125, + "eval_recall_macro": 0.8147921896384429, + "eval_runtime": 0.2632, + "eval_samples_per_second": 619.283, + "eval_steps_per_second": 3.799, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7384 + }, + { + "epoch": 285.0, + "eval_accuracy": 0.9057352043929225, + "eval_auc": 0.9310428312291054, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.696165191740413, + "eval_f1_macro": 0.8201894743681304, + "eval_loss": 0.2679717540740967, + "eval_pr_auc": 0.6865300625458848, + "eval_precision": 0.7077640786404532, + "eval_precision_macro": 0.8245726255085029, + "eval_pred_class_0": 16667, + "eval_pred_class_1": 3001, + "eval_predicted_binding_ratio": 0.15258287573723817, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8160018905963173, + "eval_runtime": 0.2365, + "eval_samples_per_second": 689.133, + "eval_steps_per_second": 4.228, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7410 + }, + { + "epoch": 286.0, + "eval_accuracy": 0.9054301403294692, + "eval_auc": 0.9311142093764568, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6946815495732108, + "eval_f1_macro": 0.8193658018591599, + "eval_loss": 0.2679450213909149, + "eval_pr_auc": 0.6869273130451355, + "eval_precision": 0.7074557004346372, + "eval_precision_macro": 0.8241961598653369, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6823605288616575, + "eval_recall_macro": 0.8147723450730694, + "eval_runtime": 0.1793, + "eval_samples_per_second": 908.844, + "eval_steps_per_second": 5.576, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7436 + }, + { + "epoch": 287.0, + "eval_accuracy": 0.9058368924140736, + "eval_auc": 0.9311615190538873, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.695695037791653, + "eval_f1_macro": 0.8199978948356761, + "eval_loss": 0.2679760158061981, + "eval_pr_auc": 0.6872164392300586, + "eval_precision": 0.7092127303182579, + "eval_precision_macro": 0.8251152664358777, + "eval_pred_class_0": 16683, + "eval_pred_class_1": 2985, + "eval_predicted_binding_ratio": 0.15176937156802928, + "eval_recall": 0.6826830054821026, + "eval_recall_macro": 0.8151448467381539, + "eval_runtime": 0.1776, + "eval_samples_per_second": 917.947, + "eval_steps_per_second": 5.632, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7462 + }, + { + "epoch": 288.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9311948528628156, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6959237343852729, + "eval_f1_macro": 0.8201439915761322, + "eval_loss": 0.26804664731025696, + "eval_pr_auc": 0.6873402512916988, + "eval_precision": 0.7096882333221589, + "eval_precision_macro": 0.8253565529811274, + "eval_pred_class_0": 16685, + "eval_pred_class_1": 2983, + "eval_predicted_binding_ratio": 0.15166768354687818, + "eval_recall": 0.6826830054821026, + "eval_recall_macro": 0.815205207696686, + "eval_runtime": 0.2679, + "eval_samples_per_second": 608.503, + "eval_steps_per_second": 3.733, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7488 + }, + { + "epoch": 288.46153846153845, + "grad_norm": 18250.5078125, + "learning_rate": 4.5321317063898914e-07, + "loss": 0.2101, + "step": 7500 + }, + { + "epoch": 289.0, + "eval_accuracy": 0.9058368924140736, + "eval_auc": 0.9312627857055362, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6958949096880132, + "eval_f1_macro": 0.8200944800500465, + "eval_loss": 0.26791396737098694, + "eval_pr_auc": 0.6879250403674073, + "eval_precision": 0.7089327534292406, + "eval_precision_macro": 0.8250281609942534, + "eval_pred_class_0": 16679, + "eval_pred_class_1": 2989, + "eval_predicted_binding_ratio": 0.1519727476103315, + "eval_recall": 0.6833279587229926, + "eval_recall_macro": 0.8154069624000669, + "eval_runtime": 0.2277, + "eval_samples_per_second": 715.925, + "eval_steps_per_second": 4.392, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7514 + }, + { + "epoch": 290.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9312241087546806, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.696522309711286, + "eval_f1_macro": 0.8204332366847645, + "eval_loss": 0.26787513494491577, + "eval_pr_auc": 0.6878638814996979, + "eval_precision": 0.7088480801335559, + "eval_precision_macro": 0.8250951850316913, + "eval_pred_class_0": 16673, + "eval_pred_class_1": 2995, + "eval_predicted_binding_ratio": 0.15227781167378482, + "eval_recall": 0.6846178652047726, + "eval_recall_macro": 0.815991554682425, + "eval_runtime": 0.2212, + "eval_samples_per_second": 736.836, + "eval_steps_per_second": 4.52, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7540 + }, + { + "epoch": 291.0, + "eval_accuracy": 0.9063453325198292, + "eval_auc": 0.9312868249779602, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6975369458128079, + "eval_f1_macro": 0.8210658921448086, + "eval_loss": 0.26807519793510437, + "eval_pr_auc": 0.6876156626538744, + "eval_precision": 0.7106055536968886, + "eval_precision_macro": 0.8260144502101566, + "eval_pred_class_0": 16679, + "eval_pred_class_1": 2989, + "eval_predicted_binding_ratio": 0.1519727476103315, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8163640563475095, + "eval_runtime": 0.2581, + "eval_samples_per_second": 631.522, + "eval_steps_per_second": 3.874, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7566 + }, + { + "epoch": 292.0, + "eval_accuracy": 0.9061928004881025, + "eval_auc": 0.9313246454689077, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6971935007385525, + "eval_f1_macro": 0.8208465473190101, + "eval_loss": 0.26810526847839355, + "eval_pr_auc": 0.6877564948921628, + "eval_precision": 0.7098930481283422, + "eval_precision_macro": 0.8256529284776994, + "eval_pred_class_0": 16676, + "eval_pred_class_1": 2992, + "eval_predicted_binding_ratio": 0.15212527964205816, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8162735149097116, + "eval_runtime": 0.206, + "eval_samples_per_second": 791.322, + "eval_steps_per_second": 4.855, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7592 + }, + { + "epoch": 293.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9313187962370344, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6969647251845775, + "eval_f1_macro": 0.8207004065741184, + "eval_loss": 0.26795074343681335, + "eval_pr_auc": 0.6879453119558532, + "eval_precision": 0.7094188376753507, + "eval_precision_macro": 0.8254123095657551, + "eval_pred_class_0": 16674, + "eval_pred_class_1": 2994, + "eval_predicted_binding_ratio": 0.15222696766320928, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8162131539511794, + "eval_runtime": 0.2264, + "eval_samples_per_second": 719.954, + "eval_steps_per_second": 4.417, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7618 + }, + { + "epoch": 294.0, + "eval_accuracy": 0.9061419564775269, + "eval_auc": 0.9313842959550158, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6966809069996713, + "eval_f1_macro": 0.820581055003595, + "eval_loss": 0.2678394019603729, + "eval_pr_auc": 0.6884837475836854, + "eval_precision": 0.7102177554438861, + "eval_precision_macro": 0.8257076908850431, + "eval_pred_class_0": 16683, + "eval_pred_class_1": 2985, + "eval_predicted_binding_ratio": 0.15176937156802928, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8157191031066195, + "eval_runtime": 0.1788, + "eval_samples_per_second": 911.642, + "eval_steps_per_second": 5.593, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7644 + }, + { + "epoch": 295.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.9314417858263554, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.696337658071933, + "eval_f1_macro": 0.8203618088536944, + "eval_loss": 0.26788315176963806, + "eval_pr_auc": 0.6885944465147925, + "eval_precision": 0.7095046854082999, + "eval_precision_macro": 0.8253458678840061, + "eval_pred_class_0": 16680, + "eval_pred_class_1": 2988, + "eval_predicted_binding_ratio": 0.15192190359975594, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8156285616688215, + "eval_runtime": 0.2717, + "eval_samples_per_second": 599.832, + "eval_steps_per_second": 3.68, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7670 + }, + { + "epoch": 296.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.9314500389854711, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.696636587366694, + "eval_f1_macro": 0.8205062543343502, + "eval_loss": 0.2678987681865692, + "eval_pr_auc": 0.6885305487751676, + "eval_precision": 0.7090848363393454, + "eval_precision_macro": 0.8252153220919469, + "eval_pred_class_0": 16674, + "eval_pred_class_1": 2994, + "eval_predicted_binding_ratio": 0.15222696766320928, + "eval_recall": 0.6846178652047726, + "eval_recall_macro": 0.816021735161691, + "eval_runtime": 0.2551, + "eval_samples_per_second": 639.002, + "eval_steps_per_second": 3.92, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7696 + }, + { + "epoch": 297.0, + "eval_accuracy": 0.9061928004881025, + "eval_auc": 0.931550575699698, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965959546127282, + "eval_f1_macro": 0.8205577878611678, + "eval_loss": 0.2679198086261749, + "eval_pr_auc": 0.6889351423284887, + "eval_precision": 0.710738255033557, + "eval_precision_macro": 0.8259168264621285, + "eval_pred_class_0": 16688, + "eval_pred_class_1": 2980, + "eval_predicted_binding_ratio": 0.15151515151515152, + "eval_recall": 0.6830054821025475, + "eval_recall_macro": 0.8154871679239726, + "eval_runtime": 0.2409, + "eval_samples_per_second": 676.584, + "eval_steps_per_second": 4.151, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7722 + }, + { + "epoch": 298.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9316071409836368, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6964256300444738, + "eval_f1_macro": 0.82051102635547, + "eval_loss": 0.2680239677429199, + "eval_pr_auc": 0.6888956424322248, + "eval_precision": 0.7117845117845117, + "eval_precision_macro": 0.8263378182350514, + "eval_pred_class_0": 16698, + "eval_pred_class_1": 2970, + "eval_predicted_binding_ratio": 0.15100671140939598, + "eval_recall": 0.6817155756207675, + "eval_recall_macro": 0.8150232975586785, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.263, + "eval_steps_per_second": 3.842, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7748 + }, + { + "epoch": 299.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9315966104197652, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6972235912600624, + "eval_f1_macro": 0.8208966763783243, + "eval_loss": 0.26803261041641235, + "eval_pr_auc": 0.6887732742755047, + "eval_precision": 0.7106496985934361, + "eval_precision_macro": 0.8259818448607991, + "eval_pred_class_0": 16682, + "eval_pred_class_1": 2986, + "eval_predicted_binding_ratio": 0.15182021557860484, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8160717602063305, + "eval_runtime": 0.2484, + "eval_samples_per_second": 656.12, + "eval_steps_per_second": 4.025, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7774 + }, + { + "epoch": 300.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9315960653998236, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6966218432272876, + "eval_f1_macro": 0.8204813289787078, + "eval_loss": 0.26792290806770325, + "eval_pr_auc": 0.6890570542847262, + "eval_precision": 0.7087087087087087, + "eval_precision_macro": 0.8250519729735134, + "eval_pred_class_0": 16671, + "eval_pred_class_1": 2997, + "eval_predicted_binding_ratio": 0.15237949969493594, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8161226125133815, + "eval_runtime": 0.2626, + "eval_samples_per_second": 620.694, + "eval_steps_per_second": 3.808, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7800 + }, + { + "epoch": 301.0, + "eval_accuracy": 0.9061928004881025, + "eval_auc": 0.9316357350655727, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6966957093539372, + "eval_f1_macro": 0.8206059967994839, + "eval_loss": 0.26809555292129517, + "eval_pr_auc": 0.6889378655811479, + "eval_precision": 0.710596914822267, + "eval_precision_macro": 0.8258725914156881, + "eval_pred_class_0": 16686, + "eval_pred_class_1": 2982, + "eval_predicted_binding_ratio": 0.15161683953630262, + "eval_recall": 0.6833279587229926, + "eval_recall_macro": 0.815618225754929, + "eval_runtime": 0.2178, + "eval_samples_per_second": 748.303, + "eval_steps_per_second": 4.591, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7826 + }, + { + "epoch": 302.0, + "eval_accuracy": 0.9061419564775269, + "eval_auc": 0.9317000084886855, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965811965811965, + "eval_f1_macro": 0.8205328694321837, + "eval_loss": 0.26798245310783386, + "eval_pr_auc": 0.6894273728032447, + "eval_precision": 0.7103586992960107, + "eval_precision_macro": 0.8257517200405735, + "eval_pred_class_0": 16685, + "eval_pred_class_1": 2983, + "eval_predicted_binding_ratio": 0.15166768354687818, + "eval_recall": 0.6833279587229926, + "eval_recall_macro": 0.815588045275663, + "eval_runtime": 0.2561, + "eval_samples_per_second": 636.477, + "eval_steps_per_second": 3.905, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7852 + }, + { + "epoch": 303.0, + "eval_accuracy": 0.9060402684563759, + "eval_auc": 0.9316820812256066, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6966513460275772, + "eval_f1_macro": 0.8205311837826491, + "eval_loss": 0.26780617237091064, + "eval_pr_auc": 0.6897785082602487, + "eval_precision": 0.7094617184887997, + "eval_precision_macro": 0.8253790573615671, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8159208578100005, + "eval_runtime": 0.1902, + "eval_samples_per_second": 856.826, + "eval_steps_per_second": 5.257, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7878 + }, + { + "epoch": 304.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9317363691047894, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6963230466185161, + "eval_f1_macro": 0.8203369534620676, + "eval_loss": 0.26783081889152527, + "eval_pr_auc": 0.690108350201664, + "eval_precision": 0.7091273821464393, + "eval_precision_macro": 0.8251819077788622, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.815729439020512, + "eval_runtime": 0.2643, + "eval_samples_per_second": 616.745, + "eval_steps_per_second": 3.784, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7904 + }, + { + "epoch": 305.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9318277475374976, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6962670613385956, + "eval_f1_macro": 0.8203632705580364, + "eval_loss": 0.267810195684433, + "eval_pr_auc": 0.6906198234983021, + "eval_precision": 0.7104026845637584, + "eval_precision_macro": 0.82571907957814, + "eval_pred_class_0": 16688, + "eval_pred_class_1": 2980, + "eval_predicted_binding_ratio": 0.15151515151515152, + "eval_recall": 0.6826830054821026, + "eval_recall_macro": 0.8152957491344841, + "eval_runtime": 0.2457, + "eval_samples_per_second": 663.513, + "eval_steps_per_second": 4.071, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7930 + }, + { + "epoch": 306.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9318495094051658, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965664530967636, + "eval_f1_macro": 0.8205079551116469, + "eval_loss": 0.26770758628845215, + "eval_pr_auc": 0.6907229577841941, + "eval_precision": 0.709979906229069, + "eval_precision_macro": 0.8255870038278783, + "eval_pred_class_0": 16682, + "eval_pred_class_1": 2986, + "eval_predicted_binding_ratio": 0.15182021557860484, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8156889226273535, + "eval_runtime": 0.2472, + "eval_samples_per_second": 659.459, + "eval_steps_per_second": 4.046, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7956 + }, + { + "epoch": 307.0, + "eval_accuracy": 0.9063453325198292, + "eval_auc": 0.9318431735483448, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6973381531383503, + "eval_f1_macro": 0.8209698284488745, + "eval_loss": 0.26784345507621765, + "eval_pr_auc": 0.6903028236900399, + "eval_precision": 0.7108877721943049, + "eval_precision_macro": 0.8261026405178202, + "eval_pred_class_0": 16683, + "eval_pred_class_1": 2985, + "eval_predicted_binding_ratio": 0.15176937156802928, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8161019406855965, + "eval_runtime": 0.2339, + "eval_samples_per_second": 696.845, + "eval_steps_per_second": 4.275, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7982 + }, + { + "epoch": 307.6923076923077, + "grad_norm": 18753.48046875, + "learning_rate": 3.8700127731844033e-07, + "loss": 0.2071, + "step": 8000 + }, + { + "epoch": 308.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9318915635331595, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6966661192314009, + "eval_f1_macro": 0.8205561173351938, + "eval_loss": 0.2679731547832489, + "eval_pr_auc": 0.6902808443840289, + "eval_precision": 0.7098393574297188, + "eval_precision_macro": 0.8255431799139001, + "eval_pred_class_0": 16680, + "eval_pred_class_1": 2988, + "eval_predicted_binding_ratio": 0.15192190359975594, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.81581998045831, + "eval_runtime": 0.2227, + "eval_samples_per_second": 731.952, + "eval_steps_per_second": 4.491, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8008 + }, + { + "epoch": 309.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9319105808361218, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6975217462662071, + "eval_f1_macro": 0.821040751603759, + "eval_loss": 0.2678290605545044, + "eval_pr_auc": 0.6904266123808676, + "eval_precision": 0.7102272727272727, + "eval_precision_macro": 0.8258500239865676, + "eval_pred_class_0": 16676, + "eval_pred_class_1": 2992, + "eval_predicted_binding_ratio": 0.15212527964205816, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.8164649336992, + "eval_runtime": 0.2365, + "eval_samples_per_second": 689.086, + "eval_steps_per_second": 4.228, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8034 + }, + { + "epoch": 310.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9319563819762139, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6977201902575036, + "eval_f1_macro": 0.821136637744354, + "eval_loss": 0.2677942216396332, + "eval_pr_auc": 0.6905770539125178, + "eval_precision": 0.7099465954606141, + "eval_precision_macro": 0.8257626451391362, + "eval_pred_class_0": 16672, + "eval_pred_class_1": 2996, + "eval_predicted_binding_ratio": 0.15232865568436038, + "eval_recall": 0.6859077716865527, + "eval_recall_macro": 0.816727049361113, + "eval_runtime": 0.2422, + "eval_samples_per_second": 672.991, + "eval_steps_per_second": 4.129, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8060 + }, + { + "epoch": 311.0, + "eval_accuracy": 0.9064470205409803, + "eval_auc": 0.9320261250637406, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6975673898750822, + "eval_f1_macro": 0.8211161862162611, + "eval_loss": 0.2678627669811249, + "eval_pr_auc": 0.6908896136415948, + "eval_precision": 0.7113643982567884, + "eval_precision_macro": 0.8263444706297427, + "eval_pred_class_0": 16685, + "eval_pred_class_1": 2983, + "eval_predicted_binding_ratio": 0.15166768354687818, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8161623016441286, + "eval_runtime": 0.264, + "eval_samples_per_second": 617.505, + "eval_steps_per_second": 3.788, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8086 + }, + { + "epoch": 312.0, + "eval_accuracy": 0.9060402684563759, + "eval_auc": 0.9320056478859348, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.696551724137931, + "eval_f1_macro": 0.8204830448879512, + "eval_loss": 0.2678248882293701, + "eval_pr_auc": 0.6908301522653787, + "eval_precision": 0.7096018735362998, + "eval_precision_macro": 0.8254226766806146, + "eval_pred_class_0": 16679, + "eval_pred_class_1": 2989, + "eval_predicted_binding_ratio": 0.1519727476103315, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.815789799979044, + "eval_runtime": 0.2616, + "eval_samples_per_second": 623.086, + "eval_steps_per_second": 3.823, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8112 + }, + { + "epoch": 313.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.9319988935316585, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965370096832431, + "eval_f1_macro": 0.8204581387495119, + "eval_loss": 0.2678254544734955, + "eval_pr_auc": 0.6908227024589884, + "eval_precision": 0.7092245989304813, + "eval_precision_macro": 0.8252587374599636, + "eval_pred_class_0": 16676, + "eval_pred_class_1": 2992, + "eval_predicted_binding_ratio": 0.15212527964205816, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8158906773307344, + "eval_runtime": 0.2589, + "eval_samples_per_second": 629.545, + "eval_steps_per_second": 3.862, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8138 + }, + { + "epoch": 314.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9320198184044164, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6971634694212165, + "eval_f1_macro": 0.8207964351950081, + "eval_loss": 0.2678711414337158, + "eval_pr_auc": 0.6909501280556708, + "eval_precision": 0.7091394262841895, + "eval_precision_macro": 0.8253255619723288, + "eval_pred_class_0": 16670, + "eval_pred_class_1": 2998, + "eval_predicted_binding_ratio": 0.15243034370551148, + "eval_recall": 0.6855852950661077, + "eval_recall_macro": 0.8164752696130925, + "eval_runtime": 0.2654, + "eval_samples_per_second": 614.201, + "eval_steps_per_second": 3.768, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8164 + }, + { + "epoch": 315.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9320753325784678, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6963230466185161, + "eval_f1_macro": 0.8203369534620676, + "eval_loss": 0.2678229808807373, + "eval_pr_auc": 0.6914105766155315, + "eval_precision": 0.7091273821464393, + "eval_precision_macro": 0.8251819077788622, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.815729439020512, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.375, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8190 + }, + { + "epoch": 316.0, + "eval_accuracy": 0.9061928004881025, + "eval_auc": 0.9321298151076297, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6967953985209532, + "eval_f1_macro": 0.8206541727499956, + "eval_loss": 0.26796844601631165, + "eval_pr_auc": 0.6912288183485439, + "eval_precision": 0.710455764075067, + "eval_precision_macro": 0.8258284574391159, + "eval_pred_class_0": 16684, + "eval_pred_class_1": 2984, + "eval_predicted_binding_ratio": 0.15171852755745374, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8157492835858855, + "eval_runtime": 0.2237, + "eval_samples_per_second": 728.567, + "eval_steps_per_second": 4.47, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8216 + }, + { + "epoch": 317.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9321722877330785, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6956235603817045, + "eval_f1_macro": 0.8199989231339035, + "eval_loss": 0.2681267261505127, + "eval_pr_auc": 0.6912998271961284, + "eval_precision": 0.7101108498488411, + "eval_precision_macro": 0.8254885924997606, + "eval_pred_class_0": 16691, + "eval_pred_class_1": 2977, + "eval_predicted_binding_ratio": 0.15136261948342486, + "eval_recall": 0.6817155756207675, + "eval_recall_macro": 0.8148120342038165, + "eval_runtime": 0.1877, + "eval_samples_per_second": 868.412, + "eval_steps_per_second": 5.328, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8242 + }, + { + "epoch": 318.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9321751880177678, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965664530967636, + "eval_f1_macro": 0.8205079551116469, + "eval_loss": 0.26805615425109863, + "eval_pr_auc": 0.6913470747613989, + "eval_precision": 0.709979906229069, + "eval_precision_macro": 0.8255870038278783, + "eval_pred_class_0": 16682, + "eval_pred_class_1": 2986, + "eval_predicted_binding_ratio": 0.15182021557860484, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8156889226273535, + "eval_runtime": 0.2426, + "eval_samples_per_second": 671.932, + "eval_steps_per_second": 4.122, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8268 + }, + { + "epoch": 319.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.9321843949617812, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6961380443714051, + "eval_f1_macro": 0.8202653471082613, + "eval_loss": 0.267932653427124, + "eval_pr_auc": 0.6916487257323465, + "eval_precision": 0.7097855227882037, + "eval_precision_macro": 0.8254333991308556, + "eval_pred_class_0": 16684, + "eval_pred_class_1": 2984, + "eval_predicted_binding_ratio": 0.15171852755745374, + "eval_recall": 0.6830054821025475, + "eval_recall_macro": 0.8153664460069084, + "eval_runtime": 0.2569, + "eval_samples_per_second": 634.38, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8294 + }, + { + "epoch": 320.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.932188093311385, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6964373666064686, + "eval_f1_macro": 0.8204099902666875, + "eval_loss": 0.2679634094238281, + "eval_pr_auc": 0.6915655068510385, + "eval_precision": 0.7093645484949833, + "eval_precision_macro": 0.8253022526621696, + "eval_pred_class_0": 16678, + "eval_pred_class_1": 2990, + "eval_predicted_binding_ratio": 0.15202359162090706, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.815759619499778, + "eval_runtime": 0.2483, + "eval_samples_per_second": 656.448, + "eval_steps_per_second": 4.027, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8320 + }, + { + "epoch": 321.0, + "eval_accuracy": 0.9058877364246491, + "eval_auc": 0.9321906237611137, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965076242006887, + "eval_f1_macro": 0.8204083386821658, + "eval_loss": 0.26790735125541687, + "eval_pr_auc": 0.6917736045731879, + "eval_precision": 0.7084723148765844, + "eval_precision_macro": 0.8249320182661266, + "eval_pred_class_0": 16670, + "eval_pred_class_1": 2998, + "eval_predicted_binding_ratio": 0.15243034370551148, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8160924320341154, + "eval_runtime": 0.2593, + "eval_samples_per_second": 628.581, + "eval_steps_per_second": 3.856, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8346 + }, + { + "epoch": 322.0, + "eval_accuracy": 0.9058368924140736, + "eval_auc": 0.932275423024527, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6955950032873109, + "eval_f1_macro": 0.8199495526481063, + "eval_loss": 0.2680682837963104, + "eval_pr_auc": 0.6919499023976591, + "eval_precision": 0.709353000335233, + "eval_precision_macro": 0.8251589694514043, + "eval_pred_class_0": 16685, + "eval_pred_class_1": 2983, + "eval_predicted_binding_ratio": 0.15166768354687818, + "eval_recall": 0.6823605288616575, + "eval_recall_macro": 0.8150137889071974, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.703, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8372 + }, + { + "epoch": 323.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.9322442887603632, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965370096832431, + "eval_f1_macro": 0.8204581387495119, + "eval_loss": 0.26799651980400085, + "eval_pr_auc": 0.6918309607756195, + "eval_precision": 0.7092245989304813, + "eval_precision_macro": 0.8252587374599636, + "eval_pred_class_0": 16676, + "eval_pred_class_1": 2992, + "eval_predicted_binding_ratio": 0.15212527964205816, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8158906773307344, + "eval_runtime": 0.2522, + "eval_samples_per_second": 646.368, + "eval_steps_per_second": 3.965, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8398 + }, + { + "epoch": 324.0, + "eval_accuracy": 0.9058368924140736, + "eval_auc": 0.9322356073712934, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6960945191992124, + "eval_f1_macro": 0.8201909332654507, + "eval_loss": 0.2680003046989441, + "eval_pr_auc": 0.6918708088537314, + "eval_precision": 0.7086535248914133, + "eval_precision_macro": 0.8249414550993799, + "eval_pred_class_0": 16675, + "eval_pred_class_1": 2993, + "eval_predicted_binding_ratio": 0.15217612365263372, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.81566907806198, + "eval_runtime": 0.1979, + "eval_samples_per_second": 823.473, + "eval_steps_per_second": 5.052, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8424 + }, + { + "epoch": 325.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9322670238779269, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6961235216819974, + "eval_f1_macro": 0.8202405385118361, + "eval_loss": 0.26794886589050293, + "eval_pr_auc": 0.6921960622354616, + "eval_precision": 0.7094074322062269, + "eval_precision_macro": 0.8252690299332196, + "eval_pred_class_0": 16681, + "eval_pred_class_1": 2987, + "eval_predicted_binding_ratio": 0.1518710595891804, + "eval_recall": 0.6833279587229926, + "eval_recall_macro": 0.815467323358599, + "eval_runtime": 0.2683, + "eval_samples_per_second": 607.5, + "eval_steps_per_second": 3.727, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8450 + }, + { + "epoch": 326.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9322930096501425, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6963230466185161, + "eval_f1_macro": 0.8203369534620676, + "eval_loss": 0.2680268883705139, + "eval_pr_auc": 0.6921477872574622, + "eval_precision": 0.7091273821464393, + "eval_precision_macro": 0.8251819077788622, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.815729439020512, + "eval_runtime": 0.1733, + "eval_samples_per_second": 940.367, + "eval_steps_per_second": 5.769, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8476 + }, + { + "epoch": 326.9230769230769, + "grad_norm": 17241.076171875, + "learning_rate": 3.2282309449959705e-07, + "loss": 0.2047, + "step": 8500 + }, + { + "epoch": 327.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9323445529646195, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6967657199146281, + "eval_f1_macro": 0.820604246632002, + "eval_loss": 0.26804205775260925, + "eval_pr_auc": 0.6924439463998024, + "eval_precision": 0.7096989966555184, + "eval_precision_macro": 0.8254994563562998, + "eval_pred_class_0": 16678, + "eval_pred_class_1": 2990, + "eval_predicted_binding_ratio": 0.15202359162090706, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8159510382892665, + "eval_runtime": 0.1771, + "eval_samples_per_second": 920.47, + "eval_steps_per_second": 5.647, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8502 + }, + { + "epoch": 328.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9323743441439272, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6967657199146281, + "eval_f1_macro": 0.820604246632002, + "eval_loss": 0.26802849769592285, + "eval_pr_auc": 0.6925977669253861, + "eval_precision": 0.7096989966555184, + "eval_precision_macro": 0.8254994563562998, + "eval_pred_class_0": 16678, + "eval_pred_class_1": 2990, + "eval_predicted_binding_ratio": 0.15202359162090706, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8159510382892665, + "eval_runtime": 0.1678, + "eval_samples_per_second": 971.421, + "eval_steps_per_second": 5.96, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8528 + }, + { + "epoch": 329.0, + "eval_accuracy": 0.9063453325198292, + "eval_auc": 0.9323637649175607, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6983295119554537, + "eval_f1_macro": 0.8214488366277419, + "eval_loss": 0.2680566608905792, + "eval_pr_auc": 0.6924500045026715, + "eval_precision": 0.7094841930116472, + "eval_precision_macro": 0.825665699698526, + "eval_pred_class_0": 16663, + "eval_pred_class_1": 3005, + "eval_predicted_binding_ratio": 0.15278625177954036, + "eval_recall": 0.6875201547887778, + "eval_recall_macro": 0.8174125189951615, + "eval_runtime": 0.2683, + "eval_samples_per_second": 607.54, + "eval_steps_per_second": 3.727, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8554 + }, + { + "epoch": 330.0, + "eval_accuracy": 0.906243644498678, + "eval_auc": 0.9324242037360844, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6974072858549393, + "eval_f1_macro": 0.8209676462967013, + "eval_loss": 0.26804018020629883, + "eval_pr_auc": 0.6928556723686659, + "eval_precision": 0.7099899766120948, + "eval_precision_macro": 0.8257296209897056, + "eval_pred_class_0": 16675, + "eval_pred_class_1": 2993, + "eval_predicted_binding_ratio": 0.15217612365263372, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.8164347532199341, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.585, + "eval_steps_per_second": 3.869, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8580 + }, + { + "epoch": 331.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9323838917254041, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6981162981162982, + "eval_f1_macro": 0.8213280175544326, + "eval_loss": 0.2678423821926117, + "eval_pr_auc": 0.6929480254197629, + "eval_precision": 0.7093874833555259, + "eval_precision_macro": 0.8255890849326837, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6871976781683328, + "eval_recall_macro": 0.817251280684939, + "eval_runtime": 0.2474, + "eval_samples_per_second": 658.814, + "eval_steps_per_second": 4.042, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8606 + }, + { + "epoch": 332.0, + "eval_accuracy": 0.9065487085621314, + "eval_auc": 0.9323530591687079, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6993784756297023, + "eval_f1_macro": 0.8220268454242666, + "eval_loss": 0.26779934763908386, + "eval_pr_auc": 0.6928104051729911, + "eval_precision": 0.7095917690009956, + "eval_precision_macro": 0.8258856473344816, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183195878979646, + "eval_runtime": 0.2656, + "eval_samples_per_second": 613.815, + "eval_steps_per_second": 3.766, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8632 + }, + { + "epoch": 333.0, + "eval_accuracy": 0.9063961765304047, + "eval_auc": 0.932432009200248, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6983450761920367, + "eval_f1_macro": 0.8214741659959972, + "eval_loss": 0.2679860591888428, + "eval_pr_auc": 0.6929996618872077, + "eval_precision": 0.7098600932711525, + "eval_precision_macro": 0.8258288825890143, + "eval_pred_class_0": 16666, + "eval_pred_class_1": 3002, + "eval_predicted_binding_ratio": 0.1526337197478137, + "eval_recall": 0.6871976781683328, + "eval_recall_macro": 0.817311641643471, + "eval_runtime": 0.1802, + "eval_samples_per_second": 904.69, + "eval_steps_per_second": 5.55, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8658 + }, + { + "epoch": 334.0, + "eval_accuracy": 0.9063961765304047, + "eval_auc": 0.9325279911049631, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6975521603417119, + "eval_f1_macro": 0.8210910370116632, + "eval_loss": 0.26808273792266846, + "eval_pr_auc": 0.6933573725062909, + "eval_precision": 0.7109845947756196, + "eval_precision_macro": 0.8261792653772595, + "eval_pred_class_0": 16682, + "eval_pred_class_1": 2986, + "eval_predicted_binding_ratio": 0.15182021557860484, + "eval_recall": 0.6846178652047726, + "eval_recall_macro": 0.816263178995819, + "eval_runtime": 0.2613, + "eval_samples_per_second": 623.721, + "eval_steps_per_second": 3.827, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8684 + }, + { + "epoch": 335.0, + "eval_accuracy": 0.9063453325198292, + "eval_auc": 0.932545276023111, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6976362442547603, + "eval_f1_macro": 0.8211138747443938, + "eval_loss": 0.26816996932029724, + "eval_pr_auc": 0.6932853419412803, + "eval_precision": 0.710464727515881, + "eval_precision_macro": 0.8259705061096825, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.816495114178466, + "eval_runtime": 0.254, + "eval_samples_per_second": 641.649, + "eval_steps_per_second": 3.936, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8710 + }, + { + "epoch": 336.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9325305604846879, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6982772764561116, + "eval_f1_macro": 0.821477015533191, + "eval_loss": 0.2680180072784424, + "eval_pr_auc": 0.6935243427052671, + "eval_precision": 0.7107548430193721, + "eval_precision_macro": 0.8262002594609874, + "eval_pred_class_0": 16674, + "eval_pred_class_1": 2994, + "eval_predicted_binding_ratio": 0.15222696766320928, + "eval_recall": 0.6862302483069977, + "eval_recall_macro": 0.8169788291091336, + "eval_runtime": 0.264, + "eval_samples_per_second": 617.515, + "eval_steps_per_second": 3.788, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8736 + }, + { + "epoch": 337.0, + "eval_accuracy": 0.9063453325198292, + "eval_auc": 0.932509878924404, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.698032786885246, + "eval_f1_macro": 0.8213054775682699, + "eval_loss": 0.2678954601287842, + "eval_pr_auc": 0.6935428752602653, + "eval_precision": 0.7099033011003668, + "eval_precision_macro": 0.8257957323787274, + "eval_pred_class_0": 16669, + "eval_pred_class_1": 2999, + "eval_predicted_binding_ratio": 0.15248118771608704, + "eval_recall": 0.6865527249274428, + "eval_recall_macro": 0.817019345502292, + "eval_runtime": 0.2614, + "eval_samples_per_second": 623.491, + "eval_steps_per_second": 3.825, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8762 + }, + { + "epoch": 338.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9325608674864403, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6989176779271893, + "eval_f1_macro": 0.8218398486513014, + "eval_loss": 0.26806220412254333, + "eval_pr_auc": 0.6935170525264562, + "eval_precision": 0.7110443777110443, + "eval_precision_macro": 0.8264297528888735, + "eval_pred_class_0": 16671, + "eval_pred_class_1": 2997, + "eval_predicted_binding_ratio": 0.15237949969493594, + "eval_recall": 0.6871976781683328, + "eval_recall_macro": 0.8174625440398011, + "eval_runtime": 0.1767, + "eval_samples_per_second": 922.3, + "eval_steps_per_second": 5.658, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8788 + }, + { + "epoch": 339.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9325807801793065, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6990163934426229, + "eval_f1_macro": 0.8218875444165816, + "eval_loss": 0.26803991198539734, + "eval_pr_auc": 0.6936355510877774, + "eval_precision": 0.7109036345448483, + "eval_precision_macro": 0.8263858865027319, + "eval_pred_class_0": 16669, + "eval_pred_class_1": 2999, + "eval_predicted_binding_ratio": 0.15248118771608704, + "eval_recall": 0.6875201547887778, + "eval_recall_macro": 0.8175936018707576, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.235, + "eval_steps_per_second": 3.891, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8814 + }, + { + "epoch": 340.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9326051308916972, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6983762506150566, + "eval_f1_macro": 0.8215248382050282, + "eval_loss": 0.26792144775390625, + "eval_pr_auc": 0.6939264625162457, + "eval_precision": 0.7106141522029372, + "eval_precision_macro": 0.8261564043164398, + "eval_pred_class_0": 16672, + "eval_pred_class_1": 2996, + "eval_predicted_binding_ratio": 0.15232865568436038, + "eval_recall": 0.6865527249274428, + "eval_recall_macro": 0.8171098869400901, + "eval_runtime": 0.2744, + "eval_samples_per_second": 593.925, + "eval_steps_per_second": 3.644, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8840 + }, + { + "epoch": 341.0, + "eval_accuracy": 0.9063961765304047, + "eval_auc": 0.9325905710732574, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6980482204362801, + "eval_f1_macro": 0.821330737974691, + "eval_loss": 0.26793381571769714, + "eval_pr_auc": 0.6938155556406347, + "eval_precision": 0.7102803738317757, + "eval_precision_macro": 0.825959524727788, + "eval_pred_class_0": 16672, + "eval_pred_class_1": 2996, + "eval_predicted_binding_ratio": 0.15232865568436038, + "eval_recall": 0.6862302483069977, + "eval_recall_macro": 0.8169184681506015, + "eval_runtime": 0.2601, + "eval_samples_per_second": 626.704, + "eval_steps_per_second": 3.845, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8866 + }, + { + "epoch": 342.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9325911160931989, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997542997542997, + "eval_f1_macro": 0.8222974802915219, + "eval_loss": 0.26800957322120667, + "eval_pr_auc": 0.6936597261010234, + "eval_precision": 0.711051930758988, + "eval_precision_macro": 0.8265713326382553, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8182083746323816, + "eval_runtime": 0.1766, + "eval_samples_per_second": 923.118, + "eval_steps_per_second": 5.663, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8892 + }, + { + "epoch": 343.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9326288587241547, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6982772764561116, + "eval_f1_macro": 0.821477015533191, + "eval_loss": 0.2680290937423706, + "eval_pr_auc": 0.6938544134850919, + "eval_precision": 0.7107548430193721, + "eval_precision_macro": 0.8262002594609874, + "eval_pred_class_0": 16674, + "eval_pred_class_1": 2994, + "eval_predicted_binding_ratio": 0.15222696766320928, + "eval_recall": 0.6862302483069977, + "eval_recall_macro": 0.8169788291091336, + "eval_runtime": 0.2553, + "eval_samples_per_second": 638.353, + "eval_steps_per_second": 3.916, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8918 + }, + { + "epoch": 344.0, + "eval_accuracy": 0.9064470205409803, + "eval_auc": 0.9326493359019604, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6982617251557888, + "eval_f1_macro": 0.8214517001734176, + "eval_loss": 0.2680736482143402, + "eval_pr_auc": 0.6938105747317047, + "eval_precision": 0.710377043710377, + "eval_precision_macro": 0.8260361014844849, + "eval_pred_class_0": 16671, + "eval_pred_class_1": 2997, + "eval_predicted_binding_ratio": 0.15237949969493594, + "eval_recall": 0.6865527249274428, + "eval_recall_macro": 0.8170797064608241, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.605, + "eval_steps_per_second": 3.869, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8944 + }, + { + "epoch": 345.0, + "eval_accuracy": 0.9063961765304047, + "eval_auc": 0.9326923535473509, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6977507798391068, + "eval_f1_macro": 0.8211870157279457, + "eval_loss": 0.2681424021720886, + "eval_pr_auc": 0.6938928037843046, + "eval_precision": 0.7107023411371237, + "eval_precision_macro": 0.8260910674386901, + "eval_pred_class_0": 16678, + "eval_pred_class_1": 2990, + "eval_predicted_binding_ratio": 0.15202359162090706, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.816525294657732, + "eval_runtime": 0.2453, + "eval_samples_per_second": 664.523, + "eval_steps_per_second": 4.077, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8970 + }, + { + "epoch": 346.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9327347288478101, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6979799638692724, + "eval_f1_macro": 0.821333351261192, + "eval_loss": 0.2682046592235565, + "eval_pr_auc": 0.6939995353512864, + "eval_precision": 0.7111780455153949, + "eval_precision_macro": 0.8263324280334768, + "eval_pred_class_0": 16680, + "eval_pred_class_1": 2988, + "eval_predicted_binding_ratio": 0.15192190359975594, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.8165856556162641, + "eval_runtime": 0.2656, + "eval_samples_per_second": 613.642, + "eval_steps_per_second": 3.765, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8996 + }, + { + "epoch": 346.15384615384613, + "grad_norm": 18666.783203125, + "learning_rate": 2.618336781094791e-07, + "loss": 0.2031, + "step": 9000 + }, + { + "epoch": 347.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9326942611171465, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6983139630054018, + "eval_f1_macro": 0.8214235117341392, + "eval_loss": 0.2681069076061249, + "eval_pr_auc": 0.6938781737586003, + "eval_precision": 0.7091090425531915, + "eval_precision_macro": 0.8255029006283365, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6878426314092229, + "eval_recall_macro": 0.817513396346852, + "eval_runtime": 0.1933, + "eval_samples_per_second": 843.456, + "eval_steps_per_second": 5.175, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9022 + }, + { + "epoch": 348.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9326940859321652, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6992961204779833, + "eval_f1_macro": 0.8220048784892098, + "eval_loss": 0.26795056462287903, + "eval_pr_auc": 0.6940722215736992, + "eval_precision": 0.7101063829787234, + "eval_precision_macro": 0.8260916068555082, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8180876527153176, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.287, + "eval_steps_per_second": 3.842, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9048 + }, + { + "epoch": 349.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9327572303853989, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6991310050828005, + "eval_f1_macro": 0.8219607247335354, + "eval_loss": 0.267974317073822, + "eval_pr_auc": 0.6943572217621294, + "eval_precision": 0.7111407605070047, + "eval_precision_macro": 0.8265061930909349, + "eval_pred_class_0": 16670, + "eval_pred_class_1": 2998, + "eval_predicted_binding_ratio": 0.15243034370551148, + "eval_recall": 0.6875201547887778, + "eval_recall_macro": 0.8176237823500236, + "eval_runtime": 0.2638, + "eval_samples_per_second": 617.819, + "eval_steps_per_second": 3.79, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9074 + }, + { + "epoch": 350.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9327859996523161, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6985064828491712, + "eval_f1_macro": 0.8216233644580062, + "eval_loss": 0.2680947780609131, + "eval_pr_auc": 0.6943020020881897, + "eval_precision": 0.7112299465240641, + "eval_precision_macro": 0.8264413105131714, + "eval_pred_class_0": 16676, + "eval_pred_class_1": 2992, + "eval_predicted_binding_ratio": 0.15212527964205816, + "eval_recall": 0.6862302483069977, + "eval_recall_macro": 0.8170391900676656, + "eval_runtime": 0.2408, + "eval_samples_per_second": 676.86, + "eval_steps_per_second": 4.153, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9100 + }, + { + "epoch": 351.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9328183504788495, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6986211424819435, + "eval_f1_macro": 0.8216965657061384, + "eval_loss": 0.2681223750114441, + "eval_pr_auc": 0.694450730809475, + "eval_precision": 0.7114677365429622, + "eval_precision_macro": 0.8265619548577976, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6862302483069977, + "eval_recall_macro": 0.8170693705469316, + "eval_runtime": 0.2563, + "eval_samples_per_second": 635.919, + "eval_steps_per_second": 3.901, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9126 + }, + { + "epoch": 352.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9328099415997506, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6991310050828005, + "eval_f1_macro": 0.8219607247335354, + "eval_loss": 0.26799651980400085, + "eval_pr_auc": 0.6945293388795055, + "eval_precision": 0.7111407605070047, + "eval_precision_macro": 0.8265061930909349, + "eval_pred_class_0": 16670, + "eval_pred_class_1": 2998, + "eval_predicted_binding_ratio": 0.15243034370551148, + "eval_recall": 0.6875201547887778, + "eval_recall_macro": 0.8176237823500236, + "eval_runtime": 0.1729, + "eval_samples_per_second": 942.612, + "eval_steps_per_second": 5.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9152 + }, + { + "epoch": 353.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9328013186056745, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7000819000819001, + "eval_f1_macro": 0.8224913728389398, + "eval_loss": 0.26793336868286133, + "eval_pr_auc": 0.6945633927009858, + "eval_precision": 0.7113848202396804, + "eval_precision_macro": 0.8267677821793697, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8183997934218701, + "eval_runtime": 0.2553, + "eval_samples_per_second": 638.431, + "eval_steps_per_second": 3.917, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9178 + }, + { + "epoch": 354.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.932783060437631, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.26802781224250793, + "eval_pr_auc": 0.694307607889245, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.039, + "eval_steps_per_second": 3.828, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9204 + }, + { + "epoch": 355.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9327856687473517, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997875469848014, + "eval_f1_macro": 0.8222422697443199, + "eval_loss": 0.2679577171802521, + "eval_pr_auc": 0.6944152708065687, + "eval_precision": 0.7094102054340623, + "eval_precision_macro": 0.8258762738882024, + "eval_pred_class_0": 16650, + "eval_pred_class_1": 3018, + "eval_predicted_binding_ratio": 0.15344722391702256, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8187429418701, + "eval_runtime": 0.1868, + "eval_samples_per_second": 872.379, + "eval_steps_per_second": 5.352, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9230 + }, + { + "epoch": 356.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9328543023299973, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7000490918016691, + "eval_f1_macro": 0.8224398957879677, + "eval_loss": 0.26801130175590515, + "eval_pr_auc": 0.6946173571124205, + "eval_precision": 0.7106312292358804, + "eval_precision_macro": 0.8264405996101362, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8186015481252511, + "eval_runtime": 0.1679, + "eval_samples_per_second": 971.014, + "eval_steps_per_second": 5.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9256 + }, + { + "epoch": 357.0, + "eval_accuracy": 0.9069554606467358, + "eval_auc": 0.9328912663610364, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002947920078612, + "eval_f1_macro": 0.8226120363891247, + "eval_loss": 0.26803824305534363, + "eval_pr_auc": 0.6948368351435844, + "eval_precision": 0.7114808652246256, + "eval_precision_macro": 0.8268440754137292, + "eval_pred_class_0": 16663, + "eval_pred_class_1": 3005, + "eval_predicted_binding_ratio": 0.15278625177954036, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8185610317320926, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.12, + "eval_steps_per_second": 3.841, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9282 + }, + { + "epoch": 358.0, + "eval_accuracy": 0.9069554606467358, + "eval_auc": 0.9328766286826047, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003929273084479, + "eval_f1_macro": 0.8226594466805872, + "eval_loss": 0.2680445909500122, + "eval_pr_auc": 0.6947543185413181, + "eval_precision": 0.711340206185567, + "eval_precision_macro": 0.8268002873554328, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8186920895630492, + "eval_runtime": 0.2229, + "eval_samples_per_second": 731.38, + "eval_steps_per_second": 4.487, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9308 + }, + { + "epoch": 359.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9328791980623294, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002617801047121, + "eval_f1_macro": 0.8225604590386311, + "eval_loss": 0.2679673135280609, + "eval_pr_auc": 0.694877850403851, + "eval_precision": 0.7107273331119229, + "eval_precision_macro": 0.8265169354519211, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187627864354736, + "eval_runtime": 0.1766, + "eval_samples_per_second": 922.941, + "eval_steps_per_second": 5.662, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9334 + }, + { + "epoch": 360.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9328811640271188, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700228832951945, + "eval_f1_macro": 0.8225089013937882, + "eval_loss": 0.2680214047431946, + "eval_pr_auc": 0.6947522638420174, + "eval_precision": 0.7099767981438515, + "eval_precision_macro": 0.8261913298268353, + "eval_pred_class_0": 16651, + "eval_pred_class_1": 3017, + "eval_predicted_binding_ratio": 0.15339637990644703, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189645411388546, + "eval_runtime": 0.2235, + "eval_samples_per_second": 729.222, + "eval_steps_per_second": 4.474, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9360 + }, + { + "epoch": 361.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.932900882070006, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7000163478829492, + "eval_f1_macro": 0.8223884382480462, + "eval_loss": 0.26796379685401917, + "eval_pr_auc": 0.6949176089109205, + "eval_precision": 0.7098806366047745, + "eval_precision_macro": 0.8261149519800236, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188033028286321, + "eval_runtime": 0.1694, + "eval_samples_per_second": 962.09, + "eval_steps_per_second": 5.902, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9386 + }, + { + "epoch": 362.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9329447561753046, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7005723630417008, + "eval_f1_macro": 0.8227283114988764, + "eval_loss": 0.26795387268066406, + "eval_pr_auc": 0.6950891938157696, + "eval_precision": 0.7106834771068348, + "eval_precision_macro": 0.8265498567232265, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190550825766527, + "eval_runtime": 0.2571, + "eval_samples_per_second": 633.968, + "eval_steps_per_second": 3.889, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9412 + }, + { + "epoch": 363.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9329731556072616, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7001636661211129, + "eval_f1_macro": 0.8225130616165066, + "eval_loss": 0.26798126101493835, + "eval_pr_auc": 0.6952376733836106, + "eval_precision": 0.7108673978065803, + "eval_precision_macro": 0.8265604171937038, + "eval_pred_class_0": 16659, + "eval_pred_class_1": 3009, + "eval_predicted_binding_ratio": 0.15298962782184258, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818631728604517, + "eval_runtime": 0.2635, + "eval_samples_per_second": 618.549, + "eval_steps_per_second": 3.795, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9438 + }, + { + "epoch": 364.0, + "eval_accuracy": 0.9070063046573114, + "eval_auc": 0.9329669754704237, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7008994276369583, + "eval_f1_macro": 0.8229219452383643, + "eval_loss": 0.26792433857917786, + "eval_pr_auc": 0.6952423959524336, + "eval_precision": 0.7110152621101526, + "eval_precision_macro": 0.8267457720422265, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.8192465013661412, + "eval_runtime": 0.2599, + "eval_samples_per_second": 627.085, + "eval_steps_per_second": 3.847, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9464 + }, + { + "epoch": 365.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9329833552661686, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003598298985934, + "eval_f1_macro": 0.8226078241660808, + "eval_loss": 0.2679993808269501, + "eval_pr_auc": 0.6952272078548911, + "eval_precision": 0.7105874543644208, + "eval_precision_macro": 0.8264735530603251, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188938442664302, + "eval_runtime": 0.2598, + "eval_samples_per_second": 627.317, + "eval_steps_per_second": 3.849, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9490 + }, + { + "epoch": 365.38461538461536, + "grad_norm": 18768.416015625, + "learning_rate": 2.0513069380006943e-07, + "loss": 0.2014, + "step": 9500 + }, + { + "epoch": 366.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9330138179879044, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003763704794632, + "eval_f1_macro": 0.822633632944773, + "eval_loss": 0.26798829436302185, + "eval_pr_auc": 0.6953770669982303, + "eval_precision": 0.7109634551495017, + "eval_precision_macro": 0.8266367281750631, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187929669147396, + "eval_runtime": 0.198, + "eval_samples_per_second": 823.426, + "eval_steps_per_second": 5.052, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9516 + }, + { + "epoch": 367.0, + "eval_accuracy": 0.9069554606467358, + "eval_auc": 0.9330281442263691, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7005890052356021, + "eval_f1_macro": 0.8227541703278901, + "eval_loss": 0.2679848372936249, + "eval_pr_auc": 0.6954512195878219, + "eval_precision": 0.7110594486881435, + "eval_precision_macro": 0.8267130106501293, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189542052249621, + "eval_runtime": 0.265, + "eval_samples_per_second": 615.184, + "eval_steps_per_second": 3.774, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9542 + }, + { + "epoch": 368.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9330502564639999, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004743988221822, + "eval_f1_macro": 0.8226809883524872, + "eval_loss": 0.2679852545261383, + "eval_pr_auc": 0.6955032656447977, + "eval_precision": 0.7108233731739708, + "eval_precision_macro": 0.8265932427829508, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189240247456961, + "eval_runtime": 0.2572, + "eval_samples_per_second": 633.651, + "eval_steps_per_second": 3.887, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9568 + }, + { + "epoch": 369.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9330669768972081, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004743988221822, + "eval_f1_macro": 0.8226809883524872, + "eval_loss": 0.2680181562900543, + "eval_pr_auc": 0.6955153529738297, + "eval_precision": 0.7108233731739708, + "eval_precision_macro": 0.8265932427829508, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189240247456961, + "eval_runtime": 0.2378, + "eval_samples_per_second": 685.421, + "eval_steps_per_second": 4.205, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9594 + }, + { + "epoch": 370.0, + "eval_accuracy": 0.9070063046573114, + "eval_auc": 0.9330907631246593, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7008015704236872, + "eval_f1_macro": 0.8228746737830142, + "eval_loss": 0.26799651980400085, + "eval_pr_auc": 0.6956635287744017, + "eval_precision": 0.7111553784860558, + "eval_precision_macro": 0.8267892646512892, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8191154435351846, + "eval_runtime": 0.1746, + "eval_samples_per_second": 933.529, + "eval_steps_per_second": 5.727, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9620 + }, + { + "epoch": 371.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9331252940309591, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6999672453324599, + "eval_f1_macro": 0.8224181697622275, + "eval_loss": 0.26801252365112305, + "eval_pr_auc": 0.6958269523241747, + "eval_precision": 0.7111480865224625, + "eval_precision_macro": 0.8266476794611952, + "eval_pred_class_0": 16663, + "eval_pred_class_1": 3005, + "eval_predicted_binding_ratio": 0.15278625177954036, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8183696129426041, + "eval_runtime": 0.2596, + "eval_samples_per_second": 627.791, + "eval_steps_per_second": 3.851, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9646 + }, + { + "epoch": 372.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9331262672808548, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7005723630417008, + "eval_f1_macro": 0.8227283114988764, + "eval_loss": 0.26803725957870483, + "eval_pr_auc": 0.6957878915651574, + "eval_precision": 0.7106834771068348, + "eval_precision_macro": 0.8265498567232265, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190550825766527, + "eval_runtime": 0.2654, + "eval_samples_per_second": 614.266, + "eval_steps_per_second": 3.769, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9672 + }, + { + "epoch": 373.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9331573528825239, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699607329842932, + "eval_f1_macro": 0.8221730364601127, + "eval_loss": 0.26807889342308044, + "eval_pr_auc": 0.6959044832644976, + "eval_precision": 0.7100631019594819, + "eval_precision_macro": 0.8261247850555049, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183799488564967, + "eval_runtime": 0.2659, + "eval_samples_per_second": 613.113, + "eval_steps_per_second": 3.761, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9698 + }, + { + "epoch": 374.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9331759224905339, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6996235063021771, + "eval_f1_macro": 0.8221986674075668, + "eval_loss": 0.2681734561920166, + "eval_pr_auc": 0.695850524773773, + "eval_precision": 0.710438829787234, + "eval_precision_macro": 0.8262878422645654, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.818279071504806, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.284, + "eval_steps_per_second": 3.842, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9724 + }, + { + "epoch": 375.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933143698186487, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700228832951945, + "eval_f1_macro": 0.8225089013937882, + "eval_loss": 0.2681812345981598, + "eval_pr_auc": 0.6956723886102156, + "eval_precision": 0.7099767981438515, + "eval_precision_macro": 0.8261913298268353, + "eval_pred_class_0": 16651, + "eval_pred_class_1": 3017, + "eval_predicted_binding_ratio": 0.15339637990644703, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189645411388546, + "eval_runtime": 0.1783, + "eval_samples_per_second": 914.221, + "eval_steps_per_second": 5.609, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9750 + }, + { + "epoch": 376.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9331405935193198, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002123835974514, + "eval_f1_macro": 0.8224831299290885, + "eval_loss": 0.2681320309638977, + "eval_pr_auc": 0.6957066245846253, + "eval_precision": 0.7096026490066225, + "eval_precision_macro": 0.8260290996114323, + "eval_pred_class_0": 16648, + "eval_pred_class_1": 3020, + "eval_predicted_binding_ratio": 0.1535489119381737, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.819065418490545, + "eval_runtime": 0.2617, + "eval_samples_per_second": 622.75, + "eval_steps_per_second": 3.821, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9776 + }, + { + "epoch": 377.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9331510656881976, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7010443864229765, + "eval_f1_macro": 0.8229384784439624, + "eval_loss": 0.2682173550128937, + "eval_pr_auc": 0.6955658216411763, + "eval_precision": 0.709613478691774, + "eval_precision_macro": 0.8261726428372638, + "eval_pred_class_0": 16641, + "eval_pred_class_1": 3027, + "eval_predicted_binding_ratio": 0.15390482001220257, + "eval_recall": 0.6926797807158981, + "eval_recall_macro": 0.8198112490831255, + "eval_runtime": 0.2446, + "eval_samples_per_second": 666.285, + "eval_steps_per_second": 4.088, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9802 + }, + { + "epoch": 378.0, + "eval_accuracy": 0.9065487085621314, + "eval_auc": 0.9331855381995037, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994767822105952, + "eval_f1_macro": 0.8220743333087894, + "eval_loss": 0.2681441009044647, + "eval_pr_auc": 0.6959256270686769, + "eval_precision": 0.709452736318408, + "eval_precision_macro": 0.8258426835378145, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8184506457289211, + "eval_runtime": 0.2608, + "eval_samples_per_second": 625.04, + "eval_steps_per_second": 3.835, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9828 + }, + { + "epoch": 379.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9331895674540718, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003267973856209, + "eval_f1_macro": 0.8225562214288413, + "eval_loss": 0.26809969544410706, + "eval_pr_auc": 0.6959934558189067, + "eval_precision": 0.7098376946008612, + "eval_precision_macro": 0.8261483505739005, + "eval_pred_class_0": 16649, + "eval_pred_class_1": 3019, + "eval_predicted_binding_ratio": 0.15349806792759813, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.8190955989698111, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.005, + "eval_steps_per_second": 3.828, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9854 + }, + { + "epoch": 380.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9332175581210724, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994928840176673, + "eval_f1_macro": 0.8220999320609059, + "eval_loss": 0.26810789108276367, + "eval_pr_auc": 0.6960299382483204, + "eval_precision": 0.7098273572377158, + "eval_precision_macro": 0.8260051771779358, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183497683772306, + "eval_runtime": 0.1782, + "eval_samples_per_second": 914.547, + "eval_steps_per_second": 5.611, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9880 + }, + { + "epoch": 381.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9332336167443518, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699607329842932, + "eval_f1_macro": 0.8221730364601127, + "eval_loss": 0.2680502235889435, + "eval_pr_auc": 0.6962912618518755, + "eval_precision": 0.7100631019594819, + "eval_precision_macro": 0.8261247850555049, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183799488564967, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.169, + "eval_steps_per_second": 3.946, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9906 + }, + { + "epoch": 382.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9332394951737218, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994106090373281, + "eval_f1_macro": 0.8220780022434744, + "eval_loss": 0.26803064346313477, + "eval_pr_auc": 0.696271279785013, + "eval_precision": 0.71034253408713, + "eval_precision_macro": 0.8262114206958068, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8181178331945835, + "eval_runtime": 0.1775, + "eval_samples_per_second": 918.497, + "eval_steps_per_second": 5.635, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9932 + }, + { + "epoch": 383.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9332396703587031, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.2680654227733612, + "eval_pr_auc": 0.6962410474404584, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.2478, + "eval_samples_per_second": 657.85, + "eval_steps_per_second": 4.036, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9958 + }, + { + "epoch": 384.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9332586000691747, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995090016366612, + "eval_f1_macro": 0.8221255355501671, + "eval_loss": 0.26804468035697937, + "eval_pr_auc": 0.6963737898708651, + "eval_precision": 0.7102027251578598, + "eval_precision_macro": 0.8261680532566416, + "eval_pred_class_0": 16659, + "eval_pred_class_1": 3009, + "eval_predicted_binding_ratio": 0.15298962782184258, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8182488910255401, + "eval_runtime": 0.1642, + "eval_samples_per_second": 992.895, + "eval_steps_per_second": 6.091, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9984 + }, + { + "epoch": 384.61538461538464, + "grad_norm": 19506.416015625, + "learning_rate": 1.5373466155541264e-07, + "loss": 0.1999, + "step": 10000 + }, + { + "epoch": 385.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9332885664334637, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6990990990990991, + "eval_f1_macro": 0.8219096951966862, + "eval_loss": 0.26817384362220764, + "eval_pr_auc": 0.6963813810367415, + "eval_precision": 0.7103861517976032, + "eval_precision_macro": 0.8261784335560267, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6881651080296678, + "eval_recall_macro": 0.8178255370534045, + "eval_runtime": 0.2609, + "eval_samples_per_second": 624.846, + "eval_steps_per_second": 3.833, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10010 + }, + { + "epoch": 386.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9332890919884074, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995251350908793, + "eval_f1_macro": 0.8221511437890823, + "eval_loss": 0.2681432366371155, + "eval_pr_auc": 0.6963686032935126, + "eval_precision": 0.7105788423153693, + "eval_precision_macro": 0.8263313128873689, + "eval_pred_class_0": 16662, + "eval_pred_class_1": 3006, + "eval_predicted_binding_ratio": 0.15283709579011592, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8181480136738496, + "eval_runtime": 0.2677, + "eval_samples_per_second": 609.003, + "eval_steps_per_second": 3.736, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10036 + }, + { + "epoch": 387.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9332816368942063, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6992961204779833, + "eval_f1_macro": 0.8220048784892098, + "eval_loss": 0.2681044936180115, + "eval_pr_auc": 0.6963340299375516, + "eval_precision": 0.7101063829787234, + "eval_precision_macro": 0.8260916068555082, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8180876527153176, + "eval_runtime": 0.2503, + "eval_samples_per_second": 651.154, + "eval_steps_per_second": 3.995, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10062 + }, + { + "epoch": 388.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9332853547088078, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699607329842932, + "eval_f1_macro": 0.8221730364601127, + "eval_loss": 0.26804089546203613, + "eval_pr_auc": 0.6964477494759991, + "eval_precision": 0.7100631019594819, + "eval_precision_macro": 0.8261247850555049, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183799488564967, + "eval_runtime": 0.2636, + "eval_samples_per_second": 618.349, + "eval_steps_per_second": 3.794, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10088 + }, + { + "epoch": 389.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9333026785569515, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997218131238749, + "eval_f1_macro": 0.8222461586311625, + "eval_loss": 0.26805874705314636, + "eval_pr_auc": 0.6965416515768459, + "eval_precision": 0.7102990033222591, + "eval_precision_macro": 0.8262444710452093, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184101293357626, + "eval_runtime": 0.1895, + "eval_samples_per_second": 860.093, + "eval_steps_per_second": 5.277, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10114 + }, + { + "epoch": 390.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9332940944928713, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26800334453582764, + "eval_pr_auc": 0.6965549091166009, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.2601, + "eval_samples_per_second": 626.7, + "eval_steps_per_second": 3.845, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10140 + }, + { + "epoch": 391.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9332915835081403, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699591169255928, + "eval_f1_macro": 0.8221474102804127, + "eval_loss": 0.26801303029060364, + "eval_pr_auc": 0.69649915263674, + "eval_precision": 0.7096881220968813, + "eval_precision_macro": 0.8259621107662262, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8184808262081871, + "eval_runtime": 0.2322, + "eval_samples_per_second": 702.068, + "eval_steps_per_second": 4.307, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10166 + }, + { + "epoch": 392.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9333394576705105, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2680598199367523, + "eval_pr_auc": 0.6966844521188784, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2619, + "eval_samples_per_second": 622.281, + "eval_steps_per_second": 3.818, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10192 + }, + { + "epoch": 393.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9333311363839021, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26812389492988586, + "eval_pr_auc": 0.696579843318821, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.2606, + "eval_samples_per_second": 625.459, + "eval_steps_per_second": 3.837, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10218 + }, + { + "epoch": 394.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9333747574442278, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6990990990990991, + "eval_f1_macro": 0.8219096951966862, + "eval_loss": 0.26815417408943176, + "eval_pr_auc": 0.6966758102563304, + "eval_precision": 0.7103861517976032, + "eval_precision_macro": 0.8261784335560267, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6881651080296678, + "eval_recall_macro": 0.8178255370534045, + "eval_runtime": 0.2489, + "eval_samples_per_second": 654.797, + "eval_steps_per_second": 4.017, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10244 + }, + { + "epoch": 395.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.93337473797923, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994106090373281, + "eval_f1_macro": 0.8220780022434744, + "eval_loss": 0.26812419295310974, + "eval_pr_auc": 0.6967071460749926, + "eval_precision": 0.71034253408713, + "eval_precision_macro": 0.8262114206958068, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8181178331945835, + "eval_runtime": 0.2546, + "eval_samples_per_second": 640.225, + "eval_steps_per_second": 3.928, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10270 + }, + { + "epoch": 396.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9333609762257046, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6966270582247817, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.1654, + "eval_samples_per_second": 985.392, + "eval_steps_per_second": 6.045, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10296 + }, + { + "epoch": 397.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9333962857319209, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997218131238749, + "eval_f1_macro": 0.8222461586311625, + "eval_loss": 0.268099308013916, + "eval_pr_auc": 0.6968255966064625, + "eval_precision": 0.7102990033222591, + "eval_precision_macro": 0.8262444710452093, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184101293357626, + "eval_runtime": 0.1974, + "eval_samples_per_second": 825.595, + "eval_steps_per_second": 5.065, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10322 + }, + { + "epoch": 398.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9334254248337986, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6987714987714988, + "eval_f1_macro": 0.8217158026492684, + "eval_loss": 0.26809167861938477, + "eval_pr_auc": 0.6970114900505864, + "eval_precision": 0.7100532623169108, + "eval_precision_macro": 0.8259819840149124, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6878426314092229, + "eval_recall_macro": 0.8176341182639161, + "eval_runtime": 0.1847, + "eval_samples_per_second": 882.621, + "eval_steps_per_second": 5.415, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10348 + }, + { + "epoch": 399.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9334154587548664, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994928840176673, + "eval_f1_macro": 0.8220999320609059, + "eval_loss": 0.26815035939216614, + "eval_pr_auc": 0.6968381832516852, + "eval_precision": 0.7098273572377158, + "eval_precision_macro": 0.8260051771779358, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183497683772306, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.281, + "eval_steps_per_second": 3.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10374 + }, + { + "epoch": 400.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9334343592678412, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6991976420501065, + "eval_f1_macro": 0.821957303073866, + "eval_loss": 0.26815110445022583, + "eval_pr_auc": 0.6969254623478301, + "eval_precision": 0.7102461743180306, + "eval_precision_macro": 0.826134970486347, + "eval_pred_class_0": 16662, + "eval_pred_class_1": 3006, + "eval_predicted_binding_ratio": 0.15283709579011592, + "eval_recall": 0.6884875846501128, + "eval_recall_macro": 0.817956594884361, + "eval_runtime": 0.2607, + "eval_samples_per_second": 625.319, + "eval_steps_per_second": 3.836, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10400 + }, + { + "epoch": 401.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9334508655860725, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6992961204779833, + "eval_f1_macro": 0.8220048784892098, + "eval_loss": 0.2681412398815155, + "eval_pr_auc": 0.6970213943546584, + "eval_precision": 0.7101063829787234, + "eval_precision_macro": 0.8260916068555082, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8180876527153176, + "eval_runtime": 0.2502, + "eval_samples_per_second": 651.598, + "eval_steps_per_second": 3.998, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10426 + }, + { + "epoch": 402.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9334578048578289, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6996235063021771, + "eval_f1_macro": 0.8221986674075668, + "eval_loss": 0.26816073060035706, + "eval_pr_auc": 0.697032431219364, + "eval_precision": 0.710438829787234, + "eval_precision_macro": 0.8262878422645654, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.818279071504806, + "eval_runtime": 0.2131, + "eval_samples_per_second": 764.767, + "eval_steps_per_second": 4.692, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10452 + }, + { + "epoch": 403.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9334644910846125, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6993945344460808, + "eval_f1_macro": 0.8220524214743572, + "eval_loss": 0.2681373655796051, + "eval_pr_auc": 0.6971117657586055, + "eval_precision": 0.7099667774086379, + "eval_precision_macro": 0.8260483424802825, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8182187105462742, + "eval_runtime": 0.1917, + "eval_samples_per_second": 850.439, + "eval_steps_per_second": 5.217, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10478 + }, + { + "epoch": 403.84615384615387, + "grad_norm": 20065.328125, + "learning_rate": 1.0857058873879127e-07, + "loss": 0.1991, + "step": 10500 + }, + { + "epoch": 404.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9334760435608745, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994106090373281, + "eval_f1_macro": 0.8220780022434744, + "eval_loss": 0.26816511154174805, + "eval_pr_auc": 0.6971952135976213, + "eval_precision": 0.71034253408713, + "eval_precision_macro": 0.8262114206958068, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8181178331945835, + "eval_runtime": 0.2491, + "eval_samples_per_second": 654.311, + "eval_steps_per_second": 4.014, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10504 + }, + { + "epoch": 405.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9334837419675497, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997218131238749, + "eval_f1_macro": 0.8222461586311625, + "eval_loss": 0.26816821098327637, + "eval_pr_auc": 0.6972179050703514, + "eval_precision": 0.7102990033222591, + "eval_precision_macro": 0.8262444710452093, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184101293357626, + "eval_runtime": 0.2552, + "eval_samples_per_second": 638.823, + "eval_steps_per_second": 3.919, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10530 + }, + { + "epoch": 406.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9334893478869489, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997218131238749, + "eval_f1_macro": 0.8222461586311625, + "eval_loss": 0.2681950330734253, + "eval_pr_auc": 0.6972103120395237, + "eval_precision": 0.7102990033222591, + "eval_precision_macro": 0.8262444710452093, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184101293357626, + "eval_runtime": 0.2003, + "eval_samples_per_second": 813.636, + "eval_steps_per_second": 4.992, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10556 + }, + { + "epoch": 407.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9334909050867821, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998363338788871, + "eval_f1_macro": 0.822319298583337, + "eval_loss": 0.2681744396686554, + "eval_pr_auc": 0.6972816477778223, + "eval_precision": 0.71053506148222, + "eval_precision_macro": 0.8263642352251727, + "eval_pred_class_0": 16659, + "eval_pred_class_1": 3009, + "eval_predicted_binding_ratio": 0.15298962782184258, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184403098150286, + "eval_runtime": 0.2497, + "eval_samples_per_second": 652.723, + "eval_steps_per_second": 4.004, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10582 + }, + { + "epoch": 408.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.933494973271346, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995090016366612, + "eval_f1_macro": 0.8221255355501671, + "eval_loss": 0.2681480348110199, + "eval_pr_auc": 0.697380910091478, + "eval_precision": 0.7102027251578598, + "eval_precision_macro": 0.8261680532566416, + "eval_pred_class_0": 16659, + "eval_pred_class_1": 3009, + "eval_predicted_binding_ratio": 0.15298962782184258, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8182488910255401, + "eval_runtime": 0.2536, + "eval_samples_per_second": 642.704, + "eval_steps_per_second": 3.943, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10608 + }, + { + "epoch": 409.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9334896009319218, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2680969536304474, + "eval_pr_auc": 0.6974108729960042, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2261, + "eval_samples_per_second": 721.003, + "eval_steps_per_second": 4.423, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10634 + }, + { + "epoch": 410.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9334926569365942, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699607329842932, + "eval_f1_macro": 0.8221730364601127, + "eval_loss": 0.26808932423591614, + "eval_pr_auc": 0.6974463532877748, + "eval_precision": 0.7100631019594819, + "eval_precision_macro": 0.8261247850555049, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183799488564967, + "eval_runtime": 0.2604, + "eval_samples_per_second": 625.923, + "eval_steps_per_second": 3.84, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10660 + }, + { + "epoch": 411.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9334912943867403, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2680690288543701, + "eval_pr_auc": 0.6974656777279113, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.181, + "eval_samples_per_second": 900.801, + "eval_steps_per_second": 5.526, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10686 + }, + { + "epoch": 412.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9334969197711376, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7000490918016691, + "eval_f1_macro": 0.8224398957879677, + "eval_loss": 0.26803234219551086, + "eval_pr_auc": 0.6975509824558107, + "eval_precision": 0.7106312292358804, + "eval_precision_macro": 0.8264405996101362, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8186015481252511, + "eval_runtime": 0.1785, + "eval_samples_per_second": 913.403, + "eval_steps_per_second": 5.604, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10712 + }, + { + "epoch": 413.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9334973090710958, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7000490918016691, + "eval_f1_macro": 0.8224398957879677, + "eval_loss": 0.26804181933403015, + "eval_pr_auc": 0.697539016898834, + "eval_precision": 0.7106312292358804, + "eval_precision_macro": 0.8264405996101362, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8186015481252511, + "eval_runtime": 0.1858, + "eval_samples_per_second": 877.163, + "eval_steps_per_second": 5.381, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10738 + }, + { + "epoch": 414.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9335163069090602, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998363338788871, + "eval_f1_macro": 0.822319298583337, + "eval_loss": 0.2680352032184601, + "eval_pr_auc": 0.697653341121327, + "eval_precision": 0.71053506148222, + "eval_precision_macro": 0.8263642352251727, + "eval_pred_class_0": 16659, + "eval_pred_class_1": 3009, + "eval_predicted_binding_ratio": 0.15298962782184258, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184403098150286, + "eval_runtime": 0.239, + "eval_samples_per_second": 682.004, + "eval_steps_per_second": 4.184, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10764 + }, + { + "epoch": 415.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9335094357647963, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004743988221822, + "eval_f1_macro": 0.8226809883524872, + "eval_loss": 0.2680439054965973, + "eval_pr_auc": 0.6975754183405896, + "eval_precision": 0.7108233731739708, + "eval_precision_macro": 0.8265932427829508, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189240247456961, + "eval_runtime": 0.2547, + "eval_samples_per_second": 640.063, + "eval_steps_per_second": 3.927, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10790 + }, + { + "epoch": 416.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9335170855089767, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004743988221822, + "eval_f1_macro": 0.8226809883524872, + "eval_loss": 0.26802244782447815, + "eval_pr_auc": 0.6976835126920541, + "eval_precision": 0.7108233731739708, + "eval_precision_macro": 0.8265932427829508, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189240247456961, + "eval_runtime": 0.1858, + "eval_samples_per_second": 877.122, + "eval_steps_per_second": 5.381, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10816 + }, + { + "epoch": 417.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9335191877287514, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004743988221822, + "eval_f1_macro": 0.8226809883524872, + "eval_loss": 0.26805901527404785, + "eval_pr_auc": 0.6976076287719296, + "eval_precision": 0.7108233731739708, + "eval_precision_macro": 0.8265932427829508, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189240247456961, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.253, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10842 + }, + { + "epoch": 418.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9335267206829443, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002617801047121, + "eval_f1_macro": 0.8225604590386311, + "eval_loss": 0.26806166768074036, + "eval_pr_auc": 0.6976721144908643, + "eval_precision": 0.7107273331119229, + "eval_precision_macro": 0.8265169354519211, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187627864354736, + "eval_runtime": 0.1992, + "eval_samples_per_second": 818.364, + "eval_steps_per_second": 5.021, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10868 + }, + { + "epoch": 419.0, + "eval_accuracy": 0.9069554606467358, + "eval_auc": 0.9335306331475249, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7006869479882237, + "eval_f1_macro": 0.8228014837466855, + "eval_loss": 0.2680812180042267, + "eval_pr_auc": 0.6976481882085733, + "eval_precision": 0.7109193494855626, + "eval_precision_macro": 0.8266695216356063, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190852630559187, + "eval_runtime": 0.2498, + "eval_samples_per_second": 652.627, + "eval_steps_per_second": 4.004, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10894 + }, + { + "epoch": 420.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9335429739562026, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26813384890556335, + "eval_pr_auc": 0.6976080200264206, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2232, + "eval_samples_per_second": 730.143, + "eval_steps_per_second": 4.479, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10920 + }, + { + "epoch": 421.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9335431686061818, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6999182338511856, + "eval_f1_macro": 0.8223410440199006, + "eval_loss": 0.2681698799133301, + "eval_pr_auc": 0.6975308484626278, + "eval_precision": 0.710019907100199, + "eval_precision_macro": 0.8261580260852261, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8186722449976755, + "eval_runtime": 0.1913, + "eval_samples_per_second": 852.153, + "eval_steps_per_second": 5.228, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10946 + }, + { + "epoch": 422.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9335561517597906, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.2681558430194855, + "eval_pr_auc": 0.6975926126749412, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.252, + "eval_samples_per_second": 646.818, + "eval_steps_per_second": 3.968, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10972 + }, + { + "epoch": 423.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9335701957557857, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6989687346537895, + "eval_f1_macro": 0.821811089570853, + "eval_loss": 0.2681851089000702, + "eval_pr_auc": 0.6976584248764129, + "eval_precision": 0.7097739361702128, + "eval_precision_macro": 0.825895371446451, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6884875846501128, + "eval_recall_macro": 0.8178962339258291, + "eval_runtime": 0.2518, + "eval_samples_per_second": 647.463, + "eval_steps_per_second": 3.972, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10998 + }, + { + "epoch": 423.0769230769231, + "grad_norm": 19880.513671875, + "learning_rate": 7.045132214180816e-08, + "loss": 0.198, + "step": 11000 + }, + { + "epoch": 424.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.933567850223537, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002452984464431, + "eval_f1_macro": 0.8225346777593885, + "eval_loss": 0.2681238353252411, + "eval_pr_auc": 0.6977041251052366, + "eval_precision": 0.7103516921035169, + "eval_precision_macro": 0.8263539414042262, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188636637871641, + "eval_runtime": 0.1802, + "eval_samples_per_second": 904.736, + "eval_steps_per_second": 5.551, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11024 + }, + { + "epoch": 425.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9335591688344673, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26810285449028015, + "eval_pr_auc": 0.6976771787125668, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2422, + "eval_samples_per_second": 673.038, + "eval_steps_per_second": 4.129, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11050 + }, + { + "epoch": 426.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9335456990559106, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700228832951945, + "eval_f1_macro": 0.8225089013937882, + "eval_loss": 0.2680869400501251, + "eval_pr_auc": 0.6976467269760291, + "eval_precision": 0.7099767981438515, + "eval_precision_macro": 0.8261913298268353, + "eval_pred_class_0": 16651, + "eval_pred_class_1": 3017, + "eval_predicted_binding_ratio": 0.15339637990644703, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189645411388546, + "eval_runtime": 0.1727, + "eval_samples_per_second": 943.897, + "eval_steps_per_second": 5.791, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11076 + }, + { + "epoch": 427.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9335531541501119, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004412485700278, + "eval_f1_macro": 0.8226293306702985, + "eval_loss": 0.26812946796417236, + "eval_pr_auc": 0.6976292182478663, + "eval_precision": 0.7100728959575878, + "eval_precision_macro": 0.8262676792100252, + "eval_pred_class_0": 16650, + "eval_pred_class_1": 3018, + "eval_predicted_binding_ratio": 0.15344722391702256, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.8191257794490772, + "eval_runtime": 0.2563, + "eval_samples_per_second": 635.952, + "eval_steps_per_second": 3.902, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11102 + }, + { + "epoch": 428.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9335772907475254, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002452984464431, + "eval_f1_macro": 0.8225346777593885, + "eval_loss": 0.26813971996307373, + "eval_pr_auc": 0.6977601654966087, + "eval_precision": 0.7103516921035169, + "eval_precision_macro": 0.8263539414042262, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188636637871641, + "eval_runtime": 0.2428, + "eval_samples_per_second": 671.434, + "eval_steps_per_second": 4.119, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11128 + }, + { + "epoch": 429.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9335880938213678, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6989687346537895, + "eval_f1_macro": 0.821811089570853, + "eval_loss": 0.26816368103027344, + "eval_pr_auc": 0.6977910817778257, + "eval_precision": 0.7097739361702128, + "eval_precision_macro": 0.825895371446451, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6884875846501128, + "eval_recall_macro": 0.8178962339258291, + "eval_runtime": 0.1821, + "eval_samples_per_second": 895.286, + "eval_steps_per_second": 5.493, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11154 + }, + { + "epoch": 430.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9335897288811925, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6993945344460808, + "eval_f1_macro": 0.8220524214743572, + "eval_loss": 0.2681582272052765, + "eval_pr_auc": 0.6978031299323104, + "eval_precision": 0.7099667774086379, + "eval_precision_macro": 0.8260483424802825, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8182187105462742, + "eval_runtime": 0.259, + "eval_samples_per_second": 629.36, + "eval_steps_per_second": 3.861, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11180 + }, + { + "epoch": 431.0, + "eval_accuracy": 0.9065487085621314, + "eval_auc": 0.9336081038392237, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6990831696136215, + "eval_f1_macro": 0.8218841874311036, + "eval_loss": 0.2681788206100464, + "eval_pr_auc": 0.6978648412555615, + "eval_precision": 0.7100099767209843, + "eval_precision_macro": 0.8260151318092648, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6884875846501128, + "eval_recall_macro": 0.8179264144050951, + "eval_runtime": 0.1932, + "eval_samples_per_second": 843.696, + "eval_steps_per_second": 5.176, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11206 + }, + { + "epoch": 432.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.933607091659332, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6989687346537895, + "eval_f1_macro": 0.821811089570853, + "eval_loss": 0.26817184686660767, + "eval_pr_auc": 0.6978628373729787, + "eval_precision": 0.7097739361702128, + "eval_precision_macro": 0.825895371446451, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6884875846501128, + "eval_recall_macro": 0.8178962339258291, + "eval_runtime": 0.2181, + "eval_samples_per_second": 747.26, + "eval_steps_per_second": 4.584, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11232 + }, + { + "epoch": 433.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336009699174881, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002452984464431, + "eval_f1_macro": 0.8225346777593885, + "eval_loss": 0.26814383268356323, + "eval_pr_auc": 0.6978726222778764, + "eval_precision": 0.7103516921035169, + "eval_precision_macro": 0.8263539414042262, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188636637871641, + "eval_runtime": 0.2581, + "eval_samples_per_second": 631.607, + "eval_steps_per_second": 3.875, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11258 + }, + { + "epoch": 434.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9335997338901205, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002452984464431, + "eval_f1_macro": 0.8225346777593885, + "eval_loss": 0.26813551783561707, + "eval_pr_auc": 0.6978611289947059, + "eval_precision": 0.7103516921035169, + "eval_precision_macro": 0.8263539414042262, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188636637871641, + "eval_runtime": 0.1828, + "eval_samples_per_second": 891.596, + "eval_steps_per_second": 5.47, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11284 + }, + { + "epoch": 435.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9335949455006336, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7001308044473512, + "eval_f1_macro": 0.8224615491231337, + "eval_loss": 0.2681216299533844, + "eval_pr_auc": 0.6978496657032133, + "eval_precision": 0.7101160862354893, + "eval_precision_macro": 0.8262344077367322, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188334833078981, + "eval_runtime": 0.2595, + "eval_samples_per_second": 628.033, + "eval_steps_per_second": 3.853, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11310 + }, + { + "epoch": 436.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933600045330087, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7001308044473512, + "eval_f1_macro": 0.8224615491231337, + "eval_loss": 0.2681162655353546, + "eval_pr_auc": 0.6978775373770459, + "eval_precision": 0.7101160862354893, + "eval_precision_macro": 0.8262344077367322, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188334833078981, + "eval_runtime": 0.2578, + "eval_samples_per_second": 632.163, + "eval_steps_per_second": 3.878, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11336 + }, + { + "epoch": 437.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9336053398095198, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.26813197135925293, + "eval_pr_auc": 0.6979041519553001, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.1888, + "eval_samples_per_second": 863.236, + "eval_steps_per_second": 5.296, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11362 + }, + { + "epoch": 438.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.933606196269428, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.26813068985939026, + "eval_pr_auc": 0.6979019651404079, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.2467, + "eval_samples_per_second": 660.854, + "eval_steps_per_second": 4.054, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11388 + }, + { + "epoch": 439.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336083958141924, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.26813721656799316, + "eval_pr_auc": 0.6979009956604231, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.1795, + "eval_samples_per_second": 908.065, + "eval_steps_per_second": 5.571, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11414 + }, + { + "epoch": 440.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336056512494865, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.2681269645690918, + "eval_pr_auc": 0.697889844022227, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.569, + "eval_steps_per_second": 3.881, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11440 + }, + { + "epoch": 441.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336110917164033, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.2681290805339813, + "eval_pr_auc": 0.6979179015521837, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.2643, + "eval_samples_per_second": 616.824, + "eval_steps_per_second": 3.784, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11466 + }, + { + "epoch": 442.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336152864234539, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.2681342363357544, + "eval_pr_auc": 0.697937589363129, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.1746, + "eval_samples_per_second": 933.511, + "eval_steps_per_second": 5.727, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11492 + }, + { + "epoch": 442.3076923076923, + "grad_norm": 19259.90625, + "learning_rate": 4.0062918659231006e-08, + "loss": 0.1984, + "step": 11500 + }, + { + "epoch": 443.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336222548927073, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6999182338511856, + "eval_f1_macro": 0.8223410440199006, + "eval_loss": 0.26813840866088867, + "eval_pr_auc": 0.6979755820701472, + "eval_precision": 0.710019907100199, + "eval_precision_macro": 0.8261580260852261, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8186722449976755, + "eval_runtime": 0.189, + "eval_samples_per_second": 862.533, + "eval_steps_per_second": 5.292, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11518 + }, + { + "epoch": 444.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336241624625029, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6999182338511856, + "eval_f1_macro": 0.8223410440199006, + "eval_loss": 0.2681417763233185, + "eval_pr_auc": 0.6980183846984902, + "eval_precision": 0.710019907100199, + "eval_precision_macro": 0.8261580260852261, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8186722449976755, + "eval_runtime": 0.1783, + "eval_samples_per_second": 913.982, + "eval_steps_per_second": 5.607, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11544 + }, + { + "epoch": 445.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.933623228142603, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.26812103390693665, + "eval_pr_auc": 0.6980203083536239, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.2463, + "eval_samples_per_second": 661.813, + "eval_steps_per_second": 4.06, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11570 + }, + { + "epoch": 446.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.933622994562628, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.26812058687210083, + "eval_pr_auc": 0.6980222288630917, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.25, + "eval_samples_per_second": 651.994, + "eval_steps_per_second": 4.0, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11596 + }, + { + "epoch": 447.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336267026447306, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.2681138813495636, + "eval_pr_auc": 0.6980542679927909, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.1865, + "eval_samples_per_second": 873.899, + "eval_steps_per_second": 5.361, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11622 + }, + { + "epoch": 448.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336341674714307, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26811888813972473, + "eval_pr_auc": 0.6980920829430033, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2363, + "eval_samples_per_second": 689.772, + "eval_steps_per_second": 4.232, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11648 + }, + { + "epoch": 449.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336351017913307, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7001472272206772, + "eval_f1_macro": 0.8224873029219602, + "eval_loss": 0.26812025904655457, + "eval_pr_auc": 0.6980982354073687, + "eval_precision": 0.7104913678618858, + "eval_precision_macro": 0.8263972209146124, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187326059562077, + "eval_runtime": 0.2566, + "eval_samples_per_second": 635.262, + "eval_steps_per_second": 3.897, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11674 + }, + { + "epoch": 450.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336350044663411, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.2681255340576172, + "eval_pr_auc": 0.69809285211945, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.1865, + "eval_samples_per_second": 874.141, + "eval_steps_per_second": 5.363, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11700 + }, + { + "epoch": 451.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9336387320134417, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.2681383192539215, + "eval_pr_auc": 0.6981051991963814, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.2522, + "eval_samples_per_second": 646.252, + "eval_steps_per_second": 3.965, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11726 + }, + { + "epoch": 452.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336409802207007, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003598298985934, + "eval_f1_macro": 0.8226078241660808, + "eval_loss": 0.26812514662742615, + "eval_pr_auc": 0.6981289571890097, + "eval_precision": 0.7105874543644208, + "eval_precision_macro": 0.8264735530603251, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188938442664302, + "eval_runtime": 0.1751, + "eval_samples_per_second": 930.856, + "eval_steps_per_second": 5.711, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11752 + }, + { + "epoch": 453.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336424400955443, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26812946796417236, + "eval_pr_auc": 0.6981354214954091, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2533, + "eval_samples_per_second": 643.502, + "eval_steps_per_second": 3.948, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11778 + }, + { + "epoch": 454.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336481628049311, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2681373655796051, + "eval_pr_auc": 0.6981529233150263, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2333, + "eval_samples_per_second": 698.566, + "eval_steps_per_second": 4.286, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11804 + }, + { + "epoch": 455.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336448537552857, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26815658807754517, + "eval_pr_auc": 0.6981319681162118, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.1829, + "eval_samples_per_second": 891.289, + "eval_steps_per_second": 5.468, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11830 + }, + { + "epoch": 456.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933646138445148, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.268153578042984, + "eval_pr_auc": 0.6981406301220767, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.693, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11856 + }, + { + "epoch": 457.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.933646138445148, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.268160343170166, + "eval_pr_auc": 0.6981424953255787, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.2524, + "eval_samples_per_second": 645.855, + "eval_steps_per_second": 3.962, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11882 + }, + { + "epoch": 458.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9336505959296704, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.26815977692604065, + "eval_pr_auc": 0.6981608628032375, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.1785, + "eval_samples_per_second": 913.407, + "eval_steps_per_second": 5.604, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11908 + }, + { + "epoch": 459.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336521336645056, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26815950870513916, + "eval_pr_auc": 0.6981611753342029, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.1894, + "eval_samples_per_second": 860.719, + "eval_steps_per_second": 5.28, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11934 + }, + { + "epoch": 460.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9336499146547433, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.2681548595428467, + "eval_pr_auc": 0.6981527417806164, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.2094, + "eval_samples_per_second": 778.376, + "eval_steps_per_second": 4.775, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11960 + }, + { + "epoch": 461.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336533599593742, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2681526839733124, + "eval_pr_auc": 0.6981686335311912, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.254, + "eval_steps_per_second": 3.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11986 + }, + { + "epoch": 461.53846153846155, + "grad_norm": 19181.365234375, + "learning_rate": 1.7952297882945e-08, + "loss": 0.1977, + "step": 12000 + }, + { + "epoch": 462.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336556178991323, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26815271377563477, + "eval_pr_auc": 0.6981807528411922, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2521, + "eval_samples_per_second": 646.614, + "eval_steps_per_second": 3.967, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12012 + }, + { + "epoch": 463.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336526397544513, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.2681511640548706, + "eval_pr_auc": 0.6981676703517014, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.2048, + "eval_samples_per_second": 795.899, + "eval_steps_per_second": 4.883, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12038 + }, + { + "epoch": 464.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336516859695536, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26814842224121094, + "eval_pr_auc": 0.698161790632896, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.2527, + "eval_samples_per_second": 645.01, + "eval_steps_per_second": 3.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12064 + }, + { + "epoch": 465.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336534183543679, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26814672350883484, + "eval_pr_auc": 0.6981725600302674, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.1868, + "eval_samples_per_second": 872.776, + "eval_steps_per_second": 5.354, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12090 + }, + { + "epoch": 466.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336528344044305, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.2681439220905304, + "eval_pr_auc": 0.6981719802867735, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.2538, + "eval_samples_per_second": 642.256, + "eval_steps_per_second": 3.94, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12116 + }, + { + "epoch": 467.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336571167039716, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26814955472946167, + "eval_pr_auc": 0.6981930050756842, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.251, + "eval_samples_per_second": 649.332, + "eval_steps_per_second": 3.984, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12142 + }, + { + "epoch": 468.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336574573414352, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26814743876457214, + "eval_pr_auc": 0.6981957415820915, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.1759, + "eval_samples_per_second": 926.564, + "eval_steps_per_second": 5.684, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12168 + }, + { + "epoch": 469.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336602894986317, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2681457996368408, + "eval_pr_auc": 0.6982076318844164, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.1997, + "eval_samples_per_second": 816.139, + "eval_steps_per_second": 5.007, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12194 + }, + { + "epoch": 470.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336571945639633, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.2681419849395752, + "eval_pr_auc": 0.6981934072595471, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.578, + "eval_steps_per_second": 3.936, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12220 + }, + { + "epoch": 471.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336578855713892, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.2681434154510498, + "eval_pr_auc": 0.6981968133129176, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.471, + "eval_steps_per_second": 3.868, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12246 + }, + { + "epoch": 472.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336576422589153, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26814115047454834, + "eval_pr_auc": 0.6981937092453367, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2611, + "eval_samples_per_second": 624.347, + "eval_steps_per_second": 3.83, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12272 + }, + { + "epoch": 473.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933659540096212, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26814183592796326, + "eval_pr_auc": 0.698207050020266, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.1807, + "eval_samples_per_second": 902.095, + "eval_steps_per_second": 5.534, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12298 + }, + { + "epoch": 474.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.933661126493542, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813554763793945, + "eval_pr_auc": 0.6982130837277154, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2369, + "eval_samples_per_second": 688.198, + "eval_steps_per_second": 4.222, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12324 + }, + { + "epoch": 475.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336606204035962, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813259720802307, + "eval_pr_auc": 0.6982118437878516, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.1655, + "eval_samples_per_second": 984.988, + "eval_steps_per_second": 6.043, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12350 + }, + { + "epoch": 476.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.933660883181068, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813384890556335, + "eval_pr_auc": 0.6982130674055568, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2472, + "eval_samples_per_second": 659.287, + "eval_steps_per_second": 4.045, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12376 + }, + { + "epoch": 477.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336603284286276, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.2681376338005066, + "eval_pr_auc": 0.6982114384730127, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.234, + "eval_samples_per_second": 696.525, + "eval_steps_per_second": 4.273, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12402 + }, + { + "epoch": 478.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336606982635879, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26814013719558716, + "eval_pr_auc": 0.6982139708732891, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2492, + "eval_samples_per_second": 654.084, + "eval_steps_per_second": 4.013, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12428 + }, + { + "epoch": 479.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336606593335921, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813676953315735, + "eval_pr_auc": 0.6982155837128797, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2089, + "eval_samples_per_second": 780.114, + "eval_steps_per_second": 4.786, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12454 + }, + { + "epoch": 480.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336606009385984, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813599467277527, + "eval_pr_auc": 0.6982237203295948, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.239, + "eval_samples_per_second": 682.114, + "eval_steps_per_second": 4.185, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12480 + }, + { + "epoch": 480.7692307692308, + "grad_norm": 19666.140625, + "learning_rate": 4.5173988392051e-09, + "loss": 0.1976, + "step": 12500 + }, + { + "epoch": 481.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336608247860743, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813769340515137, + "eval_pr_auc": 0.6982221169303999, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2452, + "eval_samples_per_second": 664.649, + "eval_steps_per_second": 4.078, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12506 + }, + { + "epoch": 482.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336611459585399, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.2681364417076111, + "eval_pr_auc": 0.6982243970162039, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2509, + "eval_samples_per_second": 649.641, + "eval_steps_per_second": 3.986, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12532 + }, + { + "epoch": 483.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336621192084356, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.2681383192539215, + "eval_pr_auc": 0.6982286439182355, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.229, + "eval_samples_per_second": 711.694, + "eval_steps_per_second": 4.366, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12558 + }, + { + "epoch": 484.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336621970684273, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.2681386172771454, + "eval_pr_auc": 0.6982328773712362, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2516, + "eval_samples_per_second": 647.789, + "eval_steps_per_second": 3.974, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12584 + }, + { + "epoch": 485.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336618856284606, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.2681376338005066, + "eval_pr_auc": 0.6982308470568848, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.4219, + "eval_samples_per_second": 386.39, + "eval_steps_per_second": 2.37, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12610 + }, + { + "epoch": 486.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336622749284189, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813745498657227, + "eval_pr_auc": 0.6982319234202713, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2599, + "eval_samples_per_second": 627.256, + "eval_steps_per_second": 3.848, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12636 + }, + { + "epoch": 487.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336626836933752, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813769340515137, + "eval_pr_auc": 0.6982353364927889, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2613, + "eval_samples_per_second": 623.782, + "eval_steps_per_second": 3.827, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12662 + }, + { + "epoch": 488.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336625766358866, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813805103302, + "eval_pr_auc": 0.6982371615828771, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2497, + "eval_samples_per_second": 652.662, + "eval_steps_per_second": 4.004, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12688 + }, + { + "epoch": 489.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336626642283772, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982364881625377, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2636, + "eval_samples_per_second": 618.289, + "eval_steps_per_second": 3.793, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12714 + }, + { + "epoch": 490.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336626642283772, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982365330396263, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2579, + "eval_samples_per_second": 632.058, + "eval_steps_per_second": 3.878, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12740 + }, + { + "epoch": 491.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933662722623371, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982369774278672, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2559, + "eval_samples_per_second": 636.959, + "eval_steps_per_second": 3.908, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12766 + }, + { + "epoch": 492.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336627420883689, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982367884435748, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.1891, + "eval_samples_per_second": 862.094, + "eval_steps_per_second": 5.289, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12792 + }, + { + "epoch": 493.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336628199483605, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982375574473259, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.1955, + "eval_samples_per_second": 833.803, + "eval_steps_per_second": 5.115, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12818 + }, + { + "epoch": 494.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336627810183648, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982374868010095, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.254, + "eval_samples_per_second": 641.752, + "eval_steps_per_second": 3.937, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12844 + }, + { + "epoch": 495.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336628296808595, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982373741196756, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2245, + "eval_samples_per_second": 726.009, + "eval_steps_per_second": 4.454, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12870 + }, + { + "epoch": 496.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336627712858657, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982369260304812, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2507, + "eval_samples_per_second": 650.217, + "eval_steps_per_second": 3.989, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12896 + }, + { + "epoch": 497.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336627615533667, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.698237407972925, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2463, + "eval_samples_per_second": 661.887, + "eval_steps_per_second": 4.061, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12922 + }, + { + "epoch": 498.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933662722623371, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982366928868454, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2494, + "eval_samples_per_second": 653.517, + "eval_steps_per_second": 4.009, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12948 + }, + { + "epoch": 499.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336627712858656, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982368906715606, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.1725, + "eval_samples_per_second": 945.076, + "eval_steps_per_second": 5.798, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12974 + } + ], + "logging_steps": 500, + "max_steps": 13000, + "num_input_tokens_seen": 0, + "num_train_epochs": 500, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0614503790871344e+16, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/training_args.bin b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..43cdbe5adfb2a2b6cd48f66b6b5e6b0cc84c9bc3 --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fce922531bcc60b40ec3cfe0214120623a297c18ab37c3a2e94007f715374c7 +size 5368 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/config.json b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a424582b16776120730808f810ad9081375d580e --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/config.json @@ -0,0 +1,52 @@ +{ + "architectures": [ + "GloMeModelForTokenClassification" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "bos_token_id": 28, + "cdr_weight": 0.0, + "class_weights": [ + 0.1, + 0.9 + ], + "classifier_activation": "gelu", + "classifier_bias": false, + "classifier_dropout": 0.1, + "classifier_pooling": "cls", + "cls_token_id": 28, + "compress_block_size": 16, + "compress_block_sliding_stride": 16, + "decoder_bias": true, + "dice_weight": 0.1, + "embedding_dropout": 0.1, + "eos_token_id": 29, + "hidden_activation": "gelu", + "hidden_size": 320, + "inner_rank": 32, + "intermediate_size": 1280, + "kv_heads": 10, + "mask_token_id": 31, + "mlp_bias": false, + "mlp_dropout": 0.1, + "model_size": "tiny", + "model_type": "glome", + "norm_bias": false, + "norm_eps": 1e-05, + "num_attention_heads": 20, + "num_hidden_layers": 6, + "num_selected_blocks": 8, + "num_slots": 64, + "pad_token_id": 30, + "reference_compile": null, + "selection_block_size": 16, + "sep_token_id": 29, + "sliding_window_size": 0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.52.3", + "unk_token_id": 27, + "use_glome": true, + "use_nsa": true, + "vocab_size": 36 +} diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/model.safetensors b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38fc66a65d33485164d13ff484750b61d1c7ae4e --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0af5cd7ea1c961f575b49c1824e246627fe1431012f8c7aefb4c304043a5c505 +size 61385376 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/optimizer.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d99c6676d0678ad3855f03b029d3179e2a68a15 --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:787d59fa175cff3add21d5089a62d37a22bcfa9310a078ec8ca0ef6cce84aad0 +size 122881658 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/rng_state.pth b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0fc6ee76cbc69eb6b79b7b3033a135464af14093 --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea30593948da7f35739a54c03f3cb3e2f22c1412ec5c93e4c8da21c76e0b76a +size 14244 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/scaler.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363 +size 988 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/scheduler.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e5d7ff8cd396b769c0bdea29d5030512c000100 --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e9d89428155a2a0813f903fdb3c0bafa943e8b525b97a6c7f8e2bc07ececa9 +size 1064 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/trainer_state.json b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4de772fdedbb7f399373859e1a1d93159f89c7e6 --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/trainer_state.json @@ -0,0 +1,12216 @@ +{ + "best_global_step": 12818, + "best_metric": 0.6982375574473259, + "best_model_checkpoint": "./results/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818", + "epoch": 500.0, + "eval_steps": 500, + "global_step": 13000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy": 0.3402481187716087, + "eval_auc": 0.3906724936824889, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25902238465052535, + "eval_f1_macro": 0.3322238022959372, + "eval_loss": 1.061540961265564, + "eval_pr_auc": 0.12123677424188789, + "eval_precision": 0.15737977933523004, + "eval_precision_macro": 0.49946219326282143, + "eval_pred_class_0": 5257, + "eval_pred_class_1": 14411, + "eval_predicted_binding_ratio": 0.7327130364043116, + "eval_recall": 0.7313769751693002, + "eval_recall_macro": 0.49920692785748166, + "eval_runtime": 0.3106, + "eval_samples_per_second": 524.711, + "eval_steps_per_second": 3.219, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 26 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.34141753101484645, + "eval_auc": 0.39093619574173194, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25910884859577876, + "eval_f1_macro": 0.33318779271412513, + "eval_loss": 1.0595855712890625, + "eval_pr_auc": 0.12129083172780017, + "eval_precision": 0.15748852732582394, + "eval_precision_macro": 0.4996674570038125, + "eval_pred_class_0": 5286, + "eval_pred_class_1": 14382, + "eval_predicted_binding_ratio": 0.7312385600976204, + "eval_recall": 0.7304095453079652, + "eval_recall_macro": 0.4995079053877304, + "eval_runtime": 0.2611, + "eval_samples_per_second": 624.188, + "eval_steps_per_second": 3.829, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 52 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.3436038234695953, + "eval_auc": 0.3913807276315981, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2594079853143644, + "eval_f1_macro": 0.3350089597864736, + "eval_loss": 1.0562976598739624, + "eval_pr_auc": 0.1213805792649038, + "eval_precision": 0.15776986951364175, + "eval_precision_macro": 0.5001890381857135, + "eval_pred_class_0": 5337, + "eval_pred_class_1": 14331, + "eval_predicted_binding_ratio": 0.7286455155582673, + "eval_recall": 0.7291196388261851, + "eval_recall_macro": 0.5002814346723429, + "eval_runtime": 0.2651, + "eval_samples_per_second": 614.877, + "eval_steps_per_second": 3.772, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.345688427903193, + "eval_auc": 0.39204411422551294, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25976416450963474, + "eval_f1_macro": 0.3367519287310599, + "eval_loss": 1.0516862869262695, + "eval_pr_auc": 0.1215177922821225, + "eval_precision": 0.15807896947633715, + "eval_precision_macro": 0.5007519661646174, + "eval_pred_class_0": 5384, + "eval_pred_class_1": 14284, + "eval_predicted_binding_ratio": 0.7262558470612162, + "eval_recall": 0.72815220896485, + "eval_recall_macro": 0.5011256608293798, + "eval_runtime": 0.2689, + "eval_samples_per_second": 606.218, + "eval_steps_per_second": 3.719, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 104 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.3489424445800285, + "eval_auc": 0.39286881698964193, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25986937171261776, + "eval_f1_macro": 0.33937421387990774, + "eval_loss": 1.0457645654678345, + "eval_pr_auc": 0.12168361829310792, + "eval_precision": 0.15830985915492957, + "eval_precision_macro": 0.5011556611063601, + "eval_pred_class_0": 5468, + "eval_pred_class_1": 14200, + "eval_predicted_binding_ratio": 0.7219849501728697, + "eval_recall": 0.7249274427603999, + "eval_recall_macro": 0.5017466331928395, + "eval_runtime": 0.2596, + "eval_samples_per_second": 627.89, + "eval_steps_per_second": 3.852, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 130 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.3526540573520439, + "eval_auc": 0.3938679358675814, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2595952547103978, + "eval_f1_macro": 0.34226375201644554, + "eval_loss": 1.0385552644729614, + "eval_pr_auc": 0.12189495582289459, + "eval_precision": 0.15835402625044342, + "eval_precision_macro": 0.5012118238196412, + "eval_pred_class_0": 5573, + "eval_pred_class_1": 14095, + "eval_predicted_binding_ratio": 0.7166463290624364, + "eval_recall": 0.7197678168332796, + "eval_recall_macro": 0.5018528828839544, + "eval_runtime": 0.2682, + "eval_samples_per_second": 607.675, + "eval_steps_per_second": 3.728, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 156 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.3575859263778727, + "eval_auc": 0.39509779283079605, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25933524825605253, + "eval_f1_macro": 0.34607916966826957, + "eval_loss": 1.0300335884094238, + "eval_pr_auc": 0.12215992714628282, + "eval_precision": 0.1584754262788365, + "eval_precision_macro": 0.5013918287261083, + "eval_pred_class_0": 5710, + "eval_pred_class_1": 13958, + "eval_predicted_binding_ratio": 0.7096806996135855, + "eval_recall": 0.7133182844243793, + "eval_recall_macro": 0.5021592327536275, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.797, + "eval_steps_per_second": 3.956, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 182 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.36261948342485256, + "eval_auc": 0.39656283563130934, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2583126257247663, + "eval_f1_macro": 0.3497589695442054, + "eval_loss": 1.0202081203460693, + "eval_pr_auc": 0.12247236024679278, + "eval_precision": 0.1581769436997319, + "eval_precision_macro": 0.5008542806107318, + "eval_pred_class_0": 5867, + "eval_pred_class_1": 13801, + "eval_predicted_binding_ratio": 0.7016981899532235, + "eval_recall": 0.7039664624314738, + "eval_recall_macro": 0.5013464231032241, + "eval_runtime": 0.2497, + "eval_samples_per_second": 652.667, + "eval_steps_per_second": 4.004, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 208 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.3690258287573724, + "eval_auc": 0.39822865015280895, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25848470363288717, + "eval_f1_macro": 0.3546848296925498, + "eval_loss": 1.0091010332107544, + "eval_pr_auc": 0.12282975659183427, + "eval_precision": 0.15863586358635864, + "eval_precision_macro": 0.5015788301853557, + "eval_pred_class_0": 6033, + "eval_pred_class_1": 13635, + "eval_predicted_binding_ratio": 0.6932580841976815, + "eval_recall": 0.6975169300225733, + "eval_recall_macro": 0.5025280068716114, + "eval_runtime": 0.2623, + "eval_samples_per_second": 621.417, + "eval_steps_per_second": 3.812, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 234 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.37553386211104334, + "eval_auc": 0.4001638991754374, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25788519637462237, + "eval_f1_macro": 0.35943500580602444, + "eval_loss": 0.9966734647750854, + "eval_pr_auc": 0.12325069957928089, + "eval_precision": 0.1586735073239646, + "eval_precision_macro": 0.5015911353953799, + "eval_pred_class_0": 6219, + "eval_pred_class_1": 13449, + "eval_predicted_binding_ratio": 0.6838010982306284, + "eval_recall": 0.6881651080296678, + "eval_recall_macro": 0.5025904311199223, + "eval_runtime": 0.2668, + "eval_samples_per_second": 610.918, + "eval_steps_per_second": 3.748, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 260 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.38382143583485867, + "eval_auc": 0.4023744221985687, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25855001529519733, + "eval_f1_macro": 0.3657153538700335, + "eval_loss": 0.9828852415084839, + "eval_pr_auc": 0.12373084625745168, + "eval_precision": 0.15954394442766537, + "eval_precision_macro": 0.5028728439448414, + "eval_pred_class_0": 6424, + "eval_pred_class_1": 13244, + "eval_predicted_binding_ratio": 0.6733780760626398, + "eval_recall": 0.6813930990003225, + "eval_recall_macro": 0.5047576347901956, + "eval_runtime": 0.2598, + "eval_samples_per_second": 627.475, + "eval_steps_per_second": 3.85, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 286 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.3912955053894651, + "eval_auc": 0.40482715792324586, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2574122317330356, + "eval_f1_macro": 0.370844480646392, + "eval_loss": 0.9678097367286682, + "eval_pr_auc": 0.12427357405982056, + "eval_precision": 0.15935796021810922, + "eval_precision_macro": 0.5025013059703454, + "eval_pred_class_0": 6647, + "eval_pred_class_1": 13021, + "eval_predicted_binding_ratio": 0.6620398617042912, + "eval_recall": 0.6691389874234118, + "eval_recall_macro": 0.5042139676659523, + "eval_runtime": 0.2363, + "eval_samples_per_second": 689.751, + "eval_steps_per_second": 4.232, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 312 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.4013117754728493, + "eval_auc": 0.40764224431659535, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2572383775941462, + "eval_f1_macro": 0.3779059068484294, + "eval_loss": 0.9513856172561646, + "eval_pr_auc": 0.12488748600523823, + "eval_precision": 0.15989648682559598, + "eval_precision_macro": 0.5031697587395765, + "eval_pred_class_0": 6916, + "eval_pred_class_1": 12752, + "eval_predicted_binding_ratio": 0.6483628228594671, + "eval_recall": 0.6575298290873912, + "eval_recall_macro": 0.5054414401669225, + "eval_runtime": 0.2536, + "eval_samples_per_second": 642.633, + "eval_steps_per_second": 3.943, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 338 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.4099552572706935, + "eval_auc": 0.4108023769954713, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.25432114630855235, + "eval_f1_macro": 0.38308115532732967, + "eval_loss": 0.9335527420043945, + "eval_pr_auc": 0.12558462856716973, + "eval_precision": 0.15880276039159044, + "eval_precision_macro": 0.5015495900209409, + "eval_pred_class_0": 7206, + "eval_pred_class_1": 12462, + "eval_predicted_binding_ratio": 0.6336180597925565, + "eval_recall": 0.6381812318606901, + "eval_recall_macro": 0.5027086517847544, + "eval_runtime": 0.2682, + "eval_samples_per_second": 607.718, + "eval_steps_per_second": 3.728, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 364 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.41844620703681107, + "eval_auc": 0.4144857969457745, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2516356974613975, + "eval_f1_macro": 0.3880413644466475, + "eval_loss": 0.9142351150512695, + "eval_pr_auc": 0.1264136402678906, + "eval_precision": 0.15784289583846342, + "eval_precision_macro": 0.5002307331563727, + "eval_pred_class_0": 7485, + "eval_pred_class_1": 12183, + "eval_predicted_binding_ratio": 0.6194325808419768, + "eval_recall": 0.6201225411157691, + "eval_recall_macro": 0.5004095532886144, + "eval_runtime": 0.2527, + "eval_samples_per_second": 645.136, + "eval_steps_per_second": 3.958, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 390 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.42897091722595077, + "eval_auc": 0.41858189431685716, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24961582147390926, + "eval_f1_macro": 0.3943717007980979, + "eval_loss": 0.8934236168861389, + "eval_pr_auc": 0.12736412734017702, + "eval_precision": 0.15742457441429294, + "eval_precision_macro": 0.4996940867457263, + "eval_pred_class_0": 7802, + "eval_pred_class_1": 11866, + "eval_predicted_binding_ratio": 0.6033150294895261, + "eval_recall": 0.6023863269912931, + "eval_recall_macro": 0.4994487317940711, + "eval_runtime": 0.2416, + "eval_samples_per_second": 674.578, + "eval_steps_per_second": 4.139, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 416 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.4378177750660972, + "eval_auc": 0.42318268015385996, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.24520445081575534, + "eval_f1_macro": 0.3986584493314002, + "eval_loss": 0.8710600733757019, + "eval_pr_auc": 0.12844830521974454, + "eval_precision": 0.15552476619328023, + "eval_precision_macro": 0.4974052402394973, + "eval_pred_class_0": 8120, + "eval_pred_class_1": 11548, + "eval_predicted_binding_ratio": 0.5871466341264999, + "eval_recall": 0.5791680103192518, + "eval_recall_macro": 0.4952639713574891, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.27, + "eval_steps_per_second": 3.891, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 442 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.4492576774455969, + "eval_auc": 0.4283753771124365, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.23911211014329867, + "eval_f1_macro": 0.4037791626413705, + "eval_loss": 0.8472632765769958, + "eval_pr_auc": 0.12969206942947384, + "eval_precision": 0.15285136955545578, + "eval_precision_macro": 0.4944498263457579, + "eval_pred_class_0": 8533, + "eval_pred_class_1": 11135, + "eval_predicted_binding_ratio": 0.5661480577587961, + "eval_recall": 0.5488552079974202, + "eval_recall_macro": 0.4897351430824307, + "eval_runtime": 0.281, + "eval_samples_per_second": 580.087, + "eval_steps_per_second": 3.559, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 468 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.46339231238560097, + "eval_auc": 0.4341240430739382, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.23588184187662903, + "eval_f1_macro": 0.41119432949496704, + "eval_loss": 0.8218646049499512, + "eval_pr_auc": 0.13110819406948146, + "eval_precision": 0.15208663990290355, + "eval_precision_macro": 0.4938729504080779, + "eval_pred_class_0": 8957, + "eval_pred_class_1": 10711, + "eval_predicted_binding_ratio": 0.544590197274761, + "eval_recall": 0.5253144147049339, + "eval_recall_macro": 0.4885580946585574, + "eval_runtime": 0.2447, + "eval_samples_per_second": 666.13, + "eval_steps_per_second": 4.087, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 494 + }, + { + "epoch": 19.23076923076923, + "grad_norm": 232728.109375, + "learning_rate": 3.8384615384615384e-07, + "loss": 0.99, + "step": 500 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.4798657718120805, + "eval_auc": 0.44078220133048324, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2309427153811457, + "eval_f1_macro": 0.4189975157915178, + "eval_loss": 0.7945711016654968, + "eval_pr_auc": 0.1327664236209388, + "eval_precision": 0.15057347318890305, + "eval_precision_macro": 0.49263119629657465, + "eval_pred_class_0": 9467, + "eval_pred_class_1": 10201, + "eval_predicted_binding_ratio": 0.5186597518812284, + "eval_recall": 0.49532408900354724, + "eval_recall_macro": 0.4861481916617905, + "eval_runtime": 0.249, + "eval_samples_per_second": 654.734, + "eval_steps_per_second": 4.017, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 520 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.49964409192597115, + "eval_auc": 0.4482004774880778, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22407947646455886, + "eval_f1_macro": 0.42742637388305044, + "eval_loss": 0.765658974647522, + "eval_pr_auc": 0.1347110745909903, + "eval_precision": 0.14829889375913172, + "eval_precision_macro": 0.4908656872127009, + "eval_pred_class_0": 10086, + "eval_pred_class_1": 9582, + "eval_predicted_binding_ratio": 0.48718730933496035, + "eval_recall": 0.4582392776523702, + "eval_recall_macro": 0.48281674753627146, + "eval_runtime": 0.2494, + "eval_samples_per_second": 653.647, + "eval_steps_per_second": 4.01, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 546 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.5294895261338214, + "eval_auc": 0.4563512312496838, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.22025615099427032, + "eval_f1_macro": 0.44167751484473966, + "eval_loss": 0.7351489067077637, + "eval_pr_auc": 0.13684095333600696, + "eval_precision": 0.14908178396258698, + "eval_precision_macro": 0.49225486317659667, + "eval_pred_class_0": 10901, + "eval_pred_class_1": 8767, + "eval_predicted_binding_ratio": 0.4457494407158837, + "eval_recall": 0.4214769429216382, + "eval_recall_macro": 0.48559209613637894, + "eval_runtime": 0.2523, + "eval_samples_per_second": 645.993, + "eval_steps_per_second": 3.963, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 572 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.564317673378076, + "eval_auc": 0.46539531162556536, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21679919568595193, + "eval_f1_macro": 0.45751035677940843, + "eval_loss": 0.7033414244651794, + "eval_pr_auc": 0.1392772958743257, + "eval_precision": 0.15127551020408164, + "eval_precision_macro": 0.49468577674559844, + "eval_pred_class_0": 11828, + "eval_pred_class_1": 7840, + "eval_predicted_binding_ratio": 0.3986170429123449, + "eval_recall": 0.38245727184779105, + "eval_recall_macro": 0.49040772688786005, + "eval_runtime": 0.2665, + "eval_samples_per_second": 611.699, + "eval_steps_per_second": 3.753, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 598 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.6033150294895261, + "eval_auc": 0.4754341993823483, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.21223747980613894, + "eval_f1_macro": 0.4735759293567254, + "eval_loss": 0.6706362962722778, + "eval_pr_auc": 0.14231930535250045, + "eval_precision": 0.1544906658826988, + "eval_precision_macro": 0.4975718001003078, + "eval_pred_class_0": 12865, + "eval_pred_class_1": 6803, + "eval_predicted_binding_ratio": 0.34589180394549524, + "eval_recall": 0.3389229280877136, + "eval_recall_macro": 0.49586334730576304, + "eval_runtime": 0.2619, + "eval_samples_per_second": 622.356, + "eval_steps_per_second": 3.818, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 624 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.6474984746796827, + "eval_auc": 0.4864891211002582, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2075665790376043, + "eval_f1_macro": 0.4904508280155492, + "eval_loss": 0.6374054551124573, + "eval_pr_auc": 0.14557281943245967, + "eval_precision": 0.16076487252124647, + "eval_precision_macro": 0.5021727358326632, + "eval_pred_class_0": 14020, + "eval_pred_class_1": 5648, + "eval_predicted_binding_ratio": 0.28716697173073014, + "eval_recall": 0.2928087713640761, + "eval_recall_macro": 0.5033489139611471, + "eval_runtime": 0.2341, + "eval_samples_per_second": 696.138, + "eval_steps_per_second": 4.271, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 650 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.6925462680496237, + "eval_auc": 0.49869307137754393, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.20360858685631503, + "eval_f1_macro": 0.5065549471155847, + "eval_loss": 0.6044979691505432, + "eval_pr_auc": 0.14935675594952297, + "eval_precision": 0.17208370436331255, + "eval_precision_macro": 0.5093417994668434, + "eval_pred_class_0": 15176, + "eval_pred_class_1": 4492, + "eval_predicted_binding_ratio": 0.22839129550538947, + "eval_recall": 0.2492744276039987, + "eval_recall_macro": 0.5123960114117054, + "eval_runtime": 0.2543, + "eval_samples_per_second": 640.94, + "eval_steps_per_second": 3.932, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 676 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.7326113483831604, + "eval_auc": 0.5121322314924708, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.1977116704805492, + "eval_f1_macro": 0.5186416868006297, + "eval_loss": 0.5727357268333435, + "eval_pr_auc": 0.15383837227298106, + "eval_precision": 0.18760856977417487, + "eval_precision_macro": 0.518159780138105, + "eval_pred_class_0": 16214, + "eval_pred_class_1": 3454, + "eval_predicted_binding_ratio": 0.1756152125279642, + "eval_recall": 0.20896485004837148, + "eval_recall_macro": 0.5197960002037596, + "eval_runtime": 0.2506, + "eval_samples_per_second": 650.309, + "eval_steps_per_second": 3.99, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 702 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.7681513117754728, + "eval_auc": 0.5270188672472933, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.18947742623533595, + "eval_f1_macro": 0.5271029967130403, + "eval_loss": 0.5426873564720154, + "eval_pr_auc": 0.1589999639181036, + "eval_precision": 0.21108910891089108, + "eval_precision_macro": 0.5306451786169109, + "eval_pred_class_0": 17143, + "eval_pred_class_1": 2525, + "eval_predicted_binding_ratio": 0.12838112670327437, + "eval_recall": 0.17188003869719445, + "eval_recall_macro": 0.5258205046507038, + "eval_runtime": 0.2527, + "eval_samples_per_second": 644.973, + "eval_steps_per_second": 3.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 728 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.7986068741102298, + "eval_auc": 0.5437619187831915, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.18076525336091004, + "eval_f1_macro": 0.5329784934669249, + "eval_loss": 0.5149086117744446, + "eval_pr_auc": 0.16564494894795073, + "eval_precision": 0.2520184544405998, + "eval_precision_macro": 0.5517368953367268, + "eval_pred_class_0": 17934, + "eval_pred_class_1": 1734, + "eval_predicted_binding_ratio": 0.08816351433801098, + "eval_recall": 0.14092228313447275, + "eval_recall_macro": 0.5313170599592205, + "eval_runtime": 0.258, + "eval_samples_per_second": 631.786, + "eval_steps_per_second": 3.876, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 754 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.8241814114297336, + "eval_auc": 0.5629712926123112, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.17548879351454458, + "eval_f1_macro": 0.5385440097559633, + "eval_loss": 0.4897482395172119, + "eval_pr_auc": 0.17432371223202417, + "eval_precision": 0.3366880146386093, + "eval_precision_macro": 0.5947773855158054, + "eval_pred_class_0": 18575, + "eval_pred_class_1": 1093, + "eval_predicted_binding_ratio": 0.05557250355908074, + "eval_recall": 0.11867139632376653, + "eval_recall_macro": 0.5374548506940254, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.589, + "eval_steps_per_second": 3.899, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 780 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.8372991661582265, + "eval_auc": 0.5843234707368185, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.16883116883116883, + "eval_f1_macro": 0.5393273806169032, + "eval_loss": 0.46770623326301575, + "eval_pr_auc": 0.18537280435724188, + "eval_precision": 0.43391188251001334, + "eval_precision_macro": 0.6435905413924347, + "eval_pred_class_0": 18919, + "eval_pred_class_1": 749, + "eval_predicted_binding_ratio": 0.038082163921090095, + "eval_recall": 0.10480490164463076, + "eval_recall_macro": 0.5396059276135269, + "eval_runtime": 0.262, + "eval_samples_per_second": 622.108, + "eval_steps_per_second": 3.817, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 806 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.8421293471629042, + "eval_auc": 0.6080693861773249, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.16012983500135244, + "eval_f1_macro": 0.5365030891665479, + "eval_loss": 0.44841739535331726, + "eval_pr_auc": 0.1997259509611161, + "eval_precision": 0.4966442953020134, + "eval_precision_macro": 0.6747850251677853, + "eval_pred_class_0": 19072, + "eval_pred_class_1": 596, + "eval_predicted_binding_ratio": 0.030303030303030304, + "eval_recall": 0.09545307965172525, + "eval_recall_macro": 0.5386723960460594, + "eval_runtime": 0.2596, + "eval_samples_per_second": 627.915, + "eval_steps_per_second": 3.852, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 832 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.8450782997762863, + "eval_auc": 0.6341019717128032, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.16497670594683475, + "eval_f1_macro": 0.5397977373430758, + "eval_loss": 0.4312308728694916, + "eval_pr_auc": 0.2181951536640109, + "eval_precision": 0.5492700729927007, + "eval_precision_macro": 0.7014132791741746, + "eval_pred_class_0": 19120, + "eval_pred_class_1": 548, + "eval_predicted_binding_ratio": 0.0278625177954037, + "eval_recall": 0.09706546275395034, + "eval_recall_macro": 0.5410781529982704, + "eval_runtime": 0.2519, + "eval_samples_per_second": 647.04, + "eval_steps_per_second": 3.97, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 858 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.8478238763473663, + "eval_auc": 0.6614343616815009, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.17932547299149987, + "eval_f1_macro": 0.5477310488609045, + "eval_loss": 0.41584137082099915, + "eval_pr_auc": 0.24110190314317137, + "eval_precision": 0.5989010989010989, + "eval_precision_macro": 0.7269162957114008, + "eval_pred_class_0": 19122, + "eval_pred_class_1": 546, + "eval_predicted_binding_ratio": 0.027760829774252593, + "eval_recall": 0.1054498548855208, + "eval_recall_macro": 0.546115402483504, + "eval_runtime": 0.2547, + "eval_samples_per_second": 640.016, + "eval_steps_per_second": 3.926, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 884 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.8509253609924751, + "eval_auc": 0.6891114086357669, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.20585048754062837, + "eval_f1_macro": 0.5617963020129356, + "eval_loss": 0.4015716016292572, + "eval_pr_auc": 0.2683830744239665, + "eval_precision": 0.6429780033840947, + "eval_precision_macro": 0.7501727569994856, + "eval_pred_class_0": 19077, + "eval_pred_class_1": 591, + "eval_predicted_binding_ratio": 0.030048810250152533, + "eval_recall": 0.12254111576910674, + "eval_recall_macro": 0.5549024767594251, + "eval_runtime": 0.2497, + "eval_samples_per_second": 652.872, + "eval_steps_per_second": 4.005, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 910 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.8537217815741306, + "eval_auc": 0.7168296727231165, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2366675510745556, + "eval_f1_macro": 0.5778889812054533, + "eval_loss": 0.3880736827850342, + "eval_pr_auc": 0.2994175694348318, + "eval_precision": 0.6676646706586826, + "eval_precision_macro": 0.7639639142767097, + "eval_pred_class_0": 19000, + "eval_pred_class_1": 668, + "eval_predicted_binding_ratio": 0.0339637990644702, + "eval_recall": 0.1438245727184779, + "eval_recall_macro": 0.5652122199621845, + "eval_runtime": 0.2693, + "eval_samples_per_second": 605.227, + "eval_steps_per_second": 3.713, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 936 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.8565690461663616, + "eval_auc": 0.743181046261935, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.2768520892078954, + "eval_f1_macro": 0.59862076733571, + "eval_loss": 0.37574923038482666, + "eval_pr_auc": 0.33279138215623166, + "eval_precision": 0.675, + "eval_precision_macro": 0.7696337714649142, + "eval_pred_class_0": 18868, + "eval_pred_class_1": 800, + "eval_predicted_binding_ratio": 0.04067520846044336, + "eval_recall": 0.17413737504030957, + "eval_recall_macro": 0.5792217629109919, + "eval_runtime": 0.2774, + "eval_samples_per_second": 587.664, + "eval_steps_per_second": 3.605, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 962 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.8596705308114704, + "eval_auc": 0.7678773986205973, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.3205317577548006, + "eval_f1_macro": 0.6211435791665652, + "eval_loss": 0.36427780985832214, + "eval_pr_auc": 0.3671211589285648, + "eval_precision": 0.6774193548387096, + "eval_precision_macro": 0.7732261685723991, + "eval_pred_class_0": 18707, + "eval_pred_class_1": 961, + "eval_predicted_binding_ratio": 0.048861094163107584, + "eval_recall": 0.20993227990970656, + "eval_recall_macro": 0.5956101913823899, + "eval_runtime": 0.2611, + "eval_samples_per_second": 624.252, + "eval_steps_per_second": 3.83, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 988 + }, + { + "epoch": 38.46153846153846, + "grad_norm": 35024.03515625, + "learning_rate": 7.684615384615384e-07, + "loss": 0.5725, + "step": 1000 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.8642464917632703, + "eval_auc": 0.7904617013805764, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.3723554301833568, + "eval_f1_macro": 0.6481240123381296, + "eval_loss": 0.35397008061408997, + "eval_pr_auc": 0.40223746916130343, + "eval_precision": 0.6869037294015612, + "eval_precision_macro": 0.7810970172797707, + "eval_pred_class_0": 18515, + "eval_pred_class_1": 1153, + "eval_predicted_binding_ratio": 0.058623144193613995, + "eval_recall": 0.25540148339245405, + "eval_recall_macro": 0.6168055886811972, + "eval_runtime": 0.2572, + "eval_samples_per_second": 633.77, + "eval_steps_per_second": 3.888, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1014 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.867246288387228, + "eval_auc": 0.8102996097248453, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.4125984251968504, + "eval_f1_macro": 0.6688826868467987, + "eval_loss": 0.3446972072124481, + "eval_pr_auc": 0.43559149314237056, + "eval_precision": 0.6822916666666666, + "eval_precision_macro": 0.7815518582187295, + "eval_pred_class_0": 18324, + "eval_pred_class_1": 1344, + "eval_predicted_binding_ratio": 0.06833435021354485, + "eval_recall": 0.29571106094808125, + "eval_recall_macro": 0.634968465827454, + "eval_runtime": 0.2532, + "eval_samples_per_second": 643.748, + "eval_steps_per_second": 3.949, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1040 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.8715171852755745, + "eval_auc": 0.8272611461298317, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.4583065380493033, + "eval_f1_macro": 0.6927107089600444, + "eval_loss": 0.33654505014419556, + "eval_pr_auc": 0.4645782536288223, + "eval_precision": 0.6835038363171355, + "eval_precision_macro": 0.7856317237263981, + "eval_pred_class_0": 18104, + "eval_pred_class_1": 1564, + "eval_predicted_binding_ratio": 0.07952003254016676, + "eval_recall": 0.344727507255724, + "eval_recall_macro": 0.6574244163911867, + "eval_runtime": 0.2602, + "eval_samples_per_second": 626.484, + "eval_steps_per_second": 3.843, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1066 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.8743136058572301, + "eval_auc": 0.8416796876148132, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.4898885678910442, + "eval_f1_macro": 0.7091078096051335, + "eval_loss": 0.329649955034256, + "eval_pr_auc": 0.49067495219464874, + "eval_precision": 0.6802292263610316, + "eval_precision_macro": 0.7867195342316791, + "eval_pred_class_0": 17923, + "eval_pred_class_1": 1745, + "eval_predicted_binding_ratio": 0.08872279845434208, + "eval_recall": 0.38277974846823604, + "eval_recall_macro": 0.674549166803684, + "eval_runtime": 0.2641, + "eval_samples_per_second": 617.276, + "eval_steps_per_second": 3.787, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1092 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.8758897701850722, + "eval_auc": 0.8534597097025247, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5176842521240862, + "eval_f1_macro": 0.7232330815689724, + "eval_loss": 0.32387641072273254, + "eval_pr_auc": 0.5115876936649595, + "eval_precision": 0.6683673469387755, + "eval_precision_macro": 0.7836133097919538, + "eval_pred_class_0": 17708, + "eval_pred_class_1": 1960, + "eval_predicted_binding_ratio": 0.09965426072808622, + "eval_recall": 0.42244437278297325, + "eval_recall_macro": 0.6916048748685797, + "eval_runtime": 0.2631, + "eval_samples_per_second": 619.429, + "eval_steps_per_second": 3.8, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1118 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.878991254830181, + "eval_auc": 0.863260959032272, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5468392993145469, + "eval_f1_macro": 0.7385059071387897, + "eval_loss": 0.3189404308795929, + "eval_pr_auc": 0.5291286431025274, + "eval_precision": 0.6675964667596467, + "eval_precision_macro": 0.7862729722049646, + "eval_pred_class_0": 17517, + "eval_pred_class_1": 2151, + "eval_predicted_binding_ratio": 0.10936546674801709, + "eval_recall": 0.4630764269590455, + "eval_recall_macro": 0.7099591708043251, + "eval_runtime": 0.2583, + "eval_samples_per_second": 630.993, + "eval_steps_per_second": 3.871, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1144 + }, + { + "epoch": 45.0, + "eval_accuracy": 0.8797030709782387, + "eval_auc": 0.8710211865407248, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5603864734299517, + "eval_f1_macro": 0.7453519808982827, + "eval_loss": 0.3149340748786926, + "eval_pr_auc": 0.5420378923897758, + "eval_precision": 0.6611135466900482, + "eval_precision_macro": 0.7847466853482449, + "eval_pred_class_0": 17387, + "eval_pred_class_1": 2281, + "eval_predicted_binding_ratio": 0.11597518812283913, + "eval_recall": 0.48629474363108677, + "eval_recall_macro": 0.719817861342917, + "eval_runtime": 0.2505, + "eval_samples_per_second": 650.753, + "eval_steps_per_second": 3.992, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1170 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.8811775472849298, + "eval_auc": 0.8772876506442417, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5748590140076405, + "eval_f1_macro": 0.7528983447354317, + "eval_loss": 0.3115498721599579, + "eval_pr_auc": 0.5526462799402374, + "eval_precision": 0.659432387312187, + "eval_precision_macro": 0.7856853923591968, + "eval_pred_class_0": 17272, + "eval_pred_class_1": 2396, + "eval_predicted_binding_ratio": 0.12182224933902787, + "eval_recall": 0.509513060303128, + "eval_recall_macro": 0.7301292590704993, + "eval_runtime": 0.247, + "eval_samples_per_second": 659.808, + "eval_steps_per_second": 4.048, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1196 + }, + { + "epoch": 47.0, + "eval_accuracy": 0.8817368314012609, + "eval_auc": 0.8824923380415335, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5840486409155937, + "eval_f1_macro": 0.7575589340187262, + "eval_loss": 0.3087034523487091, + "eval_pr_auc": 0.5616002050007283, + "eval_precision": 0.6555600160578081, + "eval_precision_macro": 0.7850484483851945, + "eval_pred_class_0": 17177, + "eval_pred_class_1": 2491, + "eval_predicted_binding_ratio": 0.12665243034370552, + "eval_recall": 0.526604321186714, + "eval_recall_macro": 0.7374073093831197, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.602, + "eval_steps_per_second": 3.924, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1222 + }, + { + "epoch": 48.0, + "eval_accuracy": 0.8833638397396787, + "eval_auc": 0.8867719903429474, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.5951288386869044, + "eval_f1_macro": 0.7634984852775182, + "eval_loss": 0.30617523193359375, + "eval_pr_auc": 0.5687331552143856, + "eval_precision": 0.6573099415204678, + "eval_precision_macro": 0.7872879591248483, + "eval_pred_class_0": 17103, + "eval_pred_class_1": 2565, + "eval_predicted_binding_ratio": 0.13041488712629654, + "eval_recall": 0.5436955820702999, + "eval_recall_macro": 0.7453191497603264, + "eval_runtime": 0.2573, + "eval_samples_per_second": 633.489, + "eval_steps_per_second": 3.886, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1248 + }, + { + "epoch": 49.0, + "eval_accuracy": 0.8840248118771609, + "eval_auc": 0.8901050792607902, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6022667829119442, + "eval_f1_macro": 0.7671909492667516, + "eval_loss": 0.3041446805000305, + "eval_pr_auc": 0.5742293420515451, + "eval_precision": 0.6556567957479119, + "eval_precision_macro": 0.7874972953730754, + "eval_pred_class_0": 17034, + "eval_pred_class_1": 2634, + "eval_predicted_binding_ratio": 0.13392312385600977, + "eval_recall": 0.5569171235085456, + "eval_recall_macro": 0.751084867060001, + "eval_runtime": 0.2527, + "eval_samples_per_second": 644.934, + "eval_steps_per_second": 3.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1274 + }, + { + "epoch": 50.0, + "eval_accuracy": 0.8846349400040675, + "eval_auc": 0.8931467576948593, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6055970797844603, + "eval_f1_macro": 0.7690165668701654, + "eval_loss": 0.30221912264823914, + "eval_pr_auc": 0.5797467982851593, + "eval_precision": 0.6568627450980392, + "eval_precision_macro": 0.7884983683177079, + "eval_pred_class_0": 17016, + "eval_pred_class_1": 2652, + "eval_predicted_binding_ratio": 0.13483831604636973, + "eval_recall": 0.561754272815221, + "eval_recall_macro": 0.7534129002755408, + "eval_runtime": 0.2505, + "eval_samples_per_second": 650.805, + "eval_steps_per_second": 3.993, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1300 + }, + { + "epoch": 51.0, + "eval_accuracy": 0.8854484441732764, + "eval_auc": 0.8956789398085232, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6095997227516895, + "eval_f1_macro": 0.7712381155096151, + "eval_loss": 0.30062082409858704, + "eval_pr_auc": 0.5844826815319759, + "eval_precision": 0.6588014981273408, + "eval_precision_macro": 0.7899255166833903, + "eval_pred_class_0": 16998, + "eval_pred_class_1": 2670, + "eval_predicted_binding_ratio": 0.13575350823672971, + "eval_recall": 0.5672363753627861, + "eval_recall_macro": 0.7561237710700572, + "eval_runtime": 0.2579, + "eval_samples_per_second": 631.92, + "eval_steps_per_second": 3.877, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1326 + }, + { + "epoch": 52.0, + "eval_accuracy": 0.8860077282896075, + "eval_auc": 0.8977014114868052, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6157010627356874, + "eval_f1_macro": 0.774389842453749, + "eval_loss": 0.2989857792854309, + "eval_pr_auc": 0.5879586440077966, + "eval_precision": 0.6571533113794366, + "eval_precision_macro": 0.7900469834133675, + "eval_pred_class_0": 16935, + "eval_pred_class_1": 2733, + "eval_predicted_binding_ratio": 0.13895668090298963, + "eval_recall": 0.5791680103192518, + "eval_recall_macro": 0.7613048960873738, + "eval_runtime": 0.2404, + "eval_samples_per_second": 677.963, + "eval_steps_per_second": 4.159, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1352 + }, + { + "epoch": 53.0, + "eval_accuracy": 0.8864144803742119, + "eval_auc": 0.8993954307902827, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6165465156196361, + "eval_f1_macro": 0.7749399244764847, + "eval_loss": 0.29775407910346985, + "eval_pr_auc": 0.5914083972949268, + "eval_precision": 0.6590825688073394, + "eval_precision_macro": 0.7910298047365505, + "eval_pred_class_0": 16943, + "eval_pred_class_1": 2725, + "eval_predicted_binding_ratio": 0.13854992881838518, + "eval_recall": 0.5791680103192518, + "eval_recall_macro": 0.7615463399215019, + "eval_runtime": 0.2547, + "eval_samples_per_second": 639.94, + "eval_steps_per_second": 3.926, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1378 + }, + { + "epoch": 54.0, + "eval_accuracy": 0.8866178564165141, + "eval_auc": 0.9007296980023092, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6186730506155951, + "eval_f1_macro": 0.7760410164688105, + "eval_loss": 0.296587198972702, + "eval_pr_auc": 0.59415963293408, + "eval_precision": 0.6585365853658537, + "eval_precision_macro": 0.7910908800004612, + "eval_pred_class_0": 16921, + "eval_pred_class_1": 2747, + "eval_predicted_binding_ratio": 0.1396684970510474, + "eval_recall": 0.583360206385037, + "eval_recall_macro": 0.7633708136410005, + "eval_runtime": 0.2605, + "eval_samples_per_second": 625.694, + "eval_steps_per_second": 3.839, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1404 + }, + { + "epoch": 55.0, + "eval_accuracy": 0.8872279845434208, + "eval_auc": 0.9019074471661075, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6215017064846416, + "eval_f1_macro": 0.7776226419864958, + "eval_loss": 0.2955063581466675, + "eval_pr_auc": 0.5967231989416606, + "eval_precision": 0.6600217470097861, + "eval_precision_macro": 0.7921612076464745, + "eval_pred_class_0": 16909, + "eval_pred_class_1": 2759, + "eval_predicted_binding_ratio": 0.14027862517795403, + "eval_recall": 0.5872299258303773, + "eval_recall_macro": 0.7653056733636705, + "eval_runtime": 0.2516, + "eval_samples_per_second": 647.741, + "eval_steps_per_second": 3.974, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1430 + }, + { + "epoch": 56.0, + "eval_accuracy": 0.8881431767337807, + "eval_auc": 0.9030401348597343, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6235455167693361, + "eval_f1_macro": 0.778929094226063, + "eval_loss": 0.29450690746307373, + "eval_pr_auc": 0.5995879576354929, + "eval_precision": 0.6642362376959533, + "eval_precision_macro": 0.7943337761596458, + "eval_pred_class_0": 16925, + "eval_pred_class_1": 2743, + "eval_predicted_binding_ratio": 0.13946512100874517, + "eval_recall": 0.5875524024508223, + "eval_recall_macro": 0.7659799798214153, + "eval_runtime": 0.2658, + "eval_samples_per_second": 613.135, + "eval_steps_per_second": 3.762, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1456 + }, + { + "epoch": 57.0, + "eval_accuracy": 0.8882957087655075, + "eval_auc": 0.9040587382005859, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6243802359377671, + "eval_f1_macro": 0.7793863433697854, + "eval_loss": 0.2936408519744873, + "eval_pr_auc": 0.6024898616264603, + "eval_precision": 0.6644832605531296, + "eval_precision_macro": 0.7945643253120258, + "eval_pred_class_0": 16920, + "eval_pred_class_1": 2748, + "eval_predicted_binding_ratio": 0.13971934106162295, + "eval_recall": 0.5888423089326024, + "eval_recall_macro": 0.7665947525830392, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.369, + "eval_steps_per_second": 3.898, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1482 + }, + { + "epoch": 57.69230769230769, + "grad_norm": 15613.5302734375, + "learning_rate": 9.992863736980368e-07, + "loss": 0.3115, + "step": 1500 + }, + { + "epoch": 58.0, + "eval_accuracy": 0.887888956680903, + "eval_auc": 0.9048886089216611, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6246808510638298, + "eval_f1_macro": 0.7793916194591735, + "eval_loss": 0.29281434416770935, + "eval_pr_auc": 0.603713292882509, + "eval_precision": 0.6614996395097332, + "eval_precision_macro": 0.7932808958765667, + "eval_pred_class_0": 16894, + "eval_pred_class_1": 2774, + "eval_predicted_binding_ratio": 0.14104128533658736, + "eval_recall": 0.5917445985166075, + "eval_recall_macro": 0.7675328292275196, + "eval_runtime": 0.2518, + "eval_samples_per_second": 647.428, + "eval_steps_per_second": 3.972, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1508 + }, + { + "epoch": 59.0, + "eval_accuracy": 0.887888956680903, + "eval_auc": 0.9056672866982218, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6251912289648138, + "eval_f1_macro": 0.7796389289833485, + "eval_loss": 0.2920655906200409, + "eval_pr_auc": 0.6054694565410179, + "eval_precision": 0.6610352264557872, + "eval_precision_macro": 0.7931493791878604, + "eval_pred_class_0": 16886, + "eval_pred_class_1": 2782, + "eval_predicted_binding_ratio": 0.14144803742119177, + "eval_recall": 0.5930345049983876, + "eval_recall_macro": 0.7680570605513457, + "eval_runtime": 0.235, + "eval_samples_per_second": 693.644, + "eval_steps_per_second": 4.255, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1534 + }, + { + "epoch": 60.0, + "eval_accuracy": 0.8882957087655075, + "eval_auc": 0.9063294664622661, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6254049445865303, + "eval_f1_macro": 0.7798830166450921, + "eval_loss": 0.29136955738067627, + "eval_pr_auc": 0.6071731602747702, + "eval_precision": 0.6635311143270622, + "eval_precision_macro": 0.7942892202018652, + "eval_pred_class_0": 16904, + "eval_pred_class_1": 2764, + "eval_predicted_binding_ratio": 0.14053284523083182, + "eval_recall": 0.5914221218961625, + "eval_recall_macro": 0.7676432152306912, + "eval_runtime": 0.2576, + "eval_samples_per_second": 632.702, + "eval_steps_per_second": 3.882, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1560 + }, + { + "epoch": 61.0, + "eval_accuracy": 0.8886007728289608, + "eval_auc": 0.9070085321120007, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6269368295589988, + "eval_f1_macro": 0.7807307642400976, + "eval_loss": 0.290680855512619, + "eval_pr_auc": 0.6088679721523397, + "eval_precision": 0.6641414141414141, + "eval_precision_macro": 0.7947837752525253, + "eval_pred_class_0": 16896, + "eval_pred_class_1": 2772, + "eval_predicted_binding_ratio": 0.14093959731543623, + "eval_recall": 0.5936794582392777, + "eval_recall_macro": 0.7687417029229828, + "eval_runtime": 0.2486, + "eval_samples_per_second": 655.642, + "eval_steps_per_second": 4.022, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1586 + }, + { + "epoch": 62.0, + "eval_accuracy": 0.8887533048606874, + "eval_auc": 0.9076136113046634, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6273841961852861, + "eval_f1_macro": 0.7810002501366307, + "eval_loss": 0.29004454612731934, + "eval_pr_auc": 0.6100991712198425, + "eval_precision": 0.664741970407795, + "eval_precision_macro": 0.7951158511564335, + "eval_pred_class_0": 16897, + "eval_pred_class_1": 2771, + "eval_predicted_binding_ratio": 0.1408887533048607, + "eval_recall": 0.5940019348597226, + "eval_recall_macro": 0.7689633021917374, + "eval_runtime": 0.2604, + "eval_samples_per_second": 625.871, + "eval_steps_per_second": 3.84, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1612 + }, + { + "epoch": 63.0, + "eval_accuracy": 0.8888041488712629, + "eval_auc": 0.9081136281710841, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6286296484971982, + "eval_f1_macro": 0.781621309135136, + "eval_loss": 0.28950682282447815, + "eval_pr_auc": 0.6111782063777282, + "eval_precision": 0.6639167862266858, + "eval_precision_macro": 0.794932326762632, + "eval_pred_class_0": 16880, + "eval_pred_class_1": 2788, + "eval_predicted_binding_ratio": 0.14175310148464512, + "eval_recall": 0.5969042244437278, + "eval_recall_macro": 0.7701730031496119, + "eval_runtime": 0.2489, + "eval_samples_per_second": 654.907, + "eval_steps_per_second": 4.018, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1638 + }, + { + "epoch": 64.0, + "eval_accuracy": 0.889363432987594, + "eval_auc": 0.9086206913667498, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6299319727891156, + "eval_f1_macro": 0.7824456611913058, + "eval_loss": 0.2888965606689453, + "eval_pr_auc": 0.6126297306007413, + "eval_precision": 0.6664267722202231, + "eval_precision_macro": 0.7962366556938643, + "eval_pred_class_0": 16889, + "eval_pred_class_1": 2779, + "eval_predicted_binding_ratio": 0.14129550538946511, + "eval_recall": 0.5972267010641729, + "eval_recall_macro": 0.7706360462524945, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.362, + "eval_steps_per_second": 3.935, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1664 + }, + { + "epoch": 65.0, + "eval_accuracy": 0.8900244051250763, + "eval_auc": 0.9091278518874051, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6305721605465414, + "eval_f1_macro": 0.782984177701663, + "eval_loss": 0.2884848117828369, + "eval_pr_auc": 0.6142560104629078, + "eval_precision": 0.6702977487291213, + "eval_precision_macro": 0.7980494301171916, + "eval_pred_class_0": 16914, + "eval_pred_class_1": 2754, + "eval_predicted_binding_ratio": 0.14002440512507627, + "eval_recall": 0.5952918413415027, + "eval_recall_macro": 0.7702420454972136, + "eval_runtime": 0.265, + "eval_samples_per_second": 614.992, + "eval_steps_per_second": 3.773, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1690 + }, + { + "epoch": 66.0, + "eval_accuracy": 0.8905836892414074, + "eval_auc": 0.9094545718773954, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6327645051194539, + "eval_f1_macro": 0.78423982216183, + "eval_loss": 0.2880232632160187, + "eval_pr_auc": 0.6147358252333397, + "eval_precision": 0.6719826023921711, + "eval_precision_macro": 0.7991174470355793, + "eval_pred_class_0": 16909, + "eval_pred_class_1": 2759, + "eval_predicted_binding_ratio": 0.14027862517795403, + "eval_recall": 0.5978716543050628, + "eval_recall_macro": 0.7716224934167917, + "eval_runtime": 0.2505, + "eval_samples_per_second": 650.665, + "eval_steps_per_second": 3.992, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1716 + }, + { + "epoch": 67.0, + "eval_accuracy": 0.8908887533048607, + "eval_auc": 0.9099424231201196, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6360244233378561, + "eval_f1_macro": 0.7859248910947654, + "eval_loss": 0.28752708435058594, + "eval_pr_auc": 0.6159928290925853, + "eval_precision": 0.6708407871198568, + "eval_precision_macro": 0.7990901618287602, + "eval_pred_class_0": 16873, + "eval_pred_class_1": 2795, + "eval_predicted_binding_ratio": 0.14210900955867398, + "eval_recall": 0.6046436633344082, + "eval_recall_macro": 0.7745557907424743, + "eval_runtime": 0.2471, + "eval_samples_per_second": 659.527, + "eval_steps_per_second": 4.046, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1742 + }, + { + "epoch": 68.0, + "eval_accuracy": 0.8910412853365873, + "eval_auc": 0.910346516476819, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6362247496180614, + "eval_f1_macro": 0.7860747010162366, + "eval_loss": 0.2870826721191406, + "eval_pr_auc": 0.6168347475575285, + "eval_precision": 0.6716845878136201, + "eval_precision_macro": 0.7994932004123201, + "eval_pred_class_0": 16878, + "eval_pred_class_1": 2790, + "eval_predicted_binding_ratio": 0.14185478950579622, + "eval_recall": 0.6043211867139633, + "eval_recall_macro": 0.7745152743493158, + "eval_runtime": 0.266, + "eval_samples_per_second": 612.866, + "eval_steps_per_second": 3.76, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1768 + }, + { + "epoch": 69.0, + "eval_accuracy": 0.8910921293471629, + "eval_auc": 0.9107640601470772, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6352179836512262, + "eval_f1_macro": 0.7856044496310159, + "eval_loss": 0.28665614128112793, + "eval_pr_auc": 0.6181373929491851, + "eval_precision": 0.673042223024179, + "eval_precision_macro": 0.7999465716529429, + "eval_pred_class_0": 16897, + "eval_pred_class_1": 2771, + "eval_predicted_binding_ratio": 0.1408887533048607, + "eval_recall": 0.6014188971299581, + "eval_recall_macro": 0.7733659343499732, + "eval_runtime": 0.2549, + "eval_samples_per_second": 639.359, + "eval_steps_per_second": 3.922, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1794 + }, + { + "epoch": 70.0, + "eval_accuracy": 0.8910412853365873, + "eval_auc": 0.9111344401273891, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6354822248681748, + "eval_f1_macro": 0.7857149295725039, + "eval_loss": 0.28624698519706726, + "eval_pr_auc": 0.6190938884927122, + "eval_precision": 0.6724262059035278, + "eval_precision_macro": 0.7997122148522967, + "eval_pred_class_0": 16890, + "eval_pred_class_1": 2778, + "eval_predicted_binding_ratio": 0.14124466137888958, + "eval_recall": 0.6023863269912931, + "eval_recall_macro": 0.7737289273635768, + "eval_runtime": 0.2673, + "eval_samples_per_second": 609.866, + "eval_steps_per_second": 3.742, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1820 + }, + { + "epoch": 71.0, + "eval_accuracy": 0.891193817368314, + "eval_auc": 0.9114138601724477, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.63642541624193, + "eval_f1_macro": 0.7862246662674524, + "eval_loss": 0.2858646512031555, + "eval_pr_auc": 0.6197061363545492, + "eval_precision": 0.6725314183123878, + "eval_precision_macro": 0.7998977650704271, + "eval_pred_class_0": 16883, + "eval_pred_class_1": 2785, + "eval_predicted_binding_ratio": 0.14160056945291843, + "eval_recall": 0.6039987100935182, + "eval_recall_macro": 0.7744747579561573, + "eval_runtime": 0.2386, + "eval_samples_per_second": 683.037, + "eval_steps_per_second": 4.19, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1846 + }, + { + "epoch": 72.0, + "eval_accuracy": 0.8913971934106162, + "eval_auc": 0.9118766988928523, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6373514431239389, + "eval_f1_macro": 0.7867436519572335, + "eval_loss": 0.2853938341140747, + "eval_pr_auc": 0.6212208808374569, + "eval_precision": 0.6730010756543564, + "eval_precision_macro": 0.8002424656665053, + "eval_pred_class_0": 16879, + "eval_pred_class_1": 2789, + "eval_predicted_binding_ratio": 0.14180394549522066, + "eval_recall": 0.6052886165752983, + "eval_recall_macro": 0.7751197111970474, + "eval_runtime": 0.249, + "eval_samples_per_second": 654.683, + "eval_steps_per_second": 4.016, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1872 + }, + { + "epoch": 73.0, + "eval_accuracy": 0.891193817368314, + "eval_auc": 0.9121406831945651, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.637411047102677, + "eval_f1_macro": 0.7867021736679862, + "eval_loss": 0.2850610911846161, + "eval_pr_auc": 0.6219405042066507, + "eval_precision": 0.6715458764726884, + "eval_precision_macro": 0.79960764506032, + "eval_pred_class_0": 16867, + "eval_pred_class_1": 2801, + "eval_predicted_binding_ratio": 0.14241407362212732, + "eval_recall": 0.6065785230570784, + "eval_recall_macro": 0.7755232206038093, + "eval_runtime": 0.2633, + "eval_samples_per_second": 618.95, + "eval_steps_per_second": 3.797, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1898 + }, + { + "epoch": 74.0, + "eval_accuracy": 0.8912446613788896, + "eval_auc": 0.9124869655074592, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.637026981164093, + "eval_f1_macro": 0.7865337040796394, + "eval_loss": 0.284681111574173, + "eval_pr_auc": 0.6229948438184316, + "eval_precision": 0.6722779369627507, + "eval_precision_macro": 0.7998744508231626, + "eval_pred_class_0": 16876, + "eval_pred_class_1": 2792, + "eval_predicted_binding_ratio": 0.14195647752694732, + "eval_recall": 0.6052886165752983, + "eval_recall_macro": 0.7750291697592493, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.745, + "eval_steps_per_second": 3.925, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1924 + }, + { + "epoch": 75.0, + "eval_accuracy": 0.8913971934106162, + "eval_auc": 0.9128360118500571, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6385786802030456, + "eval_f1_macro": 0.7873381643700563, + "eval_loss": 0.2842992842197418, + "eval_pr_auc": 0.6238239183047751, + "eval_precision": 0.6717693129227483, + "eval_precision_macro": 0.7998801484834395, + "eval_pred_class_0": 16859, + "eval_pred_class_1": 2809, + "eval_predicted_binding_ratio": 0.14282082570673174, + "eval_recall": 0.6085133827797484, + "eval_recall_macro": 0.7764302895066123, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.282, + "eval_steps_per_second": 3.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1950 + }, + { + "epoch": 76.0, + "eval_accuracy": 0.8913971934106162, + "eval_auc": 0.9131794425407568, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.638700947225981, + "eval_f1_macro": 0.7873973860112672, + "eval_loss": 0.2839708924293518, + "eval_pr_auc": 0.6248797725776689, + "eval_precision": 0.671647100675916, + "eval_precision_macro": 0.7998444318708524, + "eval_pred_class_0": 16857, + "eval_pred_class_1": 2811, + "eval_predicted_binding_ratio": 0.14292251372788287, + "eval_recall": 0.6088358594001935, + "eval_recall_macro": 0.7765613473375688, + "eval_runtime": 0.2644, + "eval_samples_per_second": 616.603, + "eval_steps_per_second": 3.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 1976 + }, + { + "epoch": 76.92307692307692, + "grad_norm": 18483.060546875, + "learning_rate": 9.912189372587507e-07, + "loss": 0.2796, + "step": 2000 + }, + { + "epoch": 77.0, + "eval_accuracy": 0.891651413463494, + "eval_auc": 0.9134005357195656, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6389971201084195, + "eval_f1_macro": 0.7876288504858192, + "eval_loss": 0.28368592262268066, + "eval_pr_auc": 0.6256253637409228, + "eval_precision": 0.6730906495360457, + "eval_precision_macro": 0.8005261145225586, + "eval_pred_class_0": 16866, + "eval_pred_class_1": 2802, + "eval_predicted_binding_ratio": 0.14246491763270286, + "eval_recall": 0.6081909061593035, + "eval_recall_macro": 0.7764501340719858, + "eval_runtime": 0.2557, + "eval_samples_per_second": 637.408, + "eval_steps_per_second": 3.91, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2002 + }, + { + "epoch": 78.0, + "eval_accuracy": 0.8918547895057962, + "eval_auc": 0.9135784263355038, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6399187404773997, + "eval_f1_macro": 0.788145675542478, + "eval_loss": 0.283357173204422, + "eval_pr_auc": 0.6259773419133142, + "eval_precision": 0.6735566642908054, + "eval_precision_macro": 0.8008691873227245, + "eval_pred_class_0": 16862, + "eval_pred_class_1": 2806, + "eval_predicted_binding_ratio": 0.14266829367500508, + "eval_recall": 0.6094808126410836, + "eval_recall_macro": 0.7770950873128759, + "eval_runtime": 0.2408, + "eval_samples_per_second": 676.804, + "eval_steps_per_second": 4.152, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2028 + }, + { + "epoch": 79.0, + "eval_accuracy": 0.8920073215375229, + "eval_auc": 0.9139080660751812, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6396335256192739, + "eval_f1_macro": 0.788060288914535, + "eval_loss": 0.28298139572143555, + "eval_pr_auc": 0.6270064177031266, + "eval_precision": 0.6749015395631937, + "eval_precision_macro": 0.8014211401519672, + "eval_pred_class_0": 16875, + "eval_pred_class_1": 2793, + "eval_predicted_binding_ratio": 0.14200732153752288, + "eval_recall": 0.6078684295388584, + "eval_recall_macro": 0.7765303395958915, + "eval_runtime": 0.255, + "eval_samples_per_second": 639.262, + "eval_steps_per_second": 3.922, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2054 + }, + { + "epoch": 80.0, + "eval_accuracy": 0.8923123856009763, + "eval_auc": 0.9140997087121456, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6410169491525424, + "eval_f1_macro": 0.7888360257187523, + "eval_loss": 0.28268861770629883, + "eval_pr_auc": 0.6271691682976167, + "eval_precision": 0.6755984280100036, + "eval_precision_macro": 0.8019346102940528, + "eval_pred_class_0": 16869, + "eval_pred_class_1": 2799, + "eval_predicted_binding_ratio": 0.1423123856009762, + "eval_recall": 0.6098032892615285, + "eval_recall_macro": 0.7774977694572265, + "eval_runtime": 0.2121, + "eval_samples_per_second": 768.576, + "eval_steps_per_second": 4.715, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2080 + }, + { + "epoch": 81.0, + "eval_accuracy": 0.8924140736221273, + "eval_auc": 0.9143275951752264, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6415989159891599, + "eval_f1_macro": 0.7891531311221224, + "eval_loss": 0.28239867091178894, + "eval_pr_auc": 0.6278526459152028, + "eval_precision": 0.6757046022119158, + "eval_precision_macro": 0.8020681327098713, + "eval_pred_class_0": 16865, + "eval_pred_class_1": 2803, + "eval_predicted_binding_ratio": 0.14251576164327842, + "eval_recall": 0.6107707191228636, + "eval_recall_macro": 0.7779513039086281, + "eval_runtime": 0.2642, + "eval_samples_per_second": 617.069, + "eval_steps_per_second": 3.786, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2106 + }, + { + "epoch": 82.0, + "eval_accuracy": 0.8925666056538539, + "eval_auc": 0.9145931950717662, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.641317263622475, + "eval_f1_macro": 0.7890694555517069, + "eval_loss": 0.2821619510650635, + "eval_pr_auc": 0.6287354538303637, + "eval_precision": 0.6770609318996416, + "eval_precision_macro": 0.8026257379014738, + "eval_pred_class_0": 16878, + "eval_pred_class_1": 2790, + "eval_predicted_binding_ratio": 0.14185478950579622, + "eval_recall": 0.6091583360206385, + "eval_recall_macro": 0.7773865561916435, + "eval_runtime": 0.2651, + "eval_samples_per_second": 614.835, + "eval_steps_per_second": 3.772, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2132 + }, + { + "epoch": 83.0, + "eval_accuracy": 0.8929733577384584, + "eval_auc": 0.9148203126674294, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6442453946256549, + "eval_f1_macro": 0.7906286370477088, + "eval_loss": 0.2817782461643219, + "eval_pr_auc": 0.6293872239214393, + "eval_precision": 0.6768465909090909, + "eval_precision_macro": 0.8029675631972466, + "eval_pred_class_0": 16852, + "eval_pred_class_1": 2816, + "eval_predicted_binding_ratio": 0.14317673378076062, + "eval_recall": 0.6146404385682038, + "eval_recall_macro": 0.7798559831520322, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.502, + "eval_steps_per_second": 3.88, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2158 + }, + { + "epoch": 84.0, + "eval_accuracy": 0.8929225137278829, + "eval_auc": 0.9150136584917113, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6435341909275558, + "eval_f1_macro": 0.7902665569930348, + "eval_loss": 0.2815438210964203, + "eval_pr_auc": 0.6300492382313454, + "eval_precision": 0.677235482721767, + "eval_precision_macro": 0.8030326633702543, + "eval_pred_class_0": 16861, + "eval_pred_class_1": 2807, + "eval_predicted_binding_ratio": 0.14271913768558064, + "eval_recall": 0.6130280554659787, + "eval_recall_macro": 0.7791705135179836, + "eval_runtime": 0.2597, + "eval_samples_per_second": 627.685, + "eval_steps_per_second": 3.851, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2184 + }, + { + "epoch": 85.0, + "eval_accuracy": 0.8928716697173072, + "eval_auc": 0.9151760160393141, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6437869822485207, + "eval_f1_macro": 0.7903713942390684, + "eval_loss": 0.28126296401023865, + "eval_pr_auc": 0.6304146488380505, + "eval_precision": 0.6766169154228856, + "eval_precision_macro": 0.8027975997548746, + "eval_pred_class_0": 16854, + "eval_pred_class_1": 2814, + "eval_predicted_binding_ratio": 0.14307504575960953, + "eval_recall": 0.6139954853273137, + "eval_recall_macro": 0.7795335065315872, + "eval_runtime": 0.2553, + "eval_samples_per_second": 638.417, + "eval_steps_per_second": 3.917, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2210 + }, + { + "epoch": 86.0, + "eval_accuracy": 0.8930750457596095, + "eval_auc": 0.9154795142867925, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.643861134631668, + "eval_f1_macro": 0.7904777241463208, + "eval_loss": 0.2809857428073883, + "eval_pr_auc": 0.6313964494387146, + "eval_precision": 0.677960057061341, + "eval_precision_macro": 0.803401280902587, + "eval_pred_class_0": 16864, + "eval_pred_class_1": 2804, + "eval_predicted_binding_ratio": 0.14256660565385398, + "eval_recall": 0.6130280554659787, + "eval_recall_macro": 0.7792610549557817, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.65, + "eval_steps_per_second": 3.924, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2236 + }, + { + "epoch": 87.0, + "eval_accuracy": 0.8930750457596095, + "eval_auc": 0.9156233995513745, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6458999831621485, + "eval_f1_macro": 0.7914651276711422, + "eval_loss": 0.28072693943977356, + "eval_pr_auc": 0.631527672626228, + "eval_precision": 0.6758280479210712, + "eval_precision_macro": 0.8027684505796682, + "eval_pred_class_0": 16830, + "eval_pred_class_1": 2838, + "eval_predicted_binding_ratio": 0.14429530201342283, + "eval_recall": 0.618510158013544, + "eval_recall_macro": 0.7814890380820421, + "eval_runtime": 0.2629, + "eval_samples_per_second": 620.026, + "eval_steps_per_second": 3.804, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2262 + }, + { + "epoch": 88.0, + "eval_accuracy": 0.8932275777913362, + "eval_auc": 0.9158623713307676, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6449103821440649, + "eval_f1_macro": 0.7910387587819241, + "eval_loss": 0.28049618005752563, + "eval_pr_auc": 0.6324229662687507, + "eval_precision": 0.6779239246356203, + "eval_precision_macro": 0.8035422055690709, + "eval_pred_class_0": 16855, + "eval_pred_class_1": 2813, + "eval_predicted_binding_ratio": 0.14302420174903396, + "eval_recall": 0.6149629151886489, + "eval_recall_macro": 0.7801379433793187, + "eval_runtime": 0.2576, + "eval_samples_per_second": 632.715, + "eval_steps_per_second": 3.882, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2288 + }, + { + "epoch": 89.0, + "eval_accuracy": 0.893125889770185, + "eval_auc": 0.9160425393514616, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6452919338508268, + "eval_f1_macro": 0.7911883195144587, + "eval_loss": 0.28025364875793457, + "eval_pr_auc": 0.6329798450144843, + "eval_precision": 0.6768141592920354, + "eval_precision_macro": 0.8031105172758937, + "eval_pred_class_0": 16843, + "eval_pred_class_1": 2825, + "eval_predicted_binding_ratio": 0.1436343298759406, + "eval_recall": 0.6165752982908739, + "eval_recall_macro": 0.7807328715755691, + "eval_runtime": 0.2595, + "eval_samples_per_second": 628.097, + "eval_steps_per_second": 3.853, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2314 + }, + { + "epoch": 90.0, + "eval_accuracy": 0.8936343298759406, + "eval_auc": 0.9161711835226769, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6463826910074375, + "eval_f1_macro": 0.7918927219250234, + "eval_loss": 0.2800801396369934, + "eval_pr_auc": 0.6332605675535015, + "eval_precision": 0.6792184724689165, + "eval_precision_macro": 0.8043336176502299, + "eval_pred_class_0": 16853, + "eval_pred_class_1": 2815, + "eval_predicted_binding_ratio": 0.14312588977018506, + "eval_recall": 0.6165752982908739, + "eval_recall_macro": 0.7810346763682292, + "eval_runtime": 0.2564, + "eval_samples_per_second": 635.634, + "eval_steps_per_second": 3.9, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2340 + }, + { + "epoch": 91.0, + "eval_accuracy": 0.8934309538336384, + "eval_auc": 0.9163414730569294, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6463044211947351, + "eval_f1_macro": 0.7917843566614202, + "eval_loss": 0.2798333764076233, + "eval_pr_auc": 0.633657166273441, + "eval_precision": 0.6778761061946903, + "eval_precision_macro": 0.8037305484960271, + "eval_pred_class_0": 16843, + "eval_pred_class_1": 2825, + "eval_predicted_binding_ratio": 0.1436343298759406, + "eval_recall": 0.617542728152209, + "eval_recall_macro": 0.7813071279440347, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.372, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2366 + }, + { + "epoch": 92.0, + "eval_accuracy": 0.8937868619076673, + "eval_auc": 0.9165959292421633, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6469494676356261, + "eval_f1_macro": 0.7922200583338069, + "eval_loss": 0.27958908677101135, + "eval_pr_auc": 0.6345494466448222, + "eval_precision": 0.6796875, + "eval_precision_macro": 0.8046253782933777, + "eval_pred_class_0": 16852, + "eval_pred_class_1": 2816, + "eval_predicted_binding_ratio": 0.14317673378076062, + "eval_recall": 0.617220251531764, + "eval_recall_macro": 0.7813873334679403, + "eval_runtime": 0.2594, + "eval_samples_per_second": 628.29, + "eval_steps_per_second": 3.855, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2392 + }, + { + "epoch": 93.0, + "eval_accuracy": 0.8938885499288184, + "eval_auc": 0.9168139566838005, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6482386650935446, + "eval_f1_macro": 0.7928796235385993, + "eval_loss": 0.27935075759887695, + "eval_pr_auc": 0.6350396647674293, + "eval_precision": 0.6790254237288136, + "eval_precision_macro": 0.8045281549625298, + "eval_pred_class_0": 16836, + "eval_pred_class_1": 2832, + "eval_predicted_binding_ratio": 0.14399023794996949, + "eval_recall": 0.6201225411157691, + "eval_recall_macro": 0.7826272149050808, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.307, + "eval_steps_per_second": 3.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2418 + }, + { + "epoch": 94.0, + "eval_accuracy": 0.8941936139922717, + "eval_auc": 0.916949978089225, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6492499578628013, + "eval_f1_macro": 0.7934750822155368, + "eval_loss": 0.27906060218811035, + "eval_pr_auc": 0.6354743744677446, + "eval_precision": 0.6800847457627118, + "eval_precision_macro": 0.8051469107763429, + "eval_pred_class_0": 16836, + "eval_pred_class_1": 2832, + "eval_predicted_binding_ratio": 0.14399023794996949, + "eval_recall": 0.6210899709771042, + "eval_recall_macro": 0.7832014712735463, + "eval_runtime": 0.255, + "eval_samples_per_second": 639.231, + "eval_steps_per_second": 3.922, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2444 + }, + { + "epoch": 95.0, + "eval_accuracy": 0.894498678055725, + "eval_auc": 0.9171692902207247, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6496707749451291, + "eval_f1_macro": 0.7937845988573549, + "eval_loss": 0.2788851261138916, + "eval_pr_auc": 0.6362118552671664, + "eval_precision": 0.6817859673990078, + "eval_precision_macro": 0.8059588747122072, + "eval_pred_class_0": 16846, + "eval_pred_class_1": 2822, + "eval_predicted_binding_ratio": 0.14348179784421394, + "eval_recall": 0.6204450177362141, + "eval_recall_macro": 0.7831204384872295, + "eval_runtime": 0.2567, + "eval_samples_per_second": 634.948, + "eval_steps_per_second": 3.895, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2470 + }, + { + "epoch": 96.0, + "eval_accuracy": 0.8946003660768761, + "eval_auc": 0.9172832383185145, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.64977192093259, + "eval_f1_macro": 0.7938688135051675, + "eval_loss": 0.27873092889785767, + "eval_pr_auc": 0.6365172336600542, + "eval_precision": 0.6823988644428672, + "eval_precision_macro": 0.8062439426071903, + "eval_pred_class_0": 16850, + "eval_pred_class_1": 2818, + "eval_predicted_binding_ratio": 0.14327842180191175, + "eval_recall": 0.6201225411157691, + "eval_recall_macro": 0.783049741614805, + "eval_runtime": 0.2642, + "eval_samples_per_second": 617.058, + "eval_steps_per_second": 3.786, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2496 + }, + { + "epoch": 96.15384615384616, + "grad_norm": 12855.328125, + "learning_rate": 9.74310718484651e-07, + "loss": 0.268, + "step": 2500 + }, + { + "epoch": 97.0, + "eval_accuracy": 0.8948037421191783, + "eval_auc": 0.9174495472606937, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6506837751139625, + "eval_f1_macro": 0.7943808843546348, + "eval_loss": 0.27859047055244446, + "eval_pr_auc": 0.636938752781715, + "eval_precision": 0.6828490432317506, + "eval_precision_macro": 0.8065794545376371, + "eval_pred_class_0": 16846, + "eval_pred_class_1": 2822, + "eval_predicted_binding_ratio": 0.14348179784421394, + "eval_recall": 0.6214124475975492, + "eval_recall_macro": 0.783694694855695, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.744, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2522 + }, + { + "epoch": 98.0, + "eval_accuracy": 0.8951596501932072, + "eval_auc": 0.9175816853990344, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6521592442645074, + "eval_f1_macro": 0.7952187504847441, + "eval_loss": 0.2782803475856781, + "eval_pr_auc": 0.6372336473067074, + "eval_precision": 0.6837637071100107, + "eval_precision_macro": 0.8072045778587877, + "eval_pred_class_0": 16841, + "eval_pred_class_1": 2827, + "eval_predicted_binding_ratio": 0.14373601789709173, + "eval_recall": 0.6233473073202193, + "eval_recall_macro": 0.784692305196296, + "eval_runtime": 0.2192, + "eval_samples_per_second": 743.723, + "eval_steps_per_second": 4.563, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2548 + }, + { + "epoch": 99.0, + "eval_accuracy": 0.8950579621720561, + "eval_auc": 0.917689375499995, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6515867656988521, + "eval_f1_macro": 0.7949062764205981, + "eval_loss": 0.2782030701637268, + "eval_pr_auc": 0.6376582660543189, + "eval_precision": 0.683669854764435, + "eval_precision_macro": 0.8070768389286704, + "eval_pred_class_0": 16845, + "eval_pred_class_1": 2823, + "eval_predicted_binding_ratio": 0.1435326418547895, + "eval_recall": 0.6223798774588842, + "eval_recall_macro": 0.7842387707448946, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.364, + "eval_steps_per_second": 3.935, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2574 + }, + { + "epoch": 100.0, + "eval_accuracy": 0.8950579621720561, + "eval_auc": 0.917862049496492, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6513513513513514, + "eval_f1_macro": 0.7947922665303561, + "eval_loss": 0.2779688835144043, + "eval_pr_auc": 0.6381115995039711, + "eval_precision": 0.6839304717985101, + "eval_precision_macro": 0.8071560484103832, + "eval_pred_class_0": 16849, + "eval_pred_class_1": 2819, + "eval_predicted_binding_ratio": 0.14332926581248728, + "eval_recall": 0.6217349242179941, + "eval_recall_macro": 0.7839766550829814, + "eval_runtime": 0.219, + "eval_samples_per_second": 744.395, + "eval_steps_per_second": 4.567, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2600 + }, + { + "epoch": 101.0, + "eval_accuracy": 0.8951088061826317, + "eval_auc": 0.9180189763096767, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6513435862768294, + "eval_f1_macro": 0.7948061179237165, + "eval_loss": 0.27778077125549316, + "eval_pr_auc": 0.6385730633658938, + "eval_precision": 0.6843039772727273, + "eval_precision_macro": 0.8073193278245905, + "eval_pred_class_0": 16852, + "eval_pred_class_1": 2816, + "eval_predicted_binding_ratio": 0.14317673378076062, + "eval_recall": 0.6214124475975492, + "eval_recall_macro": 0.783875777731291, + "eval_runtime": 0.2529, + "eval_samples_per_second": 644.591, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2626 + }, + { + "epoch": 102.0, + "eval_accuracy": 0.895413870246085, + "eval_auc": 0.9182058500221522, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6518869521069555, + "eval_f1_macro": 0.7951749356520059, + "eval_loss": 0.2776651084423065, + "eval_pr_auc": 0.6390385071928153, + "eval_precision": 0.6858974358974359, + "eval_precision_macro": 0.8081029290993704, + "eval_pred_class_0": 16860, + "eval_pred_class_1": 2808, + "eval_predicted_binding_ratio": 0.1427699816961562, + "eval_recall": 0.6210899709771042, + "eval_recall_macro": 0.7839258027759306, + "eval_runtime": 0.2557, + "eval_samples_per_second": 637.358, + "eval_steps_per_second": 3.91, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2652 + }, + { + "epoch": 103.0, + "eval_accuracy": 0.8952104942037828, + "eval_auc": 0.9183596527031714, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6527379949452401, + "eval_f1_macro": 0.7955166277830898, + "eval_loss": 0.2773846685886383, + "eval_pr_auc": 0.6394513455183966, + "eval_precision": 0.6834862385321101, + "eval_precision_macro": 0.8071702310636076, + "eval_pred_class_0": 16834, + "eval_pred_class_1": 2834, + "eval_predicted_binding_ratio": 0.1440919259711206, + "eval_recall": 0.6246372138019993, + "eval_recall_macro": 0.785246716999388, + "eval_runtime": 0.2264, + "eval_samples_per_second": 719.926, + "eval_steps_per_second": 4.417, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2678 + }, + { + "epoch": 104.0, + "eval_accuracy": 0.8953630262355095, + "eval_auc": 0.9185293680199856, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6535353535353535, + "eval_f1_macro": 0.7959556034654849, + "eval_loss": 0.2772791385650635, + "eval_pr_auc": 0.6398373166129732, + "eval_precision": 0.6836914406481155, + "eval_precision_macro": 0.8073814027769664, + "eval_pred_class_0": 16829, + "eval_pred_class_1": 2839, + "eval_predicted_binding_ratio": 0.14434614602399837, + "eval_recall": 0.6259271202837794, + "eval_recall_macro": 0.7858614897610121, + "eval_runtime": 0.2593, + "eval_samples_per_second": 628.649, + "eval_steps_per_second": 3.857, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2704 + }, + { + "epoch": 105.0, + "eval_accuracy": 0.8953630262355095, + "eval_auc": 0.9186085224340037, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6536519690339953, + "eval_f1_macro": 0.7960120658489734, + "eval_loss": 0.27707138657569885, + "eval_pr_auc": 0.6400560131071933, + "eval_precision": 0.6835621260119676, + "eval_precision_macro": 0.8073423632971826, + "eval_pred_class_0": 16827, + "eval_pred_class_1": 2841, + "eval_predicted_binding_ratio": 0.1444478340451495, + "eval_recall": 0.6262495969042244, + "eval_recall_macro": 0.7859925475919686, + "eval_runtime": 0.2509, + "eval_samples_per_second": 649.533, + "eval_steps_per_second": 3.985, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2730 + }, + { + "epoch": 106.0, + "eval_accuracy": 0.8953630262355095, + "eval_auc": 0.9187703544266627, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6535353535353535, + "eval_f1_macro": 0.7959556034654849, + "eval_loss": 0.27683117985725403, + "eval_pr_auc": 0.6407556071793965, + "eval_precision": 0.6836914406481155, + "eval_precision_macro": 0.8073814027769664, + "eval_pred_class_0": 16829, + "eval_pred_class_1": 2839, + "eval_predicted_binding_ratio": 0.14434614602399837, + "eval_recall": 0.6259271202837794, + "eval_recall_macro": 0.7858614897610121, + "eval_runtime": 0.3724, + "eval_samples_per_second": 437.667, + "eval_steps_per_second": 2.685, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2756 + }, + { + "epoch": 107.0, + "eval_accuracy": 0.8955664022778117, + "eval_auc": 0.918983612943811, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6543251430494783, + "eval_f1_macro": 0.7964085438550979, + "eval_loss": 0.2766495645046234, + "eval_pr_auc": 0.6413517959683596, + "eval_precision": 0.6842661034846885, + "eval_precision_macro": 0.8077537803332993, + "eval_pred_class_0": 16827, + "eval_pred_class_1": 2841, + "eval_predicted_binding_ratio": 0.1444478340451495, + "eval_recall": 0.6268945501451145, + "eval_recall_macro": 0.7863753851709456, + "eval_runtime": 0.2579, + "eval_samples_per_second": 631.95, + "eval_steps_per_second": 3.877, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2782 + }, + { + "epoch": 108.0, + "eval_accuracy": 0.8957189343095383, + "eval_auc": 0.9190842761805244, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.655583543240974, + "eval_f1_macro": 0.7970707027489733, + "eval_loss": 0.27643415331840515, + "eval_pr_auc": 0.6415079341486267, + "eval_precision": 0.6839523475823406, + "eval_precision_macro": 0.8078082185158045, + "eval_pred_class_0": 16814, + "eval_pred_class_1": 2854, + "eval_predicted_binding_ratio": 0.1451088061826317, + "eval_recall": 0.6294743631086747, + "eval_recall_macro": 0.7875143892563956, + "eval_runtime": 0.2441, + "eval_samples_per_second": 667.885, + "eval_steps_per_second": 4.097, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2808 + }, + { + "epoch": 109.0, + "eval_accuracy": 0.8958206223306895, + "eval_auc": 0.9192001707781057, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6554565327055658, + "eval_f1_macro": 0.7970445082288499, + "eval_loss": 0.2763550579547882, + "eval_pr_auc": 0.6419306315808602, + "eval_precision": 0.6848208011243851, + "eval_precision_macro": 0.8081695255176081, + "eval_pred_class_0": 16822, + "eval_pred_class_1": 2846, + "eval_predicted_binding_ratio": 0.14470205409802725, + "eval_recall": 0.6285069332473395, + "eval_recall_macro": 0.7871815767220581, + "eval_runtime": 0.2526, + "eval_samples_per_second": 645.285, + "eval_steps_per_second": 3.959, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2834 + }, + { + "epoch": 110.0, + "eval_accuracy": 0.8958206223306895, + "eval_auc": 0.9193565525713485, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.655919395465995, + "eval_f1_macro": 0.7972685860227431, + "eval_loss": 0.27626872062683105, + "eval_pr_auc": 0.6423732230660918, + "eval_precision": 0.684302733006307, + "eval_precision_macro": 0.8080131483516131, + "eval_pred_class_0": 16814, + "eval_pred_class_1": 2854, + "eval_predicted_binding_ratio": 0.1451088061826317, + "eval_recall": 0.6297968397291196, + "eval_recall_macro": 0.7877058080458841, + "eval_runtime": 0.2566, + "eval_samples_per_second": 635.337, + "eval_steps_per_second": 3.898, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2860 + }, + { + "epoch": 111.0, + "eval_accuracy": 0.8959223103518406, + "eval_auc": 0.9193956188221624, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.656485987581809, + "eval_f1_macro": 0.7975781647169913, + "eval_loss": 0.2761881351470947, + "eval_pr_auc": 0.6423085789727141, + "eval_precision": 0.6843946815955213, + "eval_precision_macro": 0.8081402319339891, + "eval_pred_class_0": 16810, + "eval_pred_class_1": 2858, + "eval_predicted_binding_ratio": 0.1453121822249339, + "eval_recall": 0.6307642695904547, + "eval_recall_macro": 0.7881593424972857, + "eval_runtime": 0.2618, + "eval_samples_per_second": 622.648, + "eval_steps_per_second": 3.82, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2886 + }, + { + "epoch": 112.0, + "eval_accuracy": 0.8960239983729916, + "eval_auc": 0.9196281671522437, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6564757265244414, + "eval_f1_macro": 0.7976085010119736, + "eval_loss": 0.2759994864463806, + "eval_pr_auc": 0.6431408853405497, + "eval_precision": 0.685133239831697, + "eval_precision_macro": 0.808462195558094, + "eval_pred_class_0": 16816, + "eval_pred_class_1": 2852, + "eval_predicted_binding_ratio": 0.14500711816148057, + "eval_recall": 0.6301193163495646, + "eval_recall_macro": 0.7879575877939047, + "eval_runtime": 0.2423, + "eval_samples_per_second": 672.612, + "eval_steps_per_second": 4.126, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2912 + }, + { + "epoch": 113.0, + "eval_accuracy": 0.8960239983729916, + "eval_auc": 0.9197686265771928, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6561291407432319, + "eval_f1_macro": 0.797440712214738, + "eval_loss": 0.27580633759498596, + "eval_pr_auc": 0.6436439478836922, + "eval_precision": 0.685523541813071, + "eval_precision_macro": 0.8085803418255701, + "eval_pred_class_0": 16822, + "eval_pred_class_1": 2846, + "eval_predicted_binding_ratio": 0.14470205409802725, + "eval_recall": 0.6291518864882296, + "eval_recall_macro": 0.7875644143010352, + "eval_runtime": 0.2575, + "eval_samples_per_second": 632.943, + "eval_steps_per_second": 3.883, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2938 + }, + { + "epoch": 114.0, + "eval_accuracy": 0.8962782184258694, + "eval_auc": 0.9199272079152, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6573731944910984, + "eval_f1_macro": 0.7981312081136818, + "eval_loss": 0.27558717131614685, + "eval_pr_auc": 0.6441453864761489, + "eval_precision": 0.6859446196985629, + "eval_precision_macro": 0.8089550633431857, + "eval_pred_class_0": 16815, + "eval_pred_class_1": 2853, + "eval_predicted_binding_ratio": 0.14505796217205613, + "eval_recall": 0.6310867462108997, + "eval_recall_macro": 0.7885016636831041, + "eval_runtime": 0.259, + "eval_samples_per_second": 629.272, + "eval_steps_per_second": 3.861, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2964 + }, + { + "epoch": 115.0, + "eval_accuracy": 0.8962782184258694, + "eval_auc": 0.9200543727465736, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6571428571428571, + "eval_f1_macro": 0.7980197003020941, + "eval_loss": 0.2754935324192047, + "eval_pr_auc": 0.6445859889828064, + "eval_precision": 0.6862056862056862, + "eval_precision_macro": 0.8090342302245508, + "eval_pred_class_0": 16819, + "eval_pred_class_1": 2849, + "eval_predicted_binding_ratio": 0.1448545861297539, + "eval_recall": 0.6304417929700097, + "eval_recall_macro": 0.7882395480211912, + "eval_runtime": 0.2588, + "eval_samples_per_second": 629.903, + "eval_steps_per_second": 3.864, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 2990 + }, + { + "epoch": 115.38461538461539, + "grad_norm": 13551.1435546875, + "learning_rate": 9.488660254357756e-07, + "loss": 0.2594, + "step": 3000 + }, + { + "epoch": 116.0, + "eval_accuracy": 0.8964815944681717, + "eval_auc": 0.9201159794649721, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6579301075268817, + "eval_f1_macro": 0.7984714041109127, + "eval_loss": 0.2753925323486328, + "eval_pr_auc": 0.6447630589609926, + "eval_precision": 0.6867765696246931, + "eval_precision_macro": 0.8094048157037065, + "eval_pred_class_0": 16817, + "eval_pred_class_1": 2851, + "eval_predicted_binding_ratio": 0.14495627415090503, + "eval_recall": 0.6314092228313447, + "eval_recall_macro": 0.7887534434311247, + "eval_runtime": 0.2556, + "eval_samples_per_second": 637.638, + "eval_steps_per_second": 3.912, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3016 + }, + { + "epoch": 117.0, + "eval_accuracy": 0.8964815944681717, + "eval_auc": 0.920259670079575, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6579301075268817, + "eval_f1_macro": 0.7984714041109127, + "eval_loss": 0.27515658736228943, + "eval_pr_auc": 0.6451647424161069, + "eval_precision": 0.6867765696246931, + "eval_precision_macro": 0.8094048157037065, + "eval_pred_class_0": 16817, + "eval_pred_class_1": 2851, + "eval_predicted_binding_ratio": 0.14495627415090503, + "eval_recall": 0.6314092228313447, + "eval_recall_macro": 0.7887534434311247, + "eval_runtime": 0.2523, + "eval_samples_per_second": 645.985, + "eval_steps_per_second": 3.963, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3042 + }, + { + "epoch": 118.0, + "eval_accuracy": 0.8963799064470206, + "eval_auc": 0.920327525062304, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6583975863224941, + "eval_f1_macro": 0.7986623832121911, + "eval_loss": 0.2750197649002075, + "eval_pr_auc": 0.6454382852682906, + "eval_precision": 0.6855148342059337, + "eval_precision_macro": 0.8089241730394068, + "eval_pred_class_0": 16803, + "eval_pred_class_1": 2865, + "eval_predicted_binding_ratio": 0.14566809029896277, + "eval_recall": 0.6333440825540149, + "eval_recall_macro": 0.7894794294583318, + "eval_runtime": 0.2545, + "eval_samples_per_second": 640.495, + "eval_steps_per_second": 3.929, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3068 + }, + { + "epoch": 119.0, + "eval_accuracy": 0.8963799064470206, + "eval_auc": 0.9205032745284716, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6585120643431636, + "eval_f1_macro": 0.7987177919414212, + "eval_loss": 0.27491119503974915, + "eval_pr_auc": 0.6460050422984182, + "eval_precision": 0.6853854202999651, + "eval_precision_macro": 0.8088851986923312, + "eval_pred_class_0": 16801, + "eval_pred_class_1": 2867, + "eval_predicted_binding_ratio": 0.1457697783201139, + "eval_recall": 0.6336665591744598, + "eval_recall_macro": 0.7896104872892882, + "eval_runtime": 0.2295, + "eval_samples_per_second": 710.293, + "eval_steps_per_second": 4.358, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3094 + }, + { + "epoch": 120.0, + "eval_accuracy": 0.8962782184258694, + "eval_auc": 0.9205394599595942, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6587487453997992, + "eval_f1_macro": 0.7987969999557303, + "eval_loss": 0.2747833728790283, + "eval_pr_auc": 0.6459576422229408, + "eval_precision": 0.6843934654153633, + "eval_precision_macro": 0.8084881983738124, + "eval_pred_class_0": 16791, + "eval_pred_class_1": 2877, + "eval_predicted_binding_ratio": 0.14627821842586944, + "eval_recall": 0.63495646565624, + "eval_recall_macro": 0.7900743576545822, + "eval_runtime": 0.2633, + "eval_samples_per_second": 619.079, + "eval_steps_per_second": 3.798, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3120 + }, + { + "epoch": 121.0, + "eval_accuracy": 0.8966341264998983, + "eval_auc": 0.9206969317927203, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6591785414920369, + "eval_f1_macro": 0.7991286912009045, + "eval_loss": 0.2746541202068329, + "eval_pr_auc": 0.6466379382676535, + "eval_precision": 0.6864525139664804, + "eval_precision_macro": 0.8094545359644352, + "eval_pred_class_0": 16804, + "eval_pred_class_1": 2864, + "eval_predicted_binding_ratio": 0.14561724628838724, + "eval_recall": 0.6339890357949048, + "eval_recall_macro": 0.7898924475165747, + "eval_runtime": 0.2553, + "eval_samples_per_second": 638.354, + "eval_steps_per_second": 3.916, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3146 + }, + { + "epoch": 122.0, + "eval_accuracy": 0.8966341264998983, + "eval_auc": 0.9208301697034432, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6592927769398358, + "eval_f1_macro": 0.7991839832435101, + "eval_loss": 0.2745382785797119, + "eval_pr_auc": 0.6470920919458031, + "eval_precision": 0.6863224005582693, + "eval_precision_macro": 0.809415217658018, + "eval_pred_class_0": 16802, + "eval_pred_class_1": 2866, + "eval_predicted_binding_ratio": 0.14571893430953833, + "eval_recall": 0.6343115124153499, + "eval_recall_macro": 0.7900235053475313, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.372, + "eval_steps_per_second": 3.898, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3172 + }, + { + "epoch": 123.0, + "eval_accuracy": 0.8967358145210494, + "eval_auc": 0.9209352222971863, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6597420003350645, + "eval_f1_macro": 0.7994367387715422, + "eval_loss": 0.2744734585285187, + "eval_pr_auc": 0.6474021950727136, + "eval_precision": 0.6865411436541143, + "eval_precision_macro": 0.809580095636581, + "eval_pred_class_0": 16800, + "eval_pred_class_1": 2868, + "eval_predicted_binding_ratio": 0.14582062233068943, + "eval_recall": 0.63495646565624, + "eval_recall_macro": 0.7903459819679763, + "eval_runtime": 0.2539, + "eval_samples_per_second": 641.926, + "eval_steps_per_second": 3.938, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3198 + }, + { + "epoch": 124.0, + "eval_accuracy": 0.896786658531625, + "eval_auc": 0.9210344159265571, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6599664991624791, + "eval_f1_macro": 0.7995630613656908, + "eval_loss": 0.2742863893508911, + "eval_pr_auc": 0.6479185906925482, + "eval_precision": 0.6866504008365284, + "eval_precision_macro": 0.8096624823993346, + "eval_pred_class_0": 16799, + "eval_pred_class_1": 2869, + "eval_predicted_binding_ratio": 0.145871466341265, + "eval_recall": 0.6352789422766849, + "eval_recall_macro": 0.7905072202781989, + "eval_runtime": 0.2336, + "eval_samples_per_second": 697.863, + "eval_steps_per_second": 4.281, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3224 + }, + { + "epoch": 125.0, + "eval_accuracy": 0.896888346552776, + "eval_auc": 0.9211382130279347, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6603015075376885, + "eval_f1_macro": 0.7997605361820792, + "eval_loss": 0.27425193786621094, + "eval_pr_auc": 0.6481518613470144, + "eval_precision": 0.6869989543394911, + "eval_precision_macro": 0.8098665228272252, + "eval_pred_class_0": 16799, + "eval_pred_class_1": 2869, + "eval_predicted_binding_ratio": 0.145871466341265, + "eval_recall": 0.63560141889713, + "eval_recall_macro": 0.7906986390676873, + "eval_runtime": 0.2655, + "eval_samples_per_second": 613.835, + "eval_steps_per_second": 3.766, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3250 + }, + { + "epoch": 126.0, + "eval_accuracy": 0.896888346552776, + "eval_auc": 0.9212811737051158, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6598456893659845, + "eval_f1_macro": 0.7995399118610351, + "eval_loss": 0.2741680145263672, + "eval_pr_auc": 0.6487724280436702, + "eval_precision": 0.6875218455085634, + "eval_precision_macro": 0.8100249793973471, + "eval_pred_class_0": 16807, + "eval_pred_class_1": 2861, + "eval_predicted_binding_ratio": 0.14546471425666058, + "eval_recall": 0.6343115124153499, + "eval_recall_macro": 0.7901744077438613, + "eval_runtime": 0.2775, + "eval_samples_per_second": 587.468, + "eval_steps_per_second": 3.604, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3276 + }, + { + "epoch": 127.0, + "eval_accuracy": 0.8966849705104739, + "eval_auc": 0.9212962201485035, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6608811748998665, + "eval_f1_macro": 0.7999703379297798, + "eval_loss": 0.2739817500114441, + "eval_pr_auc": 0.648761125236648, + "eval_precision": 0.6848841231407817, + "eval_precision_macro": 0.809033228048307, + "eval_pred_class_0": 16777, + "eval_pred_class_1": 2891, + "eval_predicted_binding_ratio": 0.1469900345739272, + "eval_recall": 0.6385037084811351, + "eval_recall_macro": 0.7917574376292318, + "eval_runtime": 0.2571, + "eval_samples_per_second": 634.003, + "eval_steps_per_second": 3.89, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3302 + }, + { + "epoch": 128.0, + "eval_accuracy": 0.8971425666056538, + "eval_auc": 0.9213870048987755, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6608549874266555, + "eval_f1_macro": 0.8001167448595325, + "eval_loss": 0.2739529609680176, + "eval_pr_auc": 0.6489216955933252, + "eval_precision": 0.6881983240223464, + "eval_precision_macro": 0.8104762150937725, + "eval_pred_class_0": 16804, + "eval_pred_class_1": 2864, + "eval_predicted_binding_ratio": 0.14561724628838724, + "eval_recall": 0.63560141889713, + "eval_recall_macro": 0.7908495414640173, + "eval_runtime": 0.26, + "eval_samples_per_second": 626.828, + "eval_steps_per_second": 3.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3328 + }, + { + "epoch": 129.0, + "eval_accuracy": 0.8970408785845028, + "eval_auc": 0.9215601460552225, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6604058359885964, + "eval_f1_macro": 0.7998640212813866, + "eval_loss": 0.27383002638816833, + "eval_pr_auc": 0.6495277773578755, + "eval_precision": 0.6879804332634522, + "eval_precision_macro": 0.8103117684584547, + "eval_pred_class_0": 16806, + "eval_pred_class_1": 2862, + "eval_predicted_binding_ratio": 0.1455155582672361, + "eval_recall": 0.63495646565624, + "eval_recall_macro": 0.7905270648435724, + "eval_runtime": 0.2665, + "eval_samples_per_second": 611.602, + "eval_steps_per_second": 3.752, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3354 + }, + { + "epoch": 130.0, + "eval_accuracy": 0.8969900345739272, + "eval_auc": 0.9216559138449605, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6610906657745065, + "eval_f1_macro": 0.8001778048579948, + "eval_loss": 0.27363157272338867, + "eval_pr_auc": 0.6497825902519792, + "eval_precision": 0.6868265554396942, + "eval_precision_macro": 0.8099131883862756, + "eval_pred_class_0": 16791, + "eval_pred_class_1": 2877, + "eval_predicted_binding_ratio": 0.14627821842586944, + "eval_recall": 0.6372138019993551, + "eval_recall_macro": 0.7914142891810019, + "eval_runtime": 0.2147, + "eval_samples_per_second": 759.086, + "eval_steps_per_second": 4.657, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3380 + }, + { + "epoch": 131.0, + "eval_accuracy": 0.8970917225950783, + "eval_auc": 0.9217394576160085, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6619906479625919, + "eval_f1_macro": 0.800648676506185, + "eval_loss": 0.27347350120544434, + "eval_pr_auc": 0.6501280761818352, + "eval_precision": 0.686525805334257, + "eval_precision_macro": 0.8099216238398834, + "eval_pred_class_0": 16781, + "eval_pred_class_1": 2887, + "eval_predicted_binding_ratio": 0.14678665853162498, + "eval_recall": 0.6391486617220251, + "eval_recall_macro": 0.7922609971252728, + "eval_runtime": 0.2669, + "eval_samples_per_second": 610.642, + "eval_steps_per_second": 3.746, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3406 + }, + { + "epoch": 132.0, + "eval_accuracy": 0.8970917225950783, + "eval_auc": 0.9218276924515536, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6609715242881072, + "eval_f1_macro": 0.8001554858148563, + "eval_loss": 0.2734222412109375, + "eval_pr_auc": 0.6505561229387223, + "eval_precision": 0.6876960613454165, + "eval_precision_macro": 0.8102746036830064, + "eval_pred_class_0": 16799, + "eval_pred_class_1": 2869, + "eval_predicted_binding_ratio": 0.145871466341265, + "eval_recall": 0.63624637213802, + "eval_recall_macro": 0.7910814766466644, + "eval_runtime": 0.2569, + "eval_samples_per_second": 634.527, + "eval_steps_per_second": 3.893, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3432 + }, + { + "epoch": 133.0, + "eval_accuracy": 0.8976001627008339, + "eval_auc": 0.9219583415175538, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6620805369127517, + "eval_f1_macro": 0.8008688878235859, + "eval_loss": 0.273334801197052, + "eval_pr_auc": 0.6510448354697362, + "eval_precision": 0.6901014340678558, + "eval_precision_macro": 0.8114972635268781, + "eval_pred_class_0": 16809, + "eval_pred_class_1": 2859, + "eval_predicted_binding_ratio": 0.14536302623550945, + "eval_recall": 0.63624637213802, + "eval_recall_macro": 0.7913832814393245, + "eval_runtime": 0.2542, + "eval_samples_per_second": 641.276, + "eval_steps_per_second": 3.934, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3458 + }, + { + "epoch": 134.0, + "eval_accuracy": 0.8976001627008339, + "eval_auc": 0.92201595791138, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6632107023411371, + "eval_f1_macro": 0.8014158800109571, + "eval_loss": 0.2731546461582184, + "eval_pr_auc": 0.6511084632800272, + "eval_precision": 0.6887808266759291, + "eval_precision_macro": 0.8110948031169865, + "eval_pred_class_0": 16789, + "eval_pred_class_1": 2879, + "eval_predicted_binding_ratio": 0.14637990644702054, + "eval_recall": 0.6394711383424702, + "eval_recall_macro": 0.7926938597488895, + "eval_runtime": 0.2551, + "eval_samples_per_second": 638.927, + "eval_steps_per_second": 3.92, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3484 + }, + { + "epoch": 134.6153846153846, + "grad_norm": 16295.5498046875, + "learning_rate": 9.153428025759045e-07, + "loss": 0.2515, + "step": 3500 + }, + { + "epoch": 135.0, + "eval_accuracy": 0.8977526947325605, + "eval_auc": 0.9221653809678686, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.663543583737661, + "eval_f1_macro": 0.8016300010477627, + "eval_loss": 0.2730526030063629, + "eval_pr_auc": 0.6517474669432921, + "eval_precision": 0.6894993045897079, + "eval_precision_macro": 0.8114599905511665, + "eval_pred_class_0": 16792, + "eval_pred_class_1": 2876, + "eval_predicted_binding_ratio": 0.14622737441529388, + "eval_recall": 0.6394711383424702, + "eval_recall_macro": 0.7927844011866875, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.318, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3510 + }, + { + "epoch": 136.0, + "eval_accuracy": 0.8976510067114094, + "eval_auc": 0.9222144814251072, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6634342083263668, + "eval_f1_macro": 0.8015417181640828, + "eval_loss": 0.27299538254737854, + "eval_pr_auc": 0.6516584418617962, + "eval_precision": 0.6888888888888889, + "eval_precision_macro": 0.8111766341037249, + "eval_pred_class_0": 16788, + "eval_pred_class_1": 2880, + "eval_predicted_binding_ratio": 0.1464307504575961, + "eval_recall": 0.6397936149629152, + "eval_recall_macro": 0.792855098059112, + "eval_runtime": 0.2563, + "eval_samples_per_second": 635.888, + "eval_steps_per_second": 3.901, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3536 + }, + { + "epoch": 137.0, + "eval_accuracy": 0.8978543827537117, + "eval_auc": 0.922406970789481, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6637656903765691, + "eval_f1_macro": 0.8017728364954996, + "eval_loss": 0.27294018864631653, + "eval_pr_auc": 0.6525345183514315, + "eval_precision": 0.6899791231732777, + "eval_precision_macro": 0.8117038643138033, + "eval_pred_class_0": 16794, + "eval_pred_class_1": 2874, + "eval_predicted_binding_ratio": 0.14612568639414278, + "eval_recall": 0.6394711383424702, + "eval_recall_macro": 0.7928447621452195, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.531, + "eval_steps_per_second": 3.936, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3562 + }, + { + "epoch": 138.0, + "eval_accuracy": 0.8979560707748627, + "eval_auc": 0.9224206449505158, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6643251379829402, + "eval_f1_macro": 0.8020789283763069, + "eval_loss": 0.272890567779541, + "eval_pr_auc": 0.652455646685698, + "eval_precision": 0.6900625434329395, + "eval_precision_macro": 0.8118269834496443, + "eval_pred_class_0": 16790, + "eval_pred_class_1": 2878, + "eval_predicted_binding_ratio": 0.14632906243644497, + "eval_recall": 0.6404385682038052, + "eval_recall_macro": 0.793298296596621, + "eval_runtime": 0.2465, + "eval_samples_per_second": 661.298, + "eval_steps_per_second": 4.057, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3588 + }, + { + "epoch": 139.0, + "eval_accuracy": 0.8980069147854383, + "eval_auc": 0.9225580970332872, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6647727272727273, + "eval_f1_macro": 0.802313204605421, + "eval_loss": 0.2726689577102661, + "eval_pr_auc": 0.6528952820360587, + "eval_precision": 0.6899063475546305, + "eval_precision_macro": 0.8118283599554506, + "eval_pred_class_0": 16785, + "eval_pred_class_1": 2883, + "eval_predicted_binding_ratio": 0.14658328248932276, + "eval_recall": 0.6414059980651403, + "eval_recall_macro": 0.7937216505687565, + "eval_runtime": 0.1981, + "eval_samples_per_second": 822.798, + "eval_steps_per_second": 5.048, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3614 + }, + { + "epoch": 140.0, + "eval_accuracy": 0.898159446817165, + "eval_auc": 0.9226611836622408, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6642078792958928, + "eval_f1_macro": 0.8020928521767887, + "eval_loss": 0.2726409435272217, + "eval_pr_auc": 0.6533512747607447, + "eval_precision": 0.6916899441340782, + "eval_precision_macro": 0.8125195733524473, + "eval_pred_class_0": 16804, + "eval_pred_class_1": 2864, + "eval_predicted_binding_ratio": 0.14561724628838724, + "eval_recall": 0.6388261851015802, + "eval_recall_macro": 0.7927637293589026, + "eval_runtime": 0.2516, + "eval_samples_per_second": 647.759, + "eval_steps_per_second": 3.974, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3640 + }, + { + "epoch": 141.0, + "eval_accuracy": 0.8983119788488916, + "eval_auc": 0.9227806014244446, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.666110183639399, + "eval_f1_macro": 0.8030664874893451, + "eval_loss": 0.2725253105163574, + "eval_pr_auc": 0.6539162157851042, + "eval_precision": 0.6905503634475597, + "eval_precision_macro": 0.8123173177271173, + "eval_pred_class_0": 16779, + "eval_pred_class_1": 2889, + "eval_predicted_binding_ratio": 0.14688834655277608, + "eval_recall": 0.6433408577878104, + "eval_recall_macro": 0.7946890804300916, + "eval_runtime": 0.2669, + "eval_samples_per_second": 610.673, + "eval_steps_per_second": 3.746, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3666 + }, + { + "epoch": 142.0, + "eval_accuracy": 0.898159446817165, + "eval_auc": 0.9228251470721713, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6665556850341269, + "eval_f1_macro": 0.8032289361592369, + "eval_loss": 0.27245020866394043, + "eval_pr_auc": 0.6537875546315605, + "eval_precision": 0.6889194769442533, + "eval_precision_macro": 0.8116772542816959, + "eval_pred_class_0": 16762, + "eval_pred_class_1": 2906, + "eval_predicted_binding_ratio": 0.1477526947325605, + "eval_recall": 0.6455981941309256, + "eval_recall_macro": 0.795515943808989, + "eval_runtime": 0.2562, + "eval_samples_per_second": 636.157, + "eval_steps_per_second": 3.903, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3692 + }, + { + "epoch": 143.0, + "eval_accuracy": 0.8984645108806183, + "eval_auc": 0.9229446329618678, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6663324979114453, + "eval_f1_macro": 0.8032271166958205, + "eval_loss": 0.2723686695098877, + "eval_pr_auc": 0.6543603067705216, + "eval_precision": 0.6914008321775312, + "eval_precision_macro": 0.8127225800544472, + "eval_pred_class_0": 16784, + "eval_pred_class_1": 2884, + "eval_predicted_binding_ratio": 0.14663412649989832, + "eval_recall": 0.6430183811673653, + "eval_recall_macro": 0.7946485640369331, + "eval_runtime": 0.2531, + "eval_samples_per_second": 644.012, + "eval_steps_per_second": 3.951, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3718 + }, + { + "epoch": 144.0, + "eval_accuracy": 0.898159446817165, + "eval_auc": 0.9229539761608667, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6658882402001668, + "eval_f1_macro": 0.8029060288610683, + "eval_loss": 0.2724270820617676, + "eval_pr_auc": 0.6540924505284794, + "eval_precision": 0.6897028334485141, + "eval_precision_macro": 0.8119135366717949, + "eval_pred_class_0": 16774, + "eval_pred_class_1": 2894, + "eval_predicted_binding_ratio": 0.14714256660565386, + "eval_recall": 0.6436633344082554, + "eval_recall_macro": 0.7947295968232501, + "eval_runtime": 0.2578, + "eval_samples_per_second": 632.29, + "eval_steps_per_second": 3.879, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3744 + }, + { + "epoch": 145.0, + "eval_accuracy": 0.8983119788488916, + "eval_auc": 0.923075982767793, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6671105193075899, + "eval_f1_macro": 0.8035504588856721, + "eval_loss": 0.27218085527420044, + "eval_pr_auc": 0.6548951387334131, + "eval_precision": 0.6893704850361198, + "eval_precision_macro": 0.8119604647601695, + "eval_pred_class_0": 16761, + "eval_pred_class_1": 2907, + "eval_predicted_binding_ratio": 0.14780353874313606, + "eval_recall": 0.6462431473718155, + "eval_recall_macro": 0.7958686009087, + "eval_runtime": 0.2047, + "eval_samples_per_second": 796.353, + "eval_steps_per_second": 4.886, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3770 + }, + { + "epoch": 146.0, + "eval_accuracy": 0.8984645108806183, + "eval_auc": 0.9231917800403848, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6671111851975329, + "eval_f1_macro": 0.8036038872863508, + "eval_loss": 0.27211907505989075, + "eval_pr_auc": 0.6553363499797747, + "eval_precision": 0.6904761904761905, + "eval_precision_macro": 0.8124414345344577, + "eval_pred_class_0": 16770, + "eval_pred_class_1": 2898, + "eval_predicted_binding_ratio": 0.14734594264795606, + "eval_recall": 0.6452757175104805, + "eval_recall_macro": 0.7955659688536286, + "eval_runtime": 0.2587, + "eval_samples_per_second": 629.971, + "eval_steps_per_second": 3.865, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3796 + }, + { + "epoch": 147.0, + "eval_accuracy": 0.8985661989017694, + "eval_auc": 0.9232496689441817, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6676661669165417, + "eval_f1_macro": 0.8039077842052783, + "eval_loss": 0.2721000015735626, + "eval_pr_auc": 0.655340056976625, + "eval_precision": 0.6905582356995176, + "eval_precision_macro": 0.812564099359958, + "eval_pred_class_0": 16766, + "eval_pred_class_1": 2902, + "eval_predicted_binding_ratio": 0.14754931869025828, + "eval_recall": 0.6462431473718155, + "eval_recall_macro": 0.7960195033050301, + "eval_runtime": 0.2516, + "eval_samples_per_second": 647.767, + "eval_steps_per_second": 3.974, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3822 + }, + { + "epoch": 148.0, + "eval_accuracy": 0.8985661989017694, + "eval_auc": 0.9233701767462686, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6673336668334167, + "eval_f1_macro": 0.8037469198020228, + "eval_loss": 0.2719952464103699, + "eval_pr_auc": 0.6558934902075464, + "eval_precision": 0.6909530386740331, + "eval_precision_macro": 0.8126837695158862, + "eval_pred_class_0": 16772, + "eval_pred_class_1": 2896, + "eval_predicted_binding_ratio": 0.14724425462680496, + "eval_recall": 0.6452757175104805, + "eval_recall_macro": 0.7956263298121606, + "eval_runtime": 0.2562, + "eval_samples_per_second": 636.275, + "eval_steps_per_second": 3.904, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3848 + }, + { + "epoch": 149.0, + "eval_accuracy": 0.8983628228594671, + "eval_auc": 0.9234171749837325, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6676641729010806, + "eval_f1_macro": 0.8038359878940744, + "eval_loss": 0.27190613746643066, + "eval_pr_auc": 0.6558672612955618, + "eval_precision": 0.6890871654083733, + "eval_precision_macro": 0.8119245066626444, + "eval_pred_class_0": 16754, + "eval_pred_class_1": 2914, + "eval_predicted_binding_ratio": 0.14815944681716495, + "eval_recall": 0.6475330538535956, + "eval_recall_macro": 0.796423012711792, + "eval_runtime": 0.1964, + "eval_samples_per_second": 830.09, + "eval_steps_per_second": 5.093, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3874 + }, + { + "epoch": 150.0, + "eval_accuracy": 0.8986678869229204, + "eval_auc": 0.9235316972989609, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6682204095222241, + "eval_f1_macro": 0.8042113179058208, + "eval_loss": 0.2718164622783661, + "eval_pr_auc": 0.6562427373314145, + "eval_precision": 0.6906400550584997, + "eval_precision_macro": 0.8126866902186664, + "eval_pred_class_0": 16762, + "eval_pred_class_1": 2906, + "eval_predicted_binding_ratio": 0.1477526947325605, + "eval_recall": 0.6472105772331506, + "eval_recall_macro": 0.7964730377564316, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.207, + "eval_steps_per_second": 3.842, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3900 + }, + { + "epoch": 151.0, + "eval_accuracy": 0.8984645108806183, + "eval_auc": 0.923607970893288, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.66744379683597, + "eval_f1_macro": 0.8037648014211952, + "eval_loss": 0.27192452549934387, + "eval_pr_auc": 0.6563118529429329, + "eval_precision": 0.6900826446280992, + "eval_precision_macro": 0.8123224008156005, + "eval_pred_class_0": 16764, + "eval_pred_class_1": 2904, + "eval_predicted_binding_ratio": 0.1476510067114094, + "eval_recall": 0.6462431473718155, + "eval_recall_macro": 0.7959591423464981, + "eval_runtime": 0.1771, + "eval_samples_per_second": 920.181, + "eval_steps_per_second": 5.645, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3926 + }, + { + "epoch": 152.0, + "eval_accuracy": 0.8986170429123449, + "eval_auc": 0.9236869111923289, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6687707641196013, + "eval_f1_macro": 0.8044598207679289, + "eval_loss": 0.2716231048107147, + "eval_pr_auc": 0.6568319718650024, + "eval_precision": 0.6896197327852004, + "eval_precision_macro": 0.812330315374629, + "eval_pred_class_0": 16749, + "eval_pred_class_1": 2919, + "eval_predicted_binding_ratio": 0.1484136668700427, + "eval_recall": 0.6491454369558207, + "eval_recall_macro": 0.7972292042629046, + "eval_runtime": 0.241, + "eval_samples_per_second": 676.369, + "eval_steps_per_second": 4.15, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3952 + }, + { + "epoch": 153.0, + "eval_accuracy": 0.8986678869229204, + "eval_auc": 0.9238590985638783, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6671120761650242, + "eval_f1_macro": 0.8036750821318089, + "eval_loss": 0.2716236114501953, + "eval_pr_auc": 0.657752547087354, + "eval_precision": 0.691961191961192, + "eval_precision_macro": 0.8130882112827054, + "eval_pred_class_0": 16782, + "eval_pred_class_1": 2886, + "eval_predicted_binding_ratio": 0.14673581452104942, + "eval_recall": 0.6439858110287005, + "eval_recall_macro": 0.7951624594468667, + "eval_runtime": 0.2469, + "eval_samples_per_second": 660.315, + "eval_steps_per_second": 4.051, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 3978 + }, + { + "epoch": 153.84615384615384, + "grad_norm": 13863.017578125, + "learning_rate": 8.743443888522679e-07, + "loss": 0.244, + "step": 4000 + }, + { + "epoch": 154.0, + "eval_accuracy": 0.8986170429123449, + "eval_auc": 0.9239073328287097, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6674449633088726, + "eval_f1_macro": 0.8038184624582756, + "eval_loss": 0.27161940932273865, + "eval_pr_auc": 0.6577434889769634, + "eval_precision": 0.6911917098445596, + "eval_precision_macro": 0.8128050601926549, + "eval_pred_class_0": 16773, + "eval_pred_class_1": 2895, + "eval_predicted_binding_ratio": 0.1471934106162294, + "eval_recall": 0.6452757175104805, + "eval_recall_macro": 0.7956565102914266, + "eval_runtime": 0.2311, + "eval_samples_per_second": 705.281, + "eval_steps_per_second": 4.327, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4004 + }, + { + "epoch": 155.0, + "eval_accuracy": 0.8983119788488916, + "eval_auc": 0.9240325316952941, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6666666666666666, + "eval_f1_macro": 0.8033357331413487, + "eval_loss": 0.2715211510658264, + "eval_pr_auc": 0.6581299979478261, + "eval_precision": 0.689893066574681, + "eval_precision_macro": 0.812118099868532, + "eval_pred_class_0": 16769, + "eval_pred_class_1": 2899, + "eval_predicted_binding_ratio": 0.14739678665853162, + "eval_recall": 0.6449532408900355, + "eval_recall_macro": 0.7953443695848741, + "eval_runtime": 0.2265, + "eval_samples_per_second": 719.735, + "eval_steps_per_second": 4.416, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4030 + }, + { + "epoch": 156.0, + "eval_accuracy": 0.8987187309334961, + "eval_auc": 0.9240861966945435, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6688829787234043, + "eval_f1_macro": 0.8045495325789891, + "eval_loss": 0.27123013138771057, + "eval_pr_auc": 0.6585643355556502, + "eval_precision": 0.6902229845626072, + "eval_precision_macro": 0.8126098507842583, + "eval_pred_class_0": 16753, + "eval_pred_class_1": 2915, + "eval_predicted_binding_ratio": 0.14821029082774048, + "eval_recall": 0.6488229603353757, + "eval_recall_macro": 0.7971585073904801, + "eval_runtime": 0.2445, + "eval_samples_per_second": 666.601, + "eval_steps_per_second": 4.09, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4056 + }, + { + "epoch": 157.0, + "eval_accuracy": 0.8987695749440716, + "eval_auc": 0.9242226073999265, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.668111351891982, + "eval_f1_macro": 0.8041939607346642, + "eval_loss": 0.2712385952472687, + "eval_pr_auc": 0.6591839305732792, + "eval_precision": 0.6915113871635611, + "eval_precision_macro": 0.8130484783164258, + "eval_pred_class_0": 16770, + "eval_pred_class_1": 2898, + "eval_predicted_binding_ratio": 0.14734594264795606, + "eval_recall": 0.6462431473718155, + "eval_recall_macro": 0.7961402252220942, + "eval_runtime": 0.2571, + "eval_samples_per_second": 634.046, + "eval_steps_per_second": 3.89, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4082 + }, + { + "epoch": 158.0, + "eval_accuracy": 0.8989729509863738, + "eval_auc": 0.9241500613527003, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6699883740242485, + "eval_f1_macro": 0.8051727852411502, + "eval_loss": 0.27118799090385437, + "eval_pr_auc": 0.6587023192472763, + "eval_precision": 0.6907534246575342, + "eval_precision_macro": 0.8130146392454138, + "eval_pred_class_0": 16748, + "eval_pred_class_1": 2920, + "eval_predicted_binding_ratio": 0.14846451088061827, + "eval_recall": 0.6504353434376008, + "eval_recall_macro": 0.7979646989415927, + "eval_runtime": 0.2165, + "eval_samples_per_second": 752.927, + "eval_steps_per_second": 4.619, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4108 + }, + { + "epoch": 159.0, + "eval_accuracy": 0.8990237949969494, + "eval_auc": 0.9243365652302152, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6693306693306693, + "eval_f1_macro": 0.8048723553674049, + "eval_loss": 0.2712218463420868, + "eval_pr_auc": 0.6594296109425748, + "eval_precision": 0.6919104991394148, + "eval_precision_macro": 0.8134133417966358, + "eval_pred_class_0": 16763, + "eval_pred_class_1": 2905, + "eval_predicted_binding_ratio": 0.14770185072198494, + "eval_recall": 0.6481780070944857, + "eval_recall_macro": 0.7970774746041632, + "eval_runtime": 0.256, + "eval_samples_per_second": 636.627, + "eval_steps_per_second": 3.906, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4134 + }, + { + "epoch": 160.0, + "eval_accuracy": 0.8990746390075249, + "eval_auc": 0.9243664342695147, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6709762970329852, + "eval_f1_macro": 0.805686028587357, + "eval_loss": 0.2712063789367676, + "eval_pr_auc": 0.6596905428752633, + "eval_precision": 0.6903137789904502, + "eval_precision_macro": 0.8129807422676916, + "eval_pred_class_0": 16736, + "eval_pred_class_1": 2932, + "eval_predicted_binding_ratio": 0.1490746390075249, + "eval_recall": 0.6526926797807159, + "eval_recall_macro": 0.7989424647168202, + "eval_runtime": 0.1787, + "eval_samples_per_second": 912.327, + "eval_steps_per_second": 5.597, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4160 + }, + { + "epoch": 161.0, + "eval_accuracy": 0.8989729509863738, + "eval_auc": 0.9244822704721024, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6700979578283247, + "eval_f1_macro": 0.8052257867669526, + "eval_loss": 0.2710643708705902, + "eval_pr_auc": 0.6603246471492675, + "eval_precision": 0.6906228610540726, + "eval_precision_macro": 0.8129753502690642, + "eval_pred_class_0": 16746, + "eval_pred_class_1": 2922, + "eval_predicted_binding_ratio": 0.14856619890176936, + "eval_recall": 0.6507578200580458, + "eval_recall_macro": 0.7980957567725492, + "eval_runtime": 0.2165, + "eval_samples_per_second": 752.853, + "eval_steps_per_second": 4.619, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4186 + }, + { + "epoch": 162.0, + "eval_accuracy": 0.8989729509863738, + "eval_auc": 0.9245231275027241, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6700979578283247, + "eval_f1_macro": 0.8052257867669526, + "eval_loss": 0.2710554301738739, + "eval_pr_auc": 0.6603878428843051, + "eval_precision": 0.6906228610540726, + "eval_precision_macro": 0.8129753502690642, + "eval_pred_class_0": 16746, + "eval_pred_class_1": 2922, + "eval_predicted_binding_ratio": 0.14856619890176936, + "eval_recall": 0.6507578200580458, + "eval_recall_macro": 0.7980957567725492, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.523, + "eval_steps_per_second": 3.936, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4212 + }, + { + "epoch": 163.0, + "eval_accuracy": 0.8993288590604027, + "eval_auc": 0.9246038585815736, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6719681908548708, + "eval_f1_macro": 0.8062543656977057, + "eval_loss": 0.27094030380249023, + "eval_pr_auc": 0.6606722300563197, + "eval_precision": 0.6909710391822828, + "eval_precision_macro": 0.8134231279100321, + "eval_pred_class_0": 16733, + "eval_pred_class_1": 2935, + "eval_predicted_binding_ratio": 0.14922717103925157, + "eval_recall": 0.653982586262496, + "eval_recall_macro": 0.7996175984369762, + "eval_runtime": 0.2218, + "eval_samples_per_second": 735.026, + "eval_steps_per_second": 4.509, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4238 + }, + { + "epoch": 164.0, + "eval_accuracy": 0.8992271710392515, + "eval_auc": 0.9247262642209572, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6713101160862355, + "eval_f1_macro": 0.8059006594362601, + "eval_loss": 0.2709755003452301, + "eval_pr_auc": 0.6610856256039915, + "eval_precision": 0.691020826220553, + "eval_precision_macro": 0.8133400325618567, + "eval_pred_class_0": 16739, + "eval_pred_class_1": 2929, + "eval_predicted_binding_ratio": 0.14892210697579825, + "eval_recall": 0.6526926797807159, + "eval_recall_macro": 0.7990330061546183, + "eval_runtime": 0.2493, + "eval_samples_per_second": 653.878, + "eval_steps_per_second": 4.012, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4264 + }, + { + "epoch": 165.0, + "eval_accuracy": 0.8998881431767338, + "eval_auc": 0.9247699826062725, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6738446248136492, + "eval_f1_macro": 0.8073568599908361, + "eval_loss": 0.2707850933074951, + "eval_pr_auc": 0.6613417671448518, + "eval_precision": 0.6927792915531336, + "eval_precision_macro": 0.8145046350187375, + "eval_pred_class_0": 16732, + "eval_pred_class_1": 2936, + "eval_predicted_binding_ratio": 0.14927801504982713, + "eval_recall": 0.6559174459851661, + "eval_recall_macro": 0.8007359306946413, + "eval_runtime": 0.1724, + "eval_samples_per_second": 945.745, + "eval_steps_per_second": 5.802, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4290 + }, + { + "epoch": 166.0, + "eval_accuracy": 0.9001423632296115, + "eval_auc": 0.9248060123174118, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6753719008264463, + "eval_f1_macro": 0.8081840577256068, + "eval_loss": 0.2708885669708252, + "eval_pr_auc": 0.6612557604235284, + "eval_precision": 0.6927772126144456, + "eval_precision_macro": 0.8147479579430862, + "eval_pred_class_0": 16719, + "eval_pred_class_1": 2949, + "eval_predicted_binding_ratio": 0.14993898718730933, + "eval_recall": 0.6588197355691713, + "eval_recall_macro": 0.8020663535695799, + "eval_runtime": 0.2656, + "eval_samples_per_second": 613.671, + "eval_steps_per_second": 3.765, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4316 + }, + { + "epoch": 167.0, + "eval_accuracy": 0.8999389871873094, + "eval_auc": 0.9248204358808662, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6741721854304635, + "eval_f1_macro": 0.8075329932438238, + "eval_loss": 0.270906925201416, + "eval_pr_auc": 0.6612342465257918, + "eval_precision": 0.692752636951344, + "eval_precision_macro": 0.8145453662370445, + "eval_pred_class_0": 16729, + "eval_pred_class_1": 2939, + "eval_predicted_binding_ratio": 0.1494305470815538, + "eval_recall": 0.6565623992260561, + "eval_recall_macro": 0.8010282268358203, + "eval_runtime": 0.2263, + "eval_samples_per_second": 720.252, + "eval_steps_per_second": 4.419, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4342 + }, + { + "epoch": 168.0, + "eval_accuracy": 0.900091519219036, + "eval_auc": 0.9249249239896699, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6750454770960806, + "eval_f1_macro": 0.8080084845902765, + "eval_loss": 0.2706829011440277, + "eval_pr_auc": 0.6618605064537387, + "eval_precision": 0.6928038017651053, + "eval_precision_macro": 0.8147071275300828, + "eval_pred_class_0": 16722, + "eval_pred_class_1": 2946, + "eval_predicted_binding_ratio": 0.14978645515558267, + "eval_recall": 0.6581747823282812, + "eval_recall_macro": 0.8017740574284009, + "eval_runtime": 0.2625, + "eval_samples_per_second": 620.89, + "eval_steps_per_second": 3.809, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4368 + }, + { + "epoch": 169.0, + "eval_accuracy": 0.8999898311978849, + "eval_auc": 0.9250021027063997, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6742838218248054, + "eval_f1_macro": 0.8076047153692607, + "eval_loss": 0.2705872058868408, + "eval_pr_auc": 0.6621173041985378, + "eval_precision": 0.6929884275017019, + "eval_precision_macro": 0.8146651641393745, + "eval_pred_class_0": 16730, + "eval_pred_class_1": 2938, + "eval_predicted_binding_ratio": 0.14937970307097823, + "eval_recall": 0.6565623992260561, + "eval_recall_macro": 0.8010584073150864, + "eval_runtime": 0.2622, + "eval_samples_per_second": 621.716, + "eval_steps_per_second": 3.814, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4394 + }, + { + "epoch": 170.0, + "eval_accuracy": 0.900091519219036, + "eval_auc": 0.9250495097088196, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6750454770960806, + "eval_f1_macro": 0.8080084845902765, + "eval_loss": 0.2706546485424042, + "eval_pr_auc": 0.6620136434657915, + "eval_precision": 0.6928038017651053, + "eval_precision_macro": 0.8147071275300828, + "eval_pred_class_0": 16722, + "eval_pred_class_1": 2946, + "eval_predicted_binding_ratio": 0.14978645515558267, + "eval_recall": 0.6581747823282812, + "eval_recall_macro": 0.8017740574284009, + "eval_runtime": 0.2696, + "eval_samples_per_second": 604.539, + "eval_steps_per_second": 3.709, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4420 + }, + { + "epoch": 171.0, + "eval_accuracy": 0.8999389871873094, + "eval_auc": 0.9250856659424456, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6748182419035029, + "eval_f1_macro": 0.8078453665953039, + "eval_loss": 0.27060601115226746, + "eval_pr_auc": 0.6621340116082275, + "eval_precision": 0.6919688241274145, + "eval_precision_macro": 0.814310068581025, + "eval_pred_class_0": 16717, + "eval_pred_class_1": 2951, + "eval_predicted_binding_ratio": 0.15004067520846046, + "eval_recall": 0.6584972589487262, + "eval_recall_macro": 0.8018145738215594, + "eval_runtime": 0.2527, + "eval_samples_per_second": 645.143, + "eval_steps_per_second": 3.958, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4446 + }, + { + "epoch": 172.0, + "eval_accuracy": 0.9000406752084604, + "eval_auc": 0.9252334636716082, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6741796486576069, + "eval_f1_macro": 0.8075720776469225, + "eval_loss": 0.27041611075401306, + "eval_pr_auc": 0.6630610344326174, + "eval_precision": 0.6934878963518581, + "eval_precision_macro": 0.8148646532849819, + "eval_pred_class_0": 16735, + "eval_pred_class_1": 2933, + "eval_predicted_binding_ratio": 0.14912548301810047, + "eval_recall": 0.6559174459851661, + "eval_recall_macro": 0.8008264721324394, + "eval_runtime": 0.2332, + "eval_samples_per_second": 698.832, + "eval_steps_per_second": 4.287, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4472 + }, + { + "epoch": 173.0, + "eval_accuracy": 0.8998372991661582, + "eval_auc": 0.9253198298673536, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6740569159497022, + "eval_f1_macro": 0.8074417704823604, + "eval_loss": 0.2705075442790985, + "eval_pr_auc": 0.662984991174244, + "eval_precision": 0.6921508664627931, + "eval_precision_macro": 0.8142667635751932, + "eval_pred_class_0": 16725, + "eval_pred_class_1": 2943, + "eval_predicted_binding_ratio": 0.149633923123856, + "eval_recall": 0.6568848758465011, + "eval_recall_macro": 0.8010989237082449, + "eval_runtime": 0.2134, + "eval_samples_per_second": 763.793, + "eval_steps_per_second": 4.686, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4498 + }, + { + "epoch": 173.07692307692307, + "grad_norm": 15784.1748046875, + "learning_rate": 8.266086590174684e-07, + "loss": 0.2376, + "step": 4500 + }, + { + "epoch": 174.0, + "eval_accuracy": 0.8999898311978849, + "eval_auc": 0.9254431016991443, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6740679370339685, + "eval_f1_macro": 0.8075003208787752, + "eval_loss": 0.2703414559364319, + "eval_pr_auc": 0.6637127837233647, + "eval_precision": 0.6932515337423313, + "eval_precision_macro": 0.8147445669189726, + "eval_pred_class_0": 16734, + "eval_pred_class_1": 2934, + "eval_predicted_binding_ratio": 0.14917632702867603, + "eval_recall": 0.6559174459851661, + "eval_recall_macro": 0.8007962916531735, + "eval_runtime": 0.1979, + "eval_samples_per_second": 823.53, + "eval_steps_per_second": 5.052, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4524 + }, + { + "epoch": 175.0, + "eval_accuracy": 0.9001423632296115, + "eval_auc": 0.9255452150782025, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6746189529489728, + "eval_f1_macro": 0.8078199869849969, + "eval_loss": 0.27038928866386414, + "eval_pr_auc": 0.6639609479782242, + "eval_precision": 0.6936967632027258, + "eval_precision_macro": 0.8150250385068789, + "eval_pred_class_0": 16733, + "eval_pred_class_1": 2935, + "eval_predicted_binding_ratio": 0.14922717103925157, + "eval_recall": 0.6565623992260561, + "eval_recall_macro": 0.8011489487528844, + "eval_runtime": 0.2587, + "eval_samples_per_second": 630.051, + "eval_steps_per_second": 3.865, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4550 + }, + { + "epoch": 176.0, + "eval_accuracy": 0.8999898311978849, + "eval_auc": 0.9255934298780361, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6742838218248054, + "eval_f1_macro": 0.8076047153692607, + "eval_loss": 0.2702932059764862, + "eval_pr_auc": 0.6640830183725597, + "eval_precision": 0.6929884275017019, + "eval_precision_macro": 0.8146651641393745, + "eval_pred_class_0": 16730, + "eval_pred_class_1": 2938, + "eval_predicted_binding_ratio": 0.14937970307097823, + "eval_recall": 0.6565623992260561, + "eval_recall_macro": 0.8010584073150864, + "eval_runtime": 0.1918, + "eval_samples_per_second": 849.843, + "eval_steps_per_second": 5.214, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4576 + }, + { + "epoch": 177.0, + "eval_accuracy": 0.8998881431767338, + "eval_auc": 0.9255964274877148, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6745992397950752, + "eval_f1_macro": 0.8077217319600283, + "eval_loss": 0.27021023631095886, + "eval_pr_auc": 0.6640237853140233, + "eval_precision": 0.691864406779661, + "eval_precision_macro": 0.8142298466485935, + "eval_pred_class_0": 16718, + "eval_pred_class_1": 2950, + "eval_predicted_binding_ratio": 0.1499898311978849, + "eval_recall": 0.6581747823282812, + "eval_recall_macro": 0.8016533355113369, + "eval_runtime": 0.2437, + "eval_samples_per_second": 668.962, + "eval_steps_per_second": 4.104, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4602 + }, + { + "epoch": 178.0, + "eval_accuracy": 0.900549115314216, + "eval_auc": 0.9257395146873824, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6756218905472637, + "eval_f1_macro": 0.8084468667292255, + "eval_loss": 0.27011793851852417, + "eval_pr_auc": 0.6647736112265655, + "eval_precision": 0.695459201092523, + "eval_precision_macro": 0.8159475347119822, + "eval_pred_class_0": 16739, + "eval_pred_class_1": 2929, + "eval_predicted_binding_ratio": 0.14892210697579825, + "eval_recall": 0.6568848758465011, + "eval_recall_macro": 0.801521450417969, + "eval_runtime": 0.2294, + "eval_samples_per_second": 710.491, + "eval_steps_per_second": 4.359, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4628 + }, + { + "epoch": 179.0, + "eval_accuracy": 0.9003965832824893, + "eval_auc": 0.9258087711499611, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6755010766937221, + "eval_f1_macro": 0.8083352405901716, + "eval_loss": 0.2701837122440338, + "eval_pr_auc": 0.6649310182754444, + "eval_precision": 0.6944822888283378, + "eval_precision_macro": 0.8155055479522995, + "eval_pred_class_0": 16732, + "eval_pred_class_1": 2936, + "eval_predicted_binding_ratio": 0.14927801504982713, + "eval_recall": 0.6575298290873912, + "eval_recall_macro": 0.8016930246420839, + "eval_runtime": 0.1963, + "eval_samples_per_second": 830.526, + "eval_steps_per_second": 5.095, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4654 + }, + { + "epoch": 180.0, + "eval_accuracy": 0.9004474272930649, + "eval_auc": 0.9258260560681089, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6762566137566137, + "eval_f1_macro": 0.8087183092815753, + "eval_loss": 0.27005937695503235, + "eval_pr_auc": 0.6649666141685525, + "eval_precision": 0.6939260264675942, + "eval_precision_macro": 0.8153859544454471, + "eval_pred_class_0": 16721, + "eval_pred_class_1": 2947, + "eval_predicted_binding_ratio": 0.14983729916615823, + "eval_recall": 0.6594646888100613, + "eval_recall_macro": 0.802509552107089, + "eval_runtime": 0.2598, + "eval_samples_per_second": 627.51, + "eval_steps_per_second": 3.85, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4680 + }, + { + "epoch": 181.0, + "eval_accuracy": 0.9005999593247915, + "eval_auc": 0.9259436830505047, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6764851894754261, + "eval_f1_macro": 0.8088820685009666, + "eval_loss": 0.2699625492095947, + "eval_pr_auc": 0.6656768815498022, + "eval_precision": 0.6947654656696125, + "eval_precision_macro": 0.8157852199805673, + "eval_pred_class_0": 16726, + "eval_pred_class_1": 2942, + "eval_predicted_binding_ratio": 0.14958307911328045, + "eval_recall": 0.6591422121896162, + "eval_recall_macro": 0.8024690357139305, + "eval_runtime": 0.1825, + "eval_samples_per_second": 893.316, + "eval_steps_per_second": 5.48, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4706 + }, + { + "epoch": 182.0, + "eval_accuracy": 0.9011083994305471, + "eval_auc": 0.9259346220939755, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6787778695293146, + "eval_f1_macro": 0.8101680579881181, + "eval_loss": 0.2698967456817627, + "eval_pr_auc": 0.6657214869012321, + "eval_precision": 0.6956668923493569, + "eval_precision_macro": 0.8165423129929146, + "eval_pred_class_0": 16714, + "eval_pred_class_1": 2954, + "eval_predicted_binding_ratio": 0.15019320724018712, + "eval_recall": 0.6626894550145115, + "eval_recall_macro": 0.8042124766471122, + "eval_runtime": 0.2487, + "eval_samples_per_second": 655.302, + "eval_steps_per_second": 4.02, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4732 + }, + { + "epoch": 183.0, + "eval_accuracy": 0.9010575554199716, + "eval_auc": 0.9260415822575143, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6782407407407407, + "eval_f1_macro": 0.8098906179070202, + "eval_loss": 0.26987963914871216, + "eval_pr_auc": 0.6659658035928079, + "eval_precision": 0.6959619952494062, + "eval_precision_macro": 0.8165833539431051, + "eval_pred_class_0": 16721, + "eval_pred_class_1": 2947, + "eval_predicted_binding_ratio": 0.14983729916615823, + "eval_recall": 0.6613995485327314, + "eval_recall_macro": 0.8036580648440201, + "eval_runtime": 0.2595, + "eval_samples_per_second": 628.055, + "eval_steps_per_second": 3.853, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4758 + }, + { + "epoch": 184.0, + "eval_accuracy": 0.9010575554199716, + "eval_auc": 0.9260998701937684, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6781343036718491, + "eval_f1_macro": 0.8098391554406106, + "eval_loss": 0.26986023783683777, + "eval_pr_auc": 0.6661468987350531, + "eval_precision": 0.6960950764006791, + "eval_precision_macro": 0.8166237506024205, + "eval_pred_class_0": 16723, + "eval_pred_class_1": 2945, + "eval_predicted_binding_ratio": 0.1497356111450071, + "eval_recall": 0.6610770719122864, + "eval_recall_macro": 0.8035270070130636, + "eval_runtime": 0.237, + "eval_samples_per_second": 687.681, + "eval_steps_per_second": 4.219, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4784 + }, + { + "epoch": 185.0, + "eval_accuracy": 0.9012609314622737, + "eval_auc": 0.9262103340569317, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6787958981144558, + "eval_f1_macro": 0.810230030763446, + "eval_loss": 0.26986950635910034, + "eval_pr_auc": 0.6665273243194801, + "eval_precision": 0.6967741935483871, + "eval_precision_macro": 0.8170231070594294, + "eval_pred_class_0": 16723, + "eval_pred_class_1": 2945, + "eval_predicted_binding_ratio": 0.1497356111450071, + "eval_recall": 0.6617220251531764, + "eval_recall_macro": 0.8039098445920405, + "eval_runtime": 0.2365, + "eval_samples_per_second": 689.338, + "eval_steps_per_second": 4.229, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4810 + }, + { + "epoch": 186.0, + "eval_accuracy": 0.9014134634940004, + "eval_auc": 0.9262454683781669, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6797687861271676, + "eval_f1_macro": 0.8107536578092345, + "eval_loss": 0.2697572410106659, + "eval_pr_auc": 0.666716804071224, + "eval_precision": 0.6966824644549763, + "eval_precision_macro": 0.8171398441695726, + "eval_pred_class_0": 16714, + "eval_pred_class_1": 2954, + "eval_predicted_binding_ratio": 0.15019320724018712, + "eval_recall": 0.6636568848758465, + "eval_recall_macro": 0.8047867330155776, + "eval_runtime": 0.1794, + "eval_samples_per_second": 908.74, + "eval_steps_per_second": 5.575, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4836 + }, + { + "epoch": 187.0, + "eval_accuracy": 0.9011083994305471, + "eval_auc": 0.9263210315000697, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6784592494627211, + "eval_f1_macro": 0.810014015033881, + "eval_loss": 0.26983824372291565, + "eval_pr_auc": 0.6669884807739552, + "eval_precision": 0.6960651289009498, + "eval_precision_macro": 0.8166629472255945, + "eval_pred_class_0": 16720, + "eval_pred_class_1": 2948, + "eval_predicted_binding_ratio": 0.14988814317673377, + "eval_recall": 0.6617220251531764, + "eval_recall_macro": 0.8038193031542425, + "eval_runtime": 0.1795, + "eval_samples_per_second": 908.044, + "eval_steps_per_second": 5.571, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4862 + }, + { + "epoch": 188.0, + "eval_accuracy": 0.9012100874516982, + "eval_auc": 0.9264191350895575, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6786836447825368, + "eval_f1_macro": 0.8101580079180191, + "eval_loss": 0.26967287063598633, + "eval_pr_auc": 0.667515048707415, + "eval_precision": 0.6965376782077393, + "eval_precision_macro": 0.8169029737767557, + "eval_pred_class_0": 16722, + "eval_pred_class_1": 2946, + "eval_predicted_binding_ratio": 0.14978645515558267, + "eval_recall": 0.6617220251531764, + "eval_recall_macro": 0.8038796641127746, + "eval_runtime": 0.1851, + "eval_samples_per_second": 880.782, + "eval_steps_per_second": 5.404, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4888 + }, + { + "epoch": 189.0, + "eval_accuracy": 0.9013626194834249, + "eval_auc": 0.9265750497228504, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6782752902155887, + "eval_f1_macro": 0.8100137635248964, + "eval_loss": 0.2697126567363739, + "eval_pr_auc": 0.6680073523436961, + "eval_precision": 0.698190508706043, + "eval_precision_macro": 0.8175521514197519, + "eval_pred_class_0": 16739, + "eval_pred_class_1": 2929, + "eval_predicted_binding_ratio": 0.14892210697579825, + "eval_recall": 0.6594646888100613, + "eval_recall_macro": 0.8030528007338771, + "eval_runtime": 0.2626, + "eval_samples_per_second": 620.766, + "eval_steps_per_second": 3.808, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4914 + }, + { + "epoch": 190.0, + "eval_accuracy": 0.9017185275574537, + "eval_auc": 0.9265310393625665, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6808651147432723, + "eval_f1_macro": 0.8113902183590456, + "eval_loss": 0.26952171325683594, + "eval_pr_auc": 0.6676611850618266, + "eval_precision": 0.6975642760487145, + "eval_precision_macro": 0.8176966904417818, + "eval_pred_class_0": 16712, + "eval_pred_class_1": 2956, + "eval_predicted_binding_ratio": 0.1502948952613382, + "eval_recall": 0.6649467913576266, + "eval_recall_macro": 0.8054920472149997, + "eval_runtime": 0.2625, + "eval_samples_per_second": 620.883, + "eval_steps_per_second": 3.809, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4940 + }, + { + "epoch": 191.0, + "eval_accuracy": 0.901921903599756, + "eval_auc": 0.9266825451738318, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6812097174020822, + "eval_f1_macro": 0.8116278420268636, + "eval_loss": 0.2694892883300781, + "eval_pr_auc": 0.668268169384226, + "eval_precision": 0.6986440677966101, + "eval_precision_macro": 0.8182178348314311, + "eval_pred_class_0": 16718, + "eval_pred_class_1": 2950, + "eval_predicted_binding_ratio": 0.1499898311978849, + "eval_recall": 0.6646243147371815, + "eval_recall_macro": 0.8054817113011072, + "eval_runtime": 0.2617, + "eval_samples_per_second": 622.763, + "eval_steps_per_second": 3.821, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4966 + }, + { + "epoch": 192.0, + "eval_accuracy": 0.9018710595891803, + "eval_auc": 0.9266840147811743, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6814130075932651, + "eval_f1_macro": 0.8117083668893665, + "eval_loss": 0.26940062642097473, + "eval_pr_auc": 0.6682450098479531, + "eval_precision": 0.6980047345282381, + "eval_precision_macro": 0.817974900326174, + "eval_pred_class_0": 16711, + "eval_pred_class_1": 2957, + "eval_predicted_binding_ratio": 0.15034573927191378, + "eval_recall": 0.6655917445985166, + "eval_recall_macro": 0.8058447043147107, + "eval_runtime": 0.2532, + "eval_samples_per_second": 643.715, + "eval_steps_per_second": 3.949, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 4992 + }, + { + "epoch": 192.30769230769232, + "grad_norm": 15858.0107421875, + "learning_rate": 7.72994743624204e-07, + "loss": 0.2316, + "step": 5000 + }, + { + "epoch": 193.0, + "eval_accuracy": 0.9021761236526337, + "eval_auc": 0.9268377687996988, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6816677696889477, + "eval_f1_macro": 0.8119380540142443, + "eval_loss": 0.26932862401008606, + "eval_pr_auc": 0.6692251134414691, + "eval_precision": 0.6999660210669385, + "eval_precision_macro": 0.8188619343002854, + "eval_pred_class_0": 16725, + "eval_pred_class_1": 2943, + "eval_predicted_binding_ratio": 0.149633923123856, + "eval_recall": 0.6643018381167365, + "eval_recall_macro": 0.8055015558664808, + "eval_runtime": 0.1767, + "eval_samples_per_second": 922.594, + "eval_steps_per_second": 5.66, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5018 + }, + { + "epoch": 194.0, + "eval_accuracy": 0.9022269676632093, + "eval_auc": 0.92693509378927, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6815697963238947, + "eval_f1_macro": 0.8119084228038069, + "eval_loss": 0.2693846523761749, + "eval_pr_auc": 0.6695232673057094, + "eval_precision": 0.7004765146358066, + "eval_precision_macro": 0.8190667092007484, + "eval_pred_class_0": 16730, + "eval_pred_class_1": 2938, + "eval_predicted_binding_ratio": 0.14937970307097823, + "eval_recall": 0.6636568848758465, + "eval_recall_macro": 0.8052696206838338, + "eval_runtime": 0.2611, + "eval_samples_per_second": 624.258, + "eval_steps_per_second": 3.83, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5044 + }, + { + "epoch": 195.0, + "eval_accuracy": 0.9022778116737848, + "eval_auc": 0.926939395553809, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6825239511067063, + "eval_f1_macro": 0.8123875088746679, + "eval_loss": 0.269380122423172, + "eval_pr_auc": 0.6693235837806535, + "eval_precision": 0.6996274974602099, + "eval_precision_macro": 0.8188535333546936, + "eval_pred_class_0": 16715, + "eval_pred_class_1": 2953, + "eval_predicted_binding_ratio": 0.15014236322961155, + "eval_recall": 0.6662366978394066, + "eval_recall_macro": 0.8063482638107518, + "eval_runtime": 0.2536, + "eval_samples_per_second": 642.818, + "eval_steps_per_second": 3.944, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5070 + }, + { + "epoch": 196.0, + "eval_accuracy": 0.9018202155786048, + "eval_auc": 0.9269277165550606, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6819304892110031, + "eval_f1_macro": 0.8119407443800393, + "eval_loss": 0.2693455219268799, + "eval_pr_auc": 0.6690854783865479, + "eval_precision": 0.696969696969697, + "eval_precision_macro": 0.8176128877709905, + "eval_pred_class_0": 16698, + "eval_pred_class_1": 2970, + "eval_predicted_binding_ratio": 0.15100671140939598, + "eval_recall": 0.6675266043211867, + "eval_recall_macro": 0.8066008708211837, + "eval_runtime": 0.2679, + "eval_samples_per_second": 608.505, + "eval_steps_per_second": 3.733, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5096 + }, + { + "epoch": 197.0, + "eval_accuracy": 0.9018202155786048, + "eval_auc": 0.926915803976337, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6826622843056697, + "eval_f1_macro": 0.8122944214527055, + "eval_loss": 0.2691311538219452, + "eval_pr_auc": 0.6692617138980786, + "eval_precision": 0.6960455764075067, + "eval_precision_macro": 0.8173347038115213, + "eval_pred_class_0": 16684, + "eval_pred_class_1": 2984, + "eval_predicted_binding_ratio": 0.15171852755745374, + "eval_recall": 0.6697839406643018, + "eval_recall_macro": 0.8075182756378791, + "eval_runtime": 0.261, + "eval_samples_per_second": 624.485, + "eval_steps_per_second": 3.831, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5122 + }, + { + "epoch": 198.0, + "eval_accuracy": 0.9023794996949359, + "eval_auc": 0.9270834170733764, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6822244289970208, + "eval_f1_macro": 0.8122781903500151, + "eval_loss": 0.26926350593566895, + "eval_pr_auc": 0.6700425139918407, + "eval_precision": 0.7007820469228153, + "eval_precision_macro": 0.8193035600788525, + "eval_pred_class_0": 16727, + "eval_pred_class_1": 2941, + "eval_predicted_binding_ratio": 0.1495322351027049, + "eval_recall": 0.6646243147371815, + "eval_recall_macro": 0.8057533356145012, + "eval_runtime": 0.1796, + "eval_samples_per_second": 907.821, + "eval_steps_per_second": 5.569, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5148 + }, + { + "epoch": 199.0, + "eval_accuracy": 0.9023286556843604, + "eval_auc": 0.9271362061477199, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6822167080231596, + "eval_f1_macro": 0.812256712426767, + "eval_loss": 0.26931333541870117, + "eval_pr_auc": 0.6701662107301889, + "eval_precision": 0.7004076086956522, + "eval_precision_macro": 0.8191406615590195, + "eval_pred_class_0": 16724, + "eval_pred_class_1": 2944, + "eval_predicted_binding_ratio": 0.14968476713443157, + "eval_recall": 0.6649467913576266, + "eval_recall_macro": 0.8058542129661919, + "eval_runtime": 0.2544, + "eval_samples_per_second": 640.682, + "eval_steps_per_second": 3.931, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5174 + }, + { + "epoch": 200.0, + "eval_accuracy": 0.9019727476103315, + "eval_auc": 0.9271006630615285, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6823723228995058, + "eval_f1_macro": 0.8122076248057319, + "eval_loss": 0.26923447847366333, + "eval_pr_auc": 0.6700054234044599, + "eval_precision": 0.6975412596833951, + "eval_precision_macro": 0.8179304597716335, + "eval_pred_class_0": 16699, + "eval_pred_class_1": 2969, + "eval_predicted_binding_ratio": 0.15095586739882041, + "eval_recall": 0.6678490809416318, + "eval_recall_macro": 0.8068224700899382, + "eval_runtime": 0.2001, + "eval_samples_per_second": 814.446, + "eval_steps_per_second": 4.997, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5200 + }, + { + "epoch": 201.0, + "eval_accuracy": 0.9022269676632093, + "eval_auc": 0.9272154481542286, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6827256228345158, + "eval_f1_macro": 0.8124672441745833, + "eval_loss": 0.2691180408000946, + "eval_pr_auc": 0.6705128874375396, + "eval_precision": 0.6989864864864865, + "eval_precision_macro": 0.8186098340979236, + "eval_pred_class_0": 16708, + "eval_pred_class_1": 2960, + "eval_predicted_binding_ratio": 0.15049827130364044, + "eval_recall": 0.6672041277007417, + "eval_recall_macro": 0.8067112568243553, + "eval_runtime": 0.2326, + "eval_samples_per_second": 700.909, + "eval_steps_per_second": 4.3, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5226 + }, + { + "epoch": 202.0, + "eval_accuracy": 0.9027354077689648, + "eval_auc": 0.9272916730860608, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6847915636842973, + "eval_f1_macro": 0.8136435649304945, + "eval_loss": 0.2689346969127655, + "eval_pr_auc": 0.6709998700001464, + "eval_precision": 0.7001347708894878, + "eval_precision_macro": 0.8194386429297739, + "eval_pred_class_0": 16700, + "eval_pred_class_1": 2968, + "eval_predicted_binding_ratio": 0.15090502338824485, + "eval_recall": 0.6701064172847468, + "eval_recall_macro": 0.8081925820956238, + "eval_runtime": 0.1664, + "eval_samples_per_second": 979.381, + "eval_steps_per_second": 6.008, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5252 + }, + { + "epoch": 203.0, + "eval_accuracy": 0.9025828757372382, + "eval_auc": 0.9274415730349983, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6833057851239669, + "eval_f1_macro": 0.8128720237282395, + "eval_loss": 0.269077867269516, + "eval_pr_auc": 0.6716085952371595, + "eval_precision": 0.7009155645981688, + "eval_precision_macro": 0.819534880211639, + "eval_pred_class_0": 16719, + "eval_pred_class_1": 2949, + "eval_predicted_binding_ratio": 0.14993898718730933, + "eval_recall": 0.6665591744598517, + "eval_recall_macro": 0.8066604045173044, + "eval_runtime": 0.2556, + "eval_samples_per_second": 637.756, + "eval_steps_per_second": 3.913, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5278 + }, + { + "epoch": 204.0, + "eval_accuracy": 0.902837095790116, + "eval_auc": 0.9274026625041677, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6848095002474023, + "eval_f1_macro": 0.8136877723940104, + "eval_loss": 0.2691201865673065, + "eval_pr_auc": 0.6711363619469519, + "eval_precision": 0.700877785280216, + "eval_precision_macro": 0.8197612917781423, + "eval_pred_class_0": 16706, + "eval_pred_class_1": 2962, + "eval_predicted_binding_ratio": 0.15059995932479153, + "eval_recall": 0.6694614640438569, + "eval_recall_macro": 0.8079908273922429, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.544, + "eval_steps_per_second": 3.868, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5304 + }, + { + "epoch": 205.0, + "eval_accuracy": 0.9029896278218426, + "eval_auc": 0.9274924350745481, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6850445691647409, + "eval_f1_macro": 0.8138546963859644, + "eval_loss": 0.2691231071949005, + "eval_pr_auc": 0.6713794419677425, + "eval_precision": 0.7017247210010146, + "eval_precision_macro": 0.8201640180913157, + "eval_pred_class_0": 16711, + "eval_pred_class_1": 2957, + "eval_predicted_binding_ratio": 0.15034573927191378, + "eval_recall": 0.6691389874234118, + "eval_recall_macro": 0.8079503109990844, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.331, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5330 + }, + { + "epoch": 206.0, + "eval_accuracy": 0.9031421598535693, + "eval_auc": 0.9275284258556915, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6860062633921213, + "eval_f1_macro": 0.8143728753012186, + "eval_loss": 0.26890990138053894, + "eval_pr_auc": 0.6717808840440014, + "eval_precision": 0.7016183412002697, + "eval_precision_macro": 0.8202739053624388, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.6710738471460819, + "eval_recall_macro": 0.8088271994226215, + "eval_runtime": 0.2637, + "eval_samples_per_second": 618.148, + "eval_steps_per_second": 3.792, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5356 + }, + { + "epoch": 207.0, + "eval_accuracy": 0.9030913158429937, + "eval_auc": 0.9275997261430513, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6854785478547855, + "eval_f1_macro": 0.8141000144010073, + "eval_loss": 0.2689792513847351, + "eval_pr_auc": 0.6718662790463032, + "eval_precision": 0.7019263264616424, + "eval_precision_macro": 0.8203209943398044, + "eval_pred_class_0": 16709, + "eval_pred_class_1": 2959, + "eval_predicted_binding_ratio": 0.15044742729306487, + "eval_recall": 0.6697839406643018, + "eval_recall_macro": 0.8082727876195295, + "eval_runtime": 0.195, + "eval_samples_per_second": 835.813, + "eval_steps_per_second": 5.128, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5382 + }, + { + "epoch": 208.0, + "eval_accuracy": 0.9032946918852959, + "eval_auc": 0.9276906860783045, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6862421643022105, + "eval_f1_macro": 0.8145402081954642, + "eval_loss": 0.2688303291797638, + "eval_pr_auc": 0.6723694322774509, + "eval_precision": 0.7024653833164471, + "eval_precision_macro": 0.8206766373097469, + "eval_pred_class_0": 16707, + "eval_pred_class_1": 2961, + "eval_predicted_binding_ratio": 0.150549115314216, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.808786683029463, + "eval_runtime": 0.2599, + "eval_samples_per_second": 627.22, + "eval_steps_per_second": 3.848, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5408 + }, + { + "epoch": 209.0, + "eval_accuracy": 0.9031421598535693, + "eval_auc": 0.9277029684919884, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6851760039662865, + "eval_f1_macro": 0.8139715080669648, + "eval_loss": 0.2689387798309326, + "eval_pr_auc": 0.6722283267888528, + "eval_precision": 0.7027118644067797, + "eval_precision_macro": 0.8206106277411336, + "eval_pred_class_0": 16718, + "eval_pred_class_1": 2950, + "eval_predicted_binding_ratio": 0.1499898311978849, + "eval_recall": 0.6684940341825217, + "eval_recall_macro": 0.8077787367749694, + "eval_runtime": 0.2435, + "eval_samples_per_second": 669.46, + "eval_steps_per_second": 4.107, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5434 + }, + { + "epoch": 210.0, + "eval_accuracy": 0.9030404718324181, + "eval_auc": 0.9277600593308708, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6846370100876468, + "eval_f1_macro": 0.8136754097270521, + "eval_loss": 0.26892563700675964, + "eval_pr_auc": 0.6725269853749476, + "eval_precision": 0.7026476578411406, + "eval_precision_macro": 0.8204961767258567, + "eval_pred_class_0": 16722, + "eval_pred_class_1": 2946, + "eval_predicted_binding_ratio": 0.14978645515558267, + "eval_recall": 0.6675266043211867, + "eval_recall_macro": 0.807325202323568, + "eval_runtime": 0.2601, + "eval_samples_per_second": 626.763, + "eval_steps_per_second": 3.845, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5460 + }, + { + "epoch": 211.0, + "eval_accuracy": 0.9032438478747203, + "eval_auc": 0.9278453744167288, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.686128979053274, + "eval_f1_macro": 0.8144677293907912, + "eval_loss": 0.26878559589385986, + "eval_pr_auc": 0.6729846306066621, + "eval_precision": 0.7022282241728561, + "eval_precision_macro": 0.8205562286912407, + "eval_pred_class_0": 16706, + "eval_pred_class_1": 2962, + "eval_predicted_binding_ratio": 0.15059995932479153, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.808756502550197, + "eval_runtime": 0.2433, + "eval_samples_per_second": 669.879, + "eval_steps_per_second": 4.11, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5486 + }, + { + "epoch": 211.53846153846155, + "grad_norm": 16655.041015625, + "learning_rate": 7.144675667015729e-07, + "loss": 0.2259, + "step": 5500 + }, + { + "epoch": 212.0, + "eval_accuracy": 0.9028879398006915, + "eval_auc": 0.9279623006591996, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6845061116617113, + "eval_f1_macro": 0.8135588667797169, + "eval_loss": 0.26880088448524475, + "eval_pr_auc": 0.6734939312101108, + "eval_precision": 0.7016593294954284, + "eval_precision_macro": 0.8200489288817256, + "eval_pred_class_0": 16715, + "eval_pred_class_1": 2953, + "eval_predicted_binding_ratio": 0.15014236322961155, + "eval_recall": 0.6681715575620768, + "eval_recall_macro": 0.8074967765476829, + "eval_runtime": 0.2166, + "eval_samples_per_second": 752.433, + "eval_steps_per_second": 4.616, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5512 + }, + { + "epoch": 213.0, + "eval_accuracy": 0.9032438478747203, + "eval_auc": 0.9279055601902797, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6863359156090325, + "eval_f1_macro": 0.8145677594216373, + "eval_loss": 0.2687283456325531, + "eval_pr_auc": 0.6734074239428265, + "eval_precision": 0.7019554956169926, + "eval_precision_macro": 0.820472419105347, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.6713963237665269, + "eval_recall_macro": 0.80901861821211, + "eval_runtime": 0.2336, + "eval_samples_per_second": 697.742, + "eval_steps_per_second": 4.281, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5538 + }, + { + "epoch": 214.0, + "eval_accuracy": 0.9032438478747203, + "eval_auc": 0.9279595755594916, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6864392815949909, + "eval_f1_macro": 0.8146177229810407, + "eval_loss": 0.2687055766582489, + "eval_pr_auc": 0.6734235549479375, + "eval_precision": 0.7018194070080862, + "eval_precision_macro": 0.8204306615878754, + "eval_pred_class_0": 16700, + "eval_pred_class_1": 2968, + "eval_predicted_binding_ratio": 0.15090502338824485, + "eval_recall": 0.671718800386972, + "eval_recall_macro": 0.8091496760430665, + "eval_runtime": 0.225, + "eval_samples_per_second": 724.337, + "eval_steps_per_second": 4.444, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5564 + }, + { + "epoch": 215.0, + "eval_accuracy": 0.9031421598535693, + "eval_auc": 0.9280330753916157, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.685383980181668, + "eval_f1_macro": 0.8140720567955604, + "eval_loss": 0.2687467932701111, + "eval_pr_auc": 0.6737104569152422, + "eval_precision": 0.7024373730534867, + "eval_precision_macro": 0.8205258541706347, + "eval_pred_class_0": 16714, + "eval_pred_class_1": 2954, + "eval_predicted_binding_ratio": 0.15019320724018712, + "eval_recall": 0.6691389874234118, + "eval_recall_macro": 0.8080408524368825, + "eval_runtime": 0.1833, + "eval_samples_per_second": 889.114, + "eval_steps_per_second": 5.455, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5590 + }, + { + "epoch": 216.0, + "eval_accuracy": 0.9031930038641448, + "eval_auc": 0.9281060496687963, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6860158311345647, + "eval_f1_macro": 0.8143952682963037, + "eval_loss": 0.26888203620910645, + "eval_pr_auc": 0.6738155361634312, + "eval_precision": 0.7019912251096861, + "eval_precision_macro": 0.8204358998939631, + "eval_pred_class_0": 16705, + "eval_pred_class_1": 2963, + "eval_predicted_binding_ratio": 0.1506508033353671, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.808726322070931, + "eval_runtime": 0.1764, + "eval_samples_per_second": 923.938, + "eval_steps_per_second": 5.668, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5616 + }, + { + "epoch": 217.0, + "eval_accuracy": 0.9032438478747203, + "eval_auc": 0.9281046287239484, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6865425794761983, + "eval_f1_macro": 0.8146676522813128, + "eval_loss": 0.2686736285686493, + "eval_pr_auc": 0.6740322097472393, + "eval_precision": 0.7016835016835017, + "eval_precision_macro": 0.8203890020095554, + "eval_pred_class_0": 16698, + "eval_pred_class_1": 2970, + "eval_predicted_binding_ratio": 0.15100671140939598, + "eval_recall": 0.672041277007417, + "eval_recall_macro": 0.809280733874023, + "eval_runtime": 0.2537, + "eval_samples_per_second": 642.37, + "eval_steps_per_second": 3.941, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5642 + }, + { + "epoch": 218.0, + "eval_accuracy": 0.9033963799064471, + "eval_auc": 0.9282425576991689, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6862615587846763, + "eval_f1_macro": 0.814585106315415, + "eval_loss": 0.26872488856315613, + "eval_pr_auc": 0.6745604450622946, + "eval_precision": 0.7032148900169205, + "eval_precision_macro": 0.8210025266814094, + "eval_pred_class_0": 16713, + "eval_pred_class_1": 2955, + "eval_predicted_binding_ratio": 0.15024405125076265, + "eval_recall": 0.6701064172847468, + "eval_recall_macro": 0.808584928326082, + "eval_runtime": 0.2515, + "eval_samples_per_second": 648.077, + "eval_steps_per_second": 3.976, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5668 + }, + { + "epoch": 219.0, + "eval_accuracy": 0.9032946918852959, + "eval_auc": 0.9283728174652107, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6862421643022105, + "eval_f1_macro": 0.8145402081954642, + "eval_loss": 0.26861947774887085, + "eval_pr_auc": 0.675175157595335, + "eval_precision": 0.7024653833164471, + "eval_precision_macro": 0.8206766373097469, + "eval_pred_class_0": 16707, + "eval_pred_class_1": 2961, + "eval_predicted_binding_ratio": 0.150549115314216, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.808786683029463, + "eval_runtime": 0.269, + "eval_samples_per_second": 605.917, + "eval_steps_per_second": 3.717, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5694 + }, + { + "epoch": 220.0, + "eval_accuracy": 0.9035997559487492, + "eval_auc": 0.9283705984554486, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6875411997363217, + "eval_f1_macro": 0.8152747479984963, + "eval_loss": 0.2684967517852783, + "eval_pr_auc": 0.6752603675091132, + "eval_precision": 0.703067071115605, + "eval_precision_macro": 0.8211461336058236, + "eval_pred_class_0": 16701, + "eval_pred_class_1": 2967, + "eval_predicted_binding_ratio": 0.15085417937766932, + "eval_recall": 0.672686230248307, + "eval_recall_macro": 0.809754112890798, + "eval_runtime": 0.2663, + "eval_samples_per_second": 612.018, + "eval_steps_per_second": 3.755, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5720 + }, + { + "epoch": 221.0, + "eval_accuracy": 0.9037014439699004, + "eval_auc": 0.9285080992007146, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6871489924017179, + "eval_f1_macro": 0.8151206773197821, + "eval_loss": 0.2685548961162567, + "eval_pr_auc": 0.6758164431668767, + "eval_precision": 0.7043684388757196, + "eval_precision_macro": 0.8216427895844347, + "eval_pred_class_0": 16715, + "eval_pred_class_1": 2953, + "eval_predicted_binding_ratio": 0.15014236322961155, + "eval_recall": 0.6707513705256369, + "eval_recall_macro": 0.809028126863591, + "eval_runtime": 0.1986, + "eval_samples_per_second": 820.602, + "eval_steps_per_second": 5.034, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5746 + }, + { + "epoch": 222.0, + "eval_accuracy": 0.9034980679275981, + "eval_auc": 0.9285004105265384, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6870052770448549, + "eval_f1_macro": 0.8149801571567146, + "eval_loss": 0.2685752809047699, + "eval_pr_auc": 0.6755687553750968, + "eval_precision": 0.7030037124535943, + "eval_precision_macro": 0.8210319370409247, + "eval_pred_class_0": 16705, + "eval_pred_class_1": 2963, + "eval_predicted_binding_ratio": 0.1506508033353671, + "eval_recall": 0.671718800386972, + "eval_recall_macro": 0.8093005784393965, + "eval_runtime": 0.2128, + "eval_samples_per_second": 765.976, + "eval_steps_per_second": 4.699, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5772 + }, + { + "epoch": 223.0, + "eval_accuracy": 0.9033963799064471, + "eval_auc": 0.9285113303903685, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6871913072110636, + "eval_f1_macro": 0.815034532807023, + "eval_loss": 0.2685534358024597, + "eval_pr_auc": 0.675465436437485, + "eval_precision": 0.7019845274133871, + "eval_precision_macro": 0.8206238899420935, + "eval_pred_class_0": 16695, + "eval_pred_class_1": 2973, + "eval_predicted_binding_ratio": 0.15115924344112264, + "eval_recall": 0.673008706868752, + "eval_recall_macro": 0.8097644488046905, + "eval_runtime": 0.2565, + "eval_samples_per_second": 635.434, + "eval_steps_per_second": 3.898, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5798 + }, + { + "epoch": 224.0, + "eval_accuracy": 0.9033963799064471, + "eval_auc": 0.9286790602773953, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.685742639761826, + "eval_f1_macro": 0.814334221653217, + "eval_loss": 0.2686038315296173, + "eval_pr_auc": 0.6763975611431872, + "eval_precision": 0.7039049235993209, + "eval_precision_macro": 0.8212163498580232, + "eval_pred_class_0": 16723, + "eval_pred_class_1": 2945, + "eval_predicted_binding_ratio": 0.1497356111450071, + "eval_recall": 0.6684940341825217, + "eval_recall_macro": 0.8079296391712996, + "eval_runtime": 0.2587, + "eval_samples_per_second": 630.051, + "eval_steps_per_second": 3.865, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5824 + }, + { + "epoch": 225.0, + "eval_accuracy": 0.9030404718324181, + "eval_auc": 0.9286844910118134, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6857801944307135, + "eval_f1_macro": 0.8142280597608222, + "eval_loss": 0.268373966217041, + "eval_pr_auc": 0.6766849702960268, + "eval_precision": 0.7011455525606469, + "eval_precision_macro": 0.8200338541246348, + "eval_pred_class_0": 16700, + "eval_pred_class_1": 2968, + "eval_predicted_binding_ratio": 0.15090502338824485, + "eval_recall": 0.6710738471460819, + "eval_recall_macro": 0.8087668384640894, + "eval_runtime": 0.2618, + "eval_samples_per_second": 622.527, + "eval_steps_per_second": 3.819, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5850 + }, + { + "epoch": 226.0, + "eval_accuracy": 0.9031421598535693, + "eval_auc": 0.9286265437130228, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6868321551865856, + "eval_f1_macro": 0.8147720454758898, + "eval_loss": 0.268480122089386, + "eval_pr_auc": 0.6762864798501788, + "eval_precision": 0.7005365526492288, + "eval_precision_macro": 0.8199434531195322, + "eval_pred_class_0": 16686, + "eval_pred_class_1": 2982, + "eval_predicted_binding_ratio": 0.15161683953630262, + "eval_recall": 0.673653660109642, + "eval_recall_macro": 0.8098756620702734, + "eval_runtime": 0.1801, + "eval_samples_per_second": 905.249, + "eval_steps_per_second": 5.554, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5876 + }, + { + "epoch": 227.0, + "eval_accuracy": 0.9034472239170226, + "eval_auc": 0.9286911091111043, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6878185106033208, + "eval_f1_macro": 0.8153554406082493, + "eval_loss": 0.2684793770313263, + "eval_pr_auc": 0.6763719243742072, + "eval_precision": 0.7015425888665325, + "eval_precision_macro": 0.8205363669491479, + "eval_pred_class_0": 16686, + "eval_pred_class_1": 2982, + "eval_predicted_binding_ratio": 0.15161683953630262, + "eval_recall": 0.6746210899709771, + "eval_recall_macro": 0.810449918438739, + "eval_runtime": 0.2526, + "eval_samples_per_second": 645.327, + "eval_steps_per_second": 3.959, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5902 + }, + { + "epoch": 228.0, + "eval_accuracy": 0.9032438478747203, + "eval_auc": 0.9287447935753516, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6870580496628844, + "eval_f1_macro": 0.8149167860703537, + "eval_loss": 0.2684626877307892, + "eval_pr_auc": 0.6765555444970285, + "eval_precision": 0.701006711409396, + "eval_precision_macro": 0.8201821668264622, + "eval_pred_class_0": 16688, + "eval_pred_class_1": 2980, + "eval_predicted_binding_ratio": 0.15151515151515152, + "eval_recall": 0.673653660109642, + "eval_recall_macro": 0.8099360230288054, + "eval_runtime": 0.253, + "eval_samples_per_second": 644.184, + "eval_steps_per_second": 3.952, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5928 + }, + { + "epoch": 229.0, + "eval_accuracy": 0.9037522879804759, + "eval_auc": 0.92885044958403, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6877783275606135, + "eval_f1_macro": 0.815442675636767, + "eval_loss": 0.2684047222137451, + "eval_pr_auc": 0.6771563538797724, + "eval_precision": 0.7039162727886563, + "eval_precision_macro": 0.8215498998326138, + "eval_pred_class_0": 16706, + "eval_pred_class_1": 2962, + "eval_predicted_binding_ratio": 0.15059995932479153, + "eval_recall": 0.6723637536278619, + "eval_recall_macro": 0.8097135964976395, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.352, + "eval_steps_per_second": 3.886, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5954 + }, + { + "epoch": 230.0, + "eval_accuracy": 0.9031930038641448, + "eval_auc": 0.9288052907888691, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6866359447004609, + "eval_f1_macro": 0.814695001815053, + "eval_loss": 0.2683703601360321, + "eval_pr_auc": 0.6770692308922137, + "eval_precision": 0.7011764705882353, + "eval_precision_macro": 0.8201862703986524, + "eval_pred_class_0": 16693, + "eval_pred_class_1": 2975, + "eval_predicted_binding_ratio": 0.15126093146227373, + "eval_recall": 0.672686230248307, + "eval_recall_macro": 0.80951266905667, + "eval_runtime": 0.1803, + "eval_samples_per_second": 903.896, + "eval_steps_per_second": 5.545, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 5980 + }, + { + "epoch": 230.76923076923077, + "grad_norm": 18226.349609375, + "learning_rate": 6.520804793983146e-07, + "loss": 0.2213, + "step": 6000 + }, + { + "epoch": 231.0, + "eval_accuracy": 0.9036505999593248, + "eval_auc": 0.9289028104284194, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6881685041961494, + "eval_f1_macro": 0.8155957226774667, + "eval_loss": 0.26838722825050354, + "eval_pr_auc": 0.677382862396771, + "eval_precision": 0.7026209677419355, + "eval_precision_macro": 0.821056469972094, + "eval_pred_class_0": 16692, + "eval_pred_class_1": 2976, + "eval_predicted_binding_ratio": 0.1513117754728493, + "eval_recall": 0.6742986133505321, + "eval_recall_macro": 0.8104395825248465, + "eval_runtime": 0.2471, + "eval_samples_per_second": 659.676, + "eval_steps_per_second": 4.047, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6006 + }, + { + "epoch": 232.0, + "eval_accuracy": 0.9036505999593248, + "eval_auc": 0.9289413705892875, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6882710972199375, + "eval_f1_macro": 0.8156453059542872, + "eval_loss": 0.2681979238986969, + "eval_pr_auc": 0.6777962434595076, + "eval_precision": 0.7024848891873741, + "eval_precision_macro": 0.8210147633474318, + "eval_pred_class_0": 16690, + "eval_pred_class_1": 2978, + "eval_predicted_binding_ratio": 0.1514134634940004, + "eval_recall": 0.6746210899709771, + "eval_recall_macro": 0.8105706403558031, + "eval_runtime": 0.2338, + "eval_samples_per_second": 697.187, + "eval_steps_per_second": 4.277, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6032 + }, + { + "epoch": 233.0, + "eval_accuracy": 0.9039556640227782, + "eval_auc": 0.9290857424788171, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6879233437964646, + "eval_f1_macro": 0.8155838213438953, + "eval_loss": 0.2683457136154175, + "eval_pr_auc": 0.6782535995592411, + "eval_precision": 0.7052845528455285, + "eval_precision_macro": 0.8221624965711252, + "eval_pred_class_0": 16716, + "eval_pred_class_1": 2952, + "eval_predicted_binding_ratio": 0.150091519219036, + "eval_recall": 0.6713963237665269, + "eval_recall_macro": 0.8094411449218342, + "eval_runtime": 0.2259, + "eval_samples_per_second": 721.682, + "eval_steps_per_second": 4.427, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6058 + }, + { + "epoch": 234.0, + "eval_accuracy": 0.9040065080333537, + "eval_auc": 0.9291390181781085, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6881400726792204, + "eval_f1_macro": 0.8157063562723066, + "eval_loss": 0.26850852370262146, + "eval_pr_auc": 0.678201898193761, + "eval_precision": 0.7053843548933288, + "eval_precision_macro": 0.8222404873479507, + "eval_pred_class_0": 16715, + "eval_pred_class_1": 2953, + "eval_predicted_binding_ratio": 0.15014236322961155, + "eval_recall": 0.671718800386972, + "eval_recall_macro": 0.8096023832320567, + "eval_runtime": 0.2554, + "eval_samples_per_second": 638.187, + "eval_steps_per_second": 3.915, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6084 + }, + { + "epoch": 235.0, + "eval_accuracy": 0.9037014439699004, + "eval_auc": 0.9291422688327602, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.687767886580943, + "eval_f1_macro": 0.8154198615351363, + "eval_loss": 0.26855188608169556, + "eval_pr_auc": 0.6780321638936206, + "eval_precision": 0.7035413153456999, + "eval_precision_macro": 0.8213868942770528, + "eval_pred_class_0": 16703, + "eval_pred_class_1": 2965, + "eval_predicted_binding_ratio": 0.1507524913565182, + "eval_recall": 0.672686230248307, + "eval_recall_macro": 0.80981447384933, + "eval_runtime": 0.1711, + "eval_samples_per_second": 952.835, + "eval_steps_per_second": 5.846, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6110 + }, + { + "epoch": 236.0, + "eval_accuracy": 0.903853976001627, + "eval_auc": 0.9292183185796111, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6881081972620815, + "eval_f1_macro": 0.8156376648859622, + "eval_loss": 0.2684246003627777, + "eval_pr_auc": 0.6783782374353945, + "eval_precision": 0.7042538825118163, + "eval_precision_macro": 0.8217486340608884, + "eval_pred_class_0": 16706, + "eval_pred_class_1": 2962, + "eval_predicted_binding_ratio": 0.15059995932479153, + "eval_recall": 0.672686230248307, + "eval_recall_macro": 0.8099050152871281, + "eval_runtime": 0.2571, + "eval_samples_per_second": 633.985, + "eval_steps_per_second": 3.889, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6136 + }, + { + "epoch": 237.0, + "eval_accuracy": 0.9039048200122025, + "eval_auc": 0.9291723227895398, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6891447368421053, + "eval_f1_macro": 0.8161564434751782, + "eval_loss": 0.2682003080844879, + "eval_pr_auc": 0.678474943261759, + "eval_precision": 0.7032561262168513, + "eval_precision_macro": 0.8214884501897367, + "eval_pred_class_0": 16689, + "eval_pred_class_1": 2979, + "eval_predicted_binding_ratio": 0.15146430750457596, + "eval_recall": 0.6755885198323122, + "eval_recall_macro": 0.8111147162450025, + "eval_runtime": 0.2533, + "eval_samples_per_second": 643.44, + "eval_steps_per_second": 3.947, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6162 + }, + { + "epoch": 238.0, + "eval_accuracy": 0.9041081960545048, + "eval_auc": 0.9292599542101496, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6891891891891891, + "eval_f1_macro": 0.8162490373023017, + "eval_loss": 0.2683660686016083, + "eval_pr_auc": 0.6787382085049865, + "eval_precision": 0.704752275025278, + "eval_precision_macro": 0.8221384271958916, + "eval_pred_class_0": 16701, + "eval_pred_class_1": 2967, + "eval_predicted_binding_ratio": 0.15085417937766932, + "eval_recall": 0.6742986133505321, + "eval_recall_macro": 0.8107112068382407, + "eval_runtime": 0.251, + "eval_samples_per_second": 649.396, + "eval_steps_per_second": 3.984, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6188 + }, + { + "epoch": 239.0, + "eval_accuracy": 0.9039556640227782, + "eval_auc": 0.9292687329242089, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6886434811274106, + "eval_f1_macro": 0.8159319482645679, + "eval_loss": 0.2683408558368683, + "eval_pr_auc": 0.6788700814485856, + "eval_precision": 0.7043155765340526, + "eval_precision_macro": 0.8218620153057044, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.673653660109642, + "eval_recall_macro": 0.8103585497385295, + "eval_runtime": 0.2614, + "eval_samples_per_second": 623.65, + "eval_steps_per_second": 3.826, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6214 + }, + { + "epoch": 240.0, + "eval_accuracy": 0.9038031319910514, + "eval_auc": 0.9293738244479479, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6886109282422647, + "eval_f1_macro": 0.8158628904590758, + "eval_loss": 0.26828694343566895, + "eval_pr_auc": 0.6792947923537326, + "eval_precision": 0.7031932773109244, + "eval_precision_macro": 0.8213743898086402, + "eval_pred_class_0": 16693, + "eval_pred_class_1": 2975, + "eval_predicted_binding_ratio": 0.15126093146227373, + "eval_recall": 0.6746210899709771, + "eval_recall_macro": 0.8106611817936011, + "eval_runtime": 0.1692, + "eval_samples_per_second": 963.308, + "eval_steps_per_second": 5.91, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6240 + }, + { + "epoch": 241.0, + "eval_accuracy": 0.9039048200122025, + "eval_auc": 0.929388987681323, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6891447368421053, + "eval_f1_macro": 0.8161564434751782, + "eval_loss": 0.26832982897758484, + "eval_pr_auc": 0.679377997729221, + "eval_precision": 0.7032561262168513, + "eval_precision_macro": 0.8214884501897367, + "eval_pred_class_0": 16689, + "eval_pred_class_1": 2979, + "eval_predicted_binding_ratio": 0.15146430750457596, + "eval_recall": 0.6755885198323122, + "eval_recall_macro": 0.8111147162450025, + "eval_runtime": 0.2624, + "eval_samples_per_second": 621.159, + "eval_steps_per_second": 3.811, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6266 + }, + { + "epoch": 242.0, + "eval_accuracy": 0.9040065080333537, + "eval_auc": 0.9294373582011398, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6891669410602568, + "eval_f1_macro": 0.8162027357577154, + "eval_loss": 0.26806166768074036, + "eval_pr_auc": 0.6798375856033828, + "eval_precision": 0.7040026908846283, + "eval_precision_macro": 0.8218126661970311, + "eval_pred_class_0": 16695, + "eval_pred_class_1": 2973, + "eval_predicted_binding_ratio": 0.15115924344112264, + "eval_recall": 0.6749435665914221, + "eval_recall_macro": 0.8109129615416215, + "eval_runtime": 0.2508, + "eval_samples_per_second": 649.948, + "eval_steps_per_second": 3.987, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6292 + }, + { + "epoch": 243.0, + "eval_accuracy": 0.9040065080333537, + "eval_auc": 0.929443917905437, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6897798225435425, + "eval_f1_macro": 0.8164989338281623, + "eval_loss": 0.26814863085746765, + "eval_pr_auc": 0.6797263827568155, + "eval_precision": 0.7031825795644892, + "eval_precision_macro": 0.8215607197408852, + "eval_pred_class_0": 16683, + "eval_pred_class_1": 2985, + "eval_predicted_binding_ratio": 0.15176937156802928, + "eval_recall": 0.6768784263140922, + "eval_recall_macro": 0.8116993085273606, + "eval_runtime": 0.2535, + "eval_samples_per_second": 642.959, + "eval_steps_per_second": 3.945, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6318 + }, + { + "epoch": 244.0, + "eval_accuracy": 0.903853976001627, + "eval_auc": 0.9295684160320964, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6882110469909316, + "eval_f1_macro": 0.8156873815700654, + "eval_loss": 0.26816216111183167, + "eval_pr_auc": 0.6803755613590039, + "eval_precision": 0.7041160593792173, + "eval_precision_macro": 0.8217060181953557, + "eval_pred_class_0": 16704, + "eval_pred_class_1": 2964, + "eval_predicted_binding_ratio": 0.15070164734594266, + "eval_recall": 0.673008706868752, + "eval_recall_macro": 0.8100360731180846, + "eval_runtime": 0.2375, + "eval_samples_per_second": 686.328, + "eval_steps_per_second": 4.211, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6344 + }, + { + "epoch": 245.0, + "eval_accuracy": 0.9041081960545048, + "eval_auc": 0.9295755402213329, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6888815572418344, + "eval_f1_macro": 0.8161003326270482, + "eval_loss": 0.2682454288005829, + "eval_pr_auc": 0.6803390373338819, + "eval_precision": 0.7051671732522796, + "eval_precision_macro": 0.8222669528798059, + "eval_pred_class_0": 16707, + "eval_pred_class_1": 2961, + "eval_predicted_binding_ratio": 0.150549115314216, + "eval_recall": 0.6733311834891971, + "eval_recall_macro": 0.8103180333453712, + "eval_runtime": 0.2519, + "eval_samples_per_second": 647.048, + "eval_steps_per_second": 3.97, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6370 + }, + { + "epoch": 246.0, + "eval_accuracy": 0.903853976001627, + "eval_auc": 0.9296204070415253, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6882110469909316, + "eval_f1_macro": 0.8156873815700654, + "eval_loss": 0.2682053744792938, + "eval_pr_auc": 0.6804463740899893, + "eval_precision": 0.7041160593792173, + "eval_precision_macro": 0.8217060181953557, + "eval_pred_class_0": 16704, + "eval_pred_class_1": 2964, + "eval_predicted_binding_ratio": 0.15070164734594266, + "eval_recall": 0.673008706868752, + "eval_recall_macro": 0.8100360731180846, + "eval_runtime": 0.2411, + "eval_samples_per_second": 676.2, + "eval_steps_per_second": 4.148, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6396 + }, + { + "epoch": 247.0, + "eval_accuracy": 0.9045657921496848, + "eval_auc": 0.9296705294111545, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6909270541742137, + "eval_f1_macro": 0.817248994423186, + "eval_loss": 0.2681281566619873, + "eval_pr_auc": 0.6807214505356617, + "eval_precision": 0.7059219380888291, + "eval_precision_macro": 0.8229238344013863, + "eval_pred_class_0": 16696, + "eval_pred_class_1": 2972, + "eval_predicted_binding_ratio": 0.15110839943054707, + "eval_recall": 0.6765559496936472, + "eval_recall_macro": 0.8119002359683303, + "eval_runtime": 0.1963, + "eval_samples_per_second": 830.371, + "eval_steps_per_second": 5.094, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6422 + }, + { + "epoch": 248.0, + "eval_accuracy": 0.9046166361602603, + "eval_auc": 0.9296871135893773, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6912442396313364, + "eval_f1_macro": 0.8174200753177728, + "eval_loss": 0.26824310421943665, + "eval_pr_auc": 0.6805326752113899, + "eval_precision": 0.7058823529411765, + "eval_precision_macro": 0.8229585490219571, + "eval_pred_class_0": 16693, + "eval_pred_class_1": 2975, + "eval_predicted_binding_ratio": 0.15126093146227373, + "eval_recall": 0.6772009029345373, + "eval_recall_macro": 0.8121925321095093, + "eval_runtime": 0.2645, + "eval_samples_per_second": 616.162, + "eval_steps_per_second": 3.78, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6448 + }, + { + "epoch": 249.0, + "eval_accuracy": 0.9047691681919869, + "eval_auc": 0.929768866580617, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6914841047603361, + "eval_f1_macro": 0.8175893393183914, + "eval_loss": 0.2681940495967865, + "eval_pr_auc": 0.6810052289277716, + "eval_precision": 0.7067340067340068, + "eval_precision_macro": 0.8233634101223034, + "eval_pred_class_0": 16698, + "eval_pred_class_1": 2970, + "eval_predicted_binding_ratio": 0.15100671140939598, + "eval_recall": 0.6768784263140922, + "eval_recall_macro": 0.8121520157163508, + "eval_runtime": 0.2396, + "eval_samples_per_second": 680.397, + "eval_steps_per_second": 4.174, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6474 + }, + { + "epoch": 250.0, + "grad_norm": 35924.55078125, + "learning_rate": 5.869563021464528e-07, + "loss": 0.2171, + "step": 6500 + }, + { + "epoch": 250.0, + "eval_accuracy": 0.9048200122025626, + "eval_auc": 0.9298367215633461, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6912928759894459, + "eval_f1_macro": 0.817514675551828, + "eval_loss": 0.2681121826171875, + "eval_pr_auc": 0.681415067318076, + "eval_precision": 0.7073911576105298, + "eval_precision_macro": 0.8236147646777582, + "eval_pred_class_0": 16705, + "eval_pred_class_1": 2963, + "eval_predicted_binding_ratio": 0.1506508033353671, + "eval_recall": 0.6759109964527572, + "eval_recall_macro": 0.8117890227027473, + "eval_runtime": 0.2606, + "eval_samples_per_second": 625.453, + "eval_steps_per_second": 3.837, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6500 + }, + { + "epoch": 251.0, + "eval_accuracy": 0.9046674801708359, + "eval_auc": 0.9298684105799504, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6909510466457887, + "eval_f1_macro": 0.8172961371074986, + "eval_loss": 0.2681705951690674, + "eval_pr_auc": 0.6814913977659953, + "eval_precision": 0.7066756574511126, + "eval_precision_macro": 0.8232516115060616, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.6759109964527572, + "eval_recall_macro": 0.8116984812649493, + "eval_runtime": 0.271, + "eval_samples_per_second": 601.566, + "eval_steps_per_second": 3.691, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6526 + }, + { + "epoch": 252.0, + "eval_accuracy": 0.9045657921496848, + "eval_auc": 0.9300244614682288, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6893926857521099, + "eval_f1_macro": 0.8165072340543806, + "eval_loss": 0.2683667540550232, + "eval_pr_auc": 0.6820860933741758, + "eval_precision": 0.7080217539089055, + "eval_precision_macro": 0.8235792136757251, + "eval_pred_class_0": 16726, + "eval_pred_class_1": 2942, + "eval_predicted_binding_ratio": 0.14958307911328045, + "eval_recall": 0.671718800386972, + "eval_recall_macro": 0.8099343685039828, + "eval_runtime": 0.2316, + "eval_samples_per_second": 703.782, + "eval_steps_per_second": 4.318, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6552 + }, + { + "epoch": 253.0, + "eval_accuracy": 0.9046166361602603, + "eval_auc": 0.9299541441632636, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6910408432147562, + "eval_f1_macro": 0.8173217684087248, + "eval_loss": 0.26824095845222473, + "eval_pr_auc": 0.6816317339832768, + "eval_precision": 0.7061595422416694, + "eval_precision_macro": 0.8230444354317887, + "eval_pred_class_0": 16697, + "eval_pred_class_1": 2971, + "eval_predicted_binding_ratio": 0.15105755541997154, + "eval_recall": 0.6765559496936472, + "eval_recall_macro": 0.8119304164475962, + "eval_runtime": 0.2652, + "eval_samples_per_second": 614.632, + "eval_steps_per_second": 3.771, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6578 + }, + { + "epoch": 254.0, + "eval_accuracy": 0.9044641041285336, + "eval_auc": 0.9299257349988078, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6913093477903729, + "eval_f1_macro": 0.8173981849782266, + "eval_loss": 0.2680823504924774, + "eval_pr_auc": 0.6815799614400636, + "eval_precision": 0.7046215673141326, + "eval_precision_macro": 0.8224282755645115, + "eval_pred_class_0": 16682, + "eval_pred_class_1": 2986, + "eval_predicted_binding_ratio": 0.15182021557860484, + "eval_recall": 0.6784908094163173, + "eval_recall_macro": 0.8126262219955371, + "eval_runtime": 0.1721, + "eval_samples_per_second": 947.131, + "eval_steps_per_second": 5.811, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6604 + }, + { + "epoch": 255.0, + "eval_accuracy": 0.9045657921496848, + "eval_auc": 0.9300487927156216, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6902129064202014, + "eval_f1_macro": 0.8169037907104764, + "eval_loss": 0.2682023346424103, + "eval_pr_auc": 0.6819970017971004, + "eval_precision": 0.7068965517241379, + "eval_precision_macro": 0.8232268515652408, + "eval_pred_class_0": 16710, + "eval_pred_class_1": 2958, + "eval_predicted_binding_ratio": 0.1503965832824893, + "eval_recall": 0.6742986133505321, + "eval_recall_macro": 0.8109828311516347, + "eval_runtime": 0.2548, + "eval_samples_per_second": 639.732, + "eval_steps_per_second": 3.925, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6630 + }, + { + "epoch": 256.0, + "eval_accuracy": 0.9046166361602603, + "eval_auc": 0.9300701263533355, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6908371786420567, + "eval_f1_macro": 0.8172233266061071, + "eval_loss": 0.26801708340644836, + "eval_pr_auc": 0.6822973012887885, + "eval_precision": 0.7064374789349511, + "eval_precision_macro": 0.8231307207859595, + "eval_pred_class_0": 16701, + "eval_pred_class_1": 2967, + "eval_predicted_binding_ratio": 0.15085417937766932, + "eval_recall": 0.6759109964527572, + "eval_recall_macro": 0.8116683007856833, + "eval_runtime": 0.2302, + "eval_samples_per_second": 708.13, + "eval_steps_per_second": 4.344, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6656 + }, + { + "epoch": 257.0, + "eval_accuracy": 0.9048708562131381, + "eval_auc": 0.9301923470752391, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6911012052171042, + "eval_f1_macro": 0.8174397819709127, + "eval_loss": 0.26805296540260315, + "eval_pr_auc": 0.6828138126269635, + "eval_precision": 0.7080514208389715, + "eval_precision_macro": 0.8238677400987582, + "eval_pred_class_0": 16712, + "eval_pred_class_1": 2956, + "eval_predicted_binding_ratio": 0.1502948952613382, + "eval_recall": 0.6749435665914221, + "eval_recall_macro": 0.8114260296891438, + "eval_runtime": 0.2566, + "eval_samples_per_second": 635.189, + "eval_steps_per_second": 3.897, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6682 + }, + { + "epoch": 258.0, + "eval_accuracy": 0.9047691681919869, + "eval_auc": 0.9301935928351055, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6912806988626998, + "eval_f1_macro": 0.8174910212279173, + "eval_loss": 0.26811105012893677, + "eval_pr_auc": 0.6827059930276215, + "eval_precision": 0.7070128118678355, + "eval_precision_macro": 0.8234501252489699, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.6762334730732021, + "eval_recall_macro": 0.8118899000544377, + "eval_runtime": 0.2455, + "eval_samples_per_second": 663.913, + "eval_steps_per_second": 4.073, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6708 + }, + { + "epoch": 259.0, + "eval_accuracy": 0.9049217002237137, + "eval_auc": 0.9302954726341887, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6913172664245626, + "eval_f1_macro": 0.817561992789179, + "eval_loss": 0.2680983245372772, + "eval_pr_auc": 0.6832649652047296, + "eval_precision": 0.7081501521812648, + "eval_precision_macro": 0.8239452215038332, + "eval_pred_class_0": 16711, + "eval_pred_class_1": 2957, + "eval_predicted_binding_ratio": 0.15034573927191378, + "eval_recall": 0.6752660432118671, + "eval_recall_macro": 0.8115872679993663, + "eval_runtime": 0.2456, + "eval_samples_per_second": 663.63, + "eval_steps_per_second": 4.071, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6734 + }, + { + "epoch": 260.0, + "eval_accuracy": 0.9048200122025626, + "eval_auc": 0.9302687082620565, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6920039486673247, + "eval_f1_macro": 0.8178583697873878, + "eval_loss": 0.2680213451385498, + "eval_pr_auc": 0.6832868902825516, + "eval_precision": 0.7064158548874706, + "eval_precision_macro": 0.8233115761166728, + "eval_pred_class_0": 16691, + "eval_pred_class_1": 2977, + "eval_predicted_binding_ratio": 0.15136261948342486, + "eval_recall": 0.6781683327958723, + "eval_recall_macro": 0.8127064275194428, + "eval_runtime": 0.2584, + "eval_samples_per_second": 630.797, + "eval_steps_per_second": 3.87, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6760 + }, + { + "epoch": 261.0, + "eval_accuracy": 0.9049725442342892, + "eval_auc": 0.9303376240871719, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.691635043722158, + "eval_f1_macro": 0.817733374603378, + "eval_loss": 0.2681059241294861, + "eval_pr_auc": 0.6834436649713198, + "eval_precision": 0.7081081081081081, + "eval_precision_macro": 0.8239786410782342, + "eval_pred_class_0": 16708, + "eval_pred_class_1": 2960, + "eval_predicted_binding_ratio": 0.15049827130364044, + "eval_recall": 0.6759109964527572, + "eval_recall_macro": 0.8118795641405453, + "eval_runtime": 0.1861, + "eval_samples_per_second": 875.956, + "eval_steps_per_second": 5.374, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6786 + }, + { + "epoch": 262.0, + "eval_accuracy": 0.9049725442342892, + "eval_auc": 0.9303520963131211, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6920415224913494, + "eval_f1_macro": 0.8179298603528982, + "eval_loss": 0.2680259048938751, + "eval_pr_auc": 0.683712511498021, + "eval_precision": 0.7075471698113207, + "eval_precision_macro": 0.8238035250254208, + "eval_pred_class_0": 16700, + "eval_pred_class_1": 2968, + "eval_predicted_binding_ratio": 0.15090502338824485, + "eval_recall": 0.6772009029345373, + "eval_recall_macro": 0.8124037954643712, + "eval_runtime": 0.2544, + "eval_samples_per_second": 640.771, + "eval_steps_per_second": 3.931, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6812 + }, + { + "epoch": 263.0, + "eval_accuracy": 0.9051759202765914, + "eval_auc": 0.9303956005834594, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.692700609655627, + "eval_f1_macro": 0.8183195235731167, + "eval_loss": 0.2679544985294342, + "eval_pr_auc": 0.6840791766505604, + "eval_precision": 0.7082210242587601, + "eval_precision_macro": 0.8242003324886615, + "eval_pred_class_0": 16700, + "eval_pred_class_1": 2968, + "eval_predicted_binding_ratio": 0.15090502338824485, + "eval_recall": 0.6778458561754273, + "eval_recall_macro": 0.8127866330433483, + "eval_runtime": 0.224, + "eval_samples_per_second": 727.59, + "eval_steps_per_second": 4.464, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6838 + }, + { + "epoch": 264.0, + "eval_accuracy": 0.905328452308318, + "eval_auc": 0.9304257713302264, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6933465085638999, + "eval_f1_macro": 0.8186850387937344, + "eval_loss": 0.26790833473205566, + "eval_pr_auc": 0.6843453300290927, + "eval_precision": 0.70851565129586, + "eval_precision_macro": 0.8244321084532245, + "eval_pred_class_0": 16697, + "eval_pred_class_1": 2971, + "eval_predicted_binding_ratio": 0.15105755541997154, + "eval_recall": 0.6788132860367624, + "eval_recall_macro": 0.8132703479740159, + "eval_runtime": 0.2587, + "eval_samples_per_second": 630.122, + "eval_steps_per_second": 3.866, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6864 + }, + { + "epoch": 265.0, + "eval_accuracy": 0.905328452308318, + "eval_auc": 0.9304709495903853, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6934474810668423, + "eval_f1_macro": 0.8187338421508825, + "eval_loss": 0.26806485652923584, + "eval_pr_auc": 0.684236710788699, + "eval_precision": 0.7083753784056509, + "eval_precision_macro": 0.8243883480827296, + "eval_pred_class_0": 16695, + "eval_pred_class_1": 2973, + "eval_predicted_binding_ratio": 0.15115924344112264, + "eval_recall": 0.6791357626572073, + "eval_recall_macro": 0.8134014058049723, + "eval_runtime": 0.2519, + "eval_samples_per_second": 646.986, + "eval_steps_per_second": 3.969, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6890 + }, + { + "epoch": 266.0, + "eval_accuracy": 0.9054301403294692, + "eval_auc": 0.9305072226139984, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6937767533750412, + "eval_f1_macro": 0.8189285426426647, + "eval_loss": 0.2682030200958252, + "eval_pr_auc": 0.6841419811140891, + "eval_precision": 0.708711738984191, + "eval_precision_macro": 0.8245864774585525, + "eval_pred_class_0": 16695, + "eval_pred_class_1": 2973, + "eval_predicted_binding_ratio": 0.15115924344112264, + "eval_recall": 0.6794582392776524, + "eval_recall_macro": 0.8135928245944608, + "eval_runtime": 0.1928, + "eval_samples_per_second": 845.449, + "eval_steps_per_second": 5.187, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6916 + }, + { + "epoch": 267.0, + "eval_accuracy": 0.9054809843400448, + "eval_auc": 0.9304825799266392, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6942937016938003, + "eval_f1_macro": 0.8191961667392471, + "eval_loss": 0.2681148052215576, + "eval_pr_auc": 0.6840537331785722, + "eval_precision": 0.7083892617449664, + "eval_precision_macro": 0.824532598274209, + "eval_pred_class_0": 16688, + "eval_pred_class_1": 2980, + "eval_predicted_binding_ratio": 0.15151515151515152, + "eval_recall": 0.6807481457594324, + "eval_recall_macro": 0.8141472363975528, + "eval_runtime": 0.2454, + "eval_samples_per_second": 664.157, + "eval_steps_per_second": 4.075, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6942 + }, + { + "epoch": 268.0, + "eval_accuracy": 0.9057860484034981, + "eval_auc": 0.930527787384295, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6951801283105774, + "eval_f1_macro": 0.8197312675109731, + "eval_loss": 0.2679014503955841, + "eval_pr_auc": 0.6848281199608002, + "eval_precision": 0.7095366017461383, + "eval_precision_macro": 0.8251697388598875, + "eval_pred_class_0": 16690, + "eval_pred_class_1": 2978, + "eval_predicted_binding_ratio": 0.1514134634940004, + "eval_recall": 0.6813930990003225, + "eval_recall_macro": 0.8145904349350619, + "eval_runtime": 0.2174, + "eval_samples_per_second": 749.726, + "eval_steps_per_second": 4.6, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6968 + }, + { + "epoch": 269.0, + "eval_accuracy": 0.9056335163717714, + "eval_auc": 0.9305513205667733, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6949375410913873, + "eval_f1_macro": 0.8195606747920547, + "eval_loss": 0.26782992482185364, + "eval_pr_auc": 0.6850252128050689, + "eval_precision": 0.7086825343613812, + "eval_precision_macro": 0.8247638023919581, + "eval_pred_class_0": 16685, + "eval_pred_class_1": 2983, + "eval_predicted_binding_ratio": 0.15166768354687818, + "eval_recall": 0.6817155756207675, + "eval_recall_macro": 0.8146309513282204, + "eval_runtime": 0.2304, + "eval_samples_per_second": 707.619, + "eval_steps_per_second": 4.341, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 6994 + }, + { + "epoch": 269.2307692307692, + "grad_norm": 17604.1328125, + "learning_rate": 5.202671165416819e-07, + "loss": 0.2132, + "step": 7000 + }, + { + "epoch": 270.0, + "eval_accuracy": 0.9056335163717714, + "eval_auc": 0.9305936180072407, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6950377916529741, + "eval_f1_macro": 0.8196091213903969, + "eval_loss": 0.2680298984050751, + "eval_pr_auc": 0.6846741481205671, + "eval_precision": 0.7085427135678392, + "eval_precision_macro": 0.8247203168031008, + "eval_pred_class_0": 16683, + "eval_pred_class_1": 2985, + "eval_predicted_binding_ratio": 0.15176937156802928, + "eval_recall": 0.6820380522412125, + "eval_recall_macro": 0.814762009159177, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.148, + "eval_steps_per_second": 3.829, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7020 + }, + { + "epoch": 271.0, + "eval_accuracy": 0.9054809843400448, + "eval_auc": 0.9306496966662317, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6938909929194796, + "eval_f1_macro": 0.8190015347004277, + "eval_loss": 0.2681294083595276, + "eval_pr_auc": 0.6847333752342389, + "eval_precision": 0.7089502018842531, + "eval_precision_macro": 0.8247074919339809, + "eval_pred_class_0": 16696, + "eval_pred_class_1": 2972, + "eval_predicted_binding_ratio": 0.15110839943054707, + "eval_recall": 0.6794582392776524, + "eval_recall_macro": 0.8136230050737269, + "eval_runtime": 0.1821, + "eval_samples_per_second": 895.335, + "eval_steps_per_second": 5.493, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7046 + }, + { + "epoch": 272.0, + "eval_accuracy": 0.9053792963188937, + "eval_auc": 0.9306815900653141, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6932586121641668, + "eval_f1_macro": 0.8186603259504293, + "eval_loss": 0.26823949813842773, + "eval_pr_auc": 0.6847385438827486, + "eval_precision": 0.7090357383681726, + "eval_precision_macro": 0.8246412077064189, + "eval_pred_class_0": 16702, + "eval_pred_class_1": 2966, + "eval_predicted_binding_ratio": 0.15080333536709375, + "eval_recall": 0.6781683327958723, + "eval_recall_macro": 0.8130384127913689, + "eval_runtime": 0.1793, + "eval_samples_per_second": 909.129, + "eval_steps_per_second": 5.577, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7072 + }, + { + "epoch": 273.0, + "eval_accuracy": 0.905328452308318, + "eval_auc": 0.9307212597310633, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6931443638760711, + "eval_f1_macro": 0.8185873316314347, + "eval_loss": 0.2681174576282501, + "eval_pr_auc": 0.6852124592316542, + "eval_precision": 0.7087967644084934, + "eval_precision_macro": 0.8245199318120546, + "eval_pred_class_0": 16701, + "eval_pred_class_1": 2967, + "eval_predicted_binding_ratio": 0.15085417937766932, + "eval_recall": 0.6781683327958723, + "eval_recall_macro": 0.8130082323121028, + "eval_runtime": 0.1693, + "eval_samples_per_second": 962.904, + "eval_steps_per_second": 5.907, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7098 + }, + { + "epoch": 274.0, + "eval_accuracy": 0.9056335163717714, + "eval_auc": 0.9307878592214269, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6947368421052632, + "eval_f1_macro": 0.8194636820581644, + "eval_loss": 0.2679351270198822, + "eval_pr_auc": 0.685659065605018, + "eval_precision": 0.7089627391742196, + "eval_precision_macro": 0.8248510741829513, + "eval_pred_class_0": 16689, + "eval_pred_class_1": 2979, + "eval_predicted_binding_ratio": 0.15146430750457596, + "eval_recall": 0.6810706223798775, + "eval_recall_macro": 0.8143688356663075, + "eval_runtime": 0.2474, + "eval_samples_per_second": 658.933, + "eval_steps_per_second": 4.043, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7124 + }, + { + "epoch": 275.0, + "eval_accuracy": 0.905684360382347, + "eval_auc": 0.9308354511413273, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6945496459739832, + "eval_f1_macro": 0.8193909880953703, + "eval_loss": 0.2679973542690277, + "eval_pr_auc": 0.6858486839853987, + "eval_precision": 0.7096231493943472, + "eval_precision_macro": 0.8251038602745574, + "eval_pred_class_0": 16696, + "eval_pred_class_1": 2972, + "eval_predicted_binding_ratio": 0.15110839943054707, + "eval_recall": 0.6801031925185425, + "eval_recall_macro": 0.8140058426527039, + "eval_runtime": 0.1976, + "eval_samples_per_second": 825.097, + "eval_steps_per_second": 5.062, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7150 + }, + { + "epoch": 276.0, + "eval_accuracy": 0.9055826723611958, + "eval_auc": 0.9308506727696961, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.694320987654321, + "eval_f1_macro": 0.819244917025501, + "eval_loss": 0.26807495951652527, + "eval_pr_auc": 0.6856600891550617, + "eval_precision": 0.7091459314055144, + "eval_precision_macro": 0.8248616921913159, + "eval_pred_class_0": 16694, + "eval_pred_class_1": 2974, + "eval_predicted_binding_ratio": 0.1512100874516982, + "eval_recall": 0.6801031925185425, + "eval_recall_macro": 0.8139454816941719, + "eval_runtime": 0.2211, + "eval_samples_per_second": 737.174, + "eval_steps_per_second": 4.523, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7176 + }, + { + "epoch": 277.0, + "eval_accuracy": 0.9057352043929225, + "eval_auc": 0.9308575828439557, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6946640316205533, + "eval_f1_macro": 0.8194640504423113, + "eval_loss": 0.26810142397880554, + "eval_pr_auc": 0.6855507532370865, + "eval_precision": 0.709861999326826, + "eval_precision_macro": 0.8252250644654733, + "eval_pred_class_0": 16697, + "eval_pred_class_1": 2971, + "eval_predicted_binding_ratio": 0.15105755541997154, + "eval_recall": 0.6801031925185425, + "eval_recall_macro": 0.81403602313197, + "eval_runtime": 0.2072, + "eval_samples_per_second": 786.583, + "eval_steps_per_second": 4.826, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7202 + }, + { + "epoch": 278.0, + "eval_accuracy": 0.905684360382347, + "eval_auc": 0.9309145374278527, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6941467436108821, + "eval_f1_macro": 0.8191962415719043, + "eval_loss": 0.26818612217903137, + "eval_pr_auc": 0.6856281742604067, + "eval_precision": 0.7101889338731444, + "eval_precision_macro": 0.8252812485457677, + "eval_pred_class_0": 16704, + "eval_pred_class_1": 2964, + "eval_predicted_binding_ratio": 0.15070164734594266, + "eval_recall": 0.6788132860367624, + "eval_recall_macro": 0.813481611328878, + "eval_runtime": 0.1872, + "eval_samples_per_second": 870.561, + "eval_steps_per_second": 5.341, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7228 + }, + { + "epoch": 279.0, + "eval_accuracy": 0.9056335163717714, + "eval_auc": 0.9309855846702396, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.69433465085639, + "eval_f1_macro": 0.8192692975301671, + "eval_loss": 0.2680346667766571, + "eval_pr_auc": 0.686232632159634, + "eval_precision": 0.7095254123190845, + "eval_precision_macro": 0.825026825462411, + "eval_pred_class_0": 16697, + "eval_pred_class_1": 2971, + "eval_predicted_binding_ratio": 0.15105755541997154, + "eval_recall": 0.6797807158980974, + "eval_recall_macro": 0.8138446043424814, + "eval_runtime": 0.241, + "eval_samples_per_second": 676.212, + "eval_steps_per_second": 4.149, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7254 + }, + { + "epoch": 280.0, + "eval_accuracy": 0.9056335163717714, + "eval_auc": 0.930922634866985, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6957377049180328, + "eval_f1_macro": 0.8199473215888755, + "eval_loss": 0.26806843280792236, + "eval_pr_auc": 0.6857403383581059, + "eval_precision": 0.70756918972991, + "eval_precision_macro": 0.8244187060893835, + "eval_pred_class_0": 16669, + "eval_pred_class_1": 2999, + "eval_predicted_binding_ratio": 0.15248118771608704, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8156794139758723, + "eval_runtime": 0.1979, + "eval_samples_per_second": 823.751, + "eval_steps_per_second": 5.054, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7280 + }, + { + "epoch": 281.0, + "eval_accuracy": 0.9055826723611958, + "eval_auc": 0.9309180411274773, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6959227116423776, + "eval_f1_macro": 0.8200188959217034, + "eval_loss": 0.2680058181285858, + "eval_pr_auc": 0.6857513786045211, + "eval_precision": 0.7069194943446441, + "eval_precision_macro": 0.8241715464761271, + "eval_pred_class_0": 16662, + "eval_pred_class_1": 3006, + "eval_predicted_binding_ratio": 0.15283709579011592, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.8160424069894758, + "eval_runtime": 0.2526, + "eval_samples_per_second": 645.248, + "eval_steps_per_second": 3.959, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7306 + }, + { + "epoch": 282.0, + "eval_accuracy": 0.9055318283506203, + "eval_auc": 0.930996368279084, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6950098489822718, + "eval_f1_macro": 0.8195600321797414, + "eval_loss": 0.2679016888141632, + "eval_pr_auc": 0.6864832067617813, + "eval_precision": 0.7077900367769977, + "eval_precision_macro": 0.824393309448042, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6826830054821026, + "eval_recall_macro": 0.814963763862558, + "eval_runtime": 0.2643, + "eval_samples_per_second": 616.664, + "eval_steps_per_second": 3.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7332 + }, + { + "epoch": 283.0, + "eval_accuracy": 0.905684360382347, + "eval_auc": 0.9310520771031147, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.695152013147083, + "eval_f1_macro": 0.8196821086456596, + "eval_loss": 0.2679460644721985, + "eval_pr_auc": 0.6867185372000254, + "eval_precision": 0.7087801608579088, + "eval_precision_macro": 0.8248408116684653, + "eval_pred_class_0": 16684, + "eval_pred_class_1": 2984, + "eval_predicted_binding_ratio": 0.15171852755745374, + "eval_recall": 0.6820380522412125, + "eval_recall_macro": 0.8147921896384429, + "eval_runtime": 0.2003, + "eval_samples_per_second": 813.949, + "eval_steps_per_second": 4.994, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7358 + }, + { + "epoch": 284.0, + "eval_accuracy": 0.905684360382347, + "eval_auc": 0.931065877786636, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.695152013147083, + "eval_f1_macro": 0.8196821086456596, + "eval_loss": 0.26795604825019836, + "eval_pr_auc": 0.6868264245068394, + "eval_precision": 0.7087801608579088, + "eval_precision_macro": 0.8248408116684653, + "eval_pred_class_0": 16684, + "eval_pred_class_1": 2984, + "eval_predicted_binding_ratio": 0.15171852755745374, + "eval_recall": 0.6820380522412125, + "eval_recall_macro": 0.8147921896384429, + "eval_runtime": 0.2632, + "eval_samples_per_second": 619.283, + "eval_steps_per_second": 3.799, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7384 + }, + { + "epoch": 285.0, + "eval_accuracy": 0.9057352043929225, + "eval_auc": 0.9310428312291054, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.696165191740413, + "eval_f1_macro": 0.8201894743681304, + "eval_loss": 0.2679717540740967, + "eval_pr_auc": 0.6865300625458848, + "eval_precision": 0.7077640786404532, + "eval_precision_macro": 0.8245726255085029, + "eval_pred_class_0": 16667, + "eval_pred_class_1": 3001, + "eval_predicted_binding_ratio": 0.15258287573723817, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8160018905963173, + "eval_runtime": 0.2365, + "eval_samples_per_second": 689.133, + "eval_steps_per_second": 4.228, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7410 + }, + { + "epoch": 286.0, + "eval_accuracy": 0.9054301403294692, + "eval_auc": 0.9311142093764568, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6946815495732108, + "eval_f1_macro": 0.8193658018591599, + "eval_loss": 0.2679450213909149, + "eval_pr_auc": 0.6869273130451355, + "eval_precision": 0.7074557004346372, + "eval_precision_macro": 0.8241961598653369, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6823605288616575, + "eval_recall_macro": 0.8147723450730694, + "eval_runtime": 0.1793, + "eval_samples_per_second": 908.844, + "eval_steps_per_second": 5.576, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7436 + }, + { + "epoch": 287.0, + "eval_accuracy": 0.9058368924140736, + "eval_auc": 0.9311615190538873, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.695695037791653, + "eval_f1_macro": 0.8199978948356761, + "eval_loss": 0.2679760158061981, + "eval_pr_auc": 0.6872164392300586, + "eval_precision": 0.7092127303182579, + "eval_precision_macro": 0.8251152664358777, + "eval_pred_class_0": 16683, + "eval_pred_class_1": 2985, + "eval_predicted_binding_ratio": 0.15176937156802928, + "eval_recall": 0.6826830054821026, + "eval_recall_macro": 0.8151448467381539, + "eval_runtime": 0.1776, + "eval_samples_per_second": 917.947, + "eval_steps_per_second": 5.632, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7462 + }, + { + "epoch": 288.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9311948528628156, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6959237343852729, + "eval_f1_macro": 0.8201439915761322, + "eval_loss": 0.26804664731025696, + "eval_pr_auc": 0.6873402512916988, + "eval_precision": 0.7096882333221589, + "eval_precision_macro": 0.8253565529811274, + "eval_pred_class_0": 16685, + "eval_pred_class_1": 2983, + "eval_predicted_binding_ratio": 0.15166768354687818, + "eval_recall": 0.6826830054821026, + "eval_recall_macro": 0.815205207696686, + "eval_runtime": 0.2679, + "eval_samples_per_second": 608.503, + "eval_steps_per_second": 3.733, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7488 + }, + { + "epoch": 288.46153846153845, + "grad_norm": 18250.5078125, + "learning_rate": 4.5321317063898914e-07, + "loss": 0.2101, + "step": 7500 + }, + { + "epoch": 289.0, + "eval_accuracy": 0.9058368924140736, + "eval_auc": 0.9312627857055362, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6958949096880132, + "eval_f1_macro": 0.8200944800500465, + "eval_loss": 0.26791396737098694, + "eval_pr_auc": 0.6879250403674073, + "eval_precision": 0.7089327534292406, + "eval_precision_macro": 0.8250281609942534, + "eval_pred_class_0": 16679, + "eval_pred_class_1": 2989, + "eval_predicted_binding_ratio": 0.1519727476103315, + "eval_recall": 0.6833279587229926, + "eval_recall_macro": 0.8154069624000669, + "eval_runtime": 0.2277, + "eval_samples_per_second": 715.925, + "eval_steps_per_second": 4.392, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7514 + }, + { + "epoch": 290.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9312241087546806, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.696522309711286, + "eval_f1_macro": 0.8204332366847645, + "eval_loss": 0.26787513494491577, + "eval_pr_auc": 0.6878638814996979, + "eval_precision": 0.7088480801335559, + "eval_precision_macro": 0.8250951850316913, + "eval_pred_class_0": 16673, + "eval_pred_class_1": 2995, + "eval_predicted_binding_ratio": 0.15227781167378482, + "eval_recall": 0.6846178652047726, + "eval_recall_macro": 0.815991554682425, + "eval_runtime": 0.2212, + "eval_samples_per_second": 736.836, + "eval_steps_per_second": 4.52, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7540 + }, + { + "epoch": 291.0, + "eval_accuracy": 0.9063453325198292, + "eval_auc": 0.9312868249779602, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6975369458128079, + "eval_f1_macro": 0.8210658921448086, + "eval_loss": 0.26807519793510437, + "eval_pr_auc": 0.6876156626538744, + "eval_precision": 0.7106055536968886, + "eval_precision_macro": 0.8260144502101566, + "eval_pred_class_0": 16679, + "eval_pred_class_1": 2989, + "eval_predicted_binding_ratio": 0.1519727476103315, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8163640563475095, + "eval_runtime": 0.2581, + "eval_samples_per_second": 631.522, + "eval_steps_per_second": 3.874, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7566 + }, + { + "epoch": 292.0, + "eval_accuracy": 0.9061928004881025, + "eval_auc": 0.9313246454689077, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6971935007385525, + "eval_f1_macro": 0.8208465473190101, + "eval_loss": 0.26810526847839355, + "eval_pr_auc": 0.6877564948921628, + "eval_precision": 0.7098930481283422, + "eval_precision_macro": 0.8256529284776994, + "eval_pred_class_0": 16676, + "eval_pred_class_1": 2992, + "eval_predicted_binding_ratio": 0.15212527964205816, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8162735149097116, + "eval_runtime": 0.206, + "eval_samples_per_second": 791.322, + "eval_steps_per_second": 4.855, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7592 + }, + { + "epoch": 293.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9313187962370344, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6969647251845775, + "eval_f1_macro": 0.8207004065741184, + "eval_loss": 0.26795074343681335, + "eval_pr_auc": 0.6879453119558532, + "eval_precision": 0.7094188376753507, + "eval_precision_macro": 0.8254123095657551, + "eval_pred_class_0": 16674, + "eval_pred_class_1": 2994, + "eval_predicted_binding_ratio": 0.15222696766320928, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8162131539511794, + "eval_runtime": 0.2264, + "eval_samples_per_second": 719.954, + "eval_steps_per_second": 4.417, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7618 + }, + { + "epoch": 294.0, + "eval_accuracy": 0.9061419564775269, + "eval_auc": 0.9313842959550158, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6966809069996713, + "eval_f1_macro": 0.820581055003595, + "eval_loss": 0.2678394019603729, + "eval_pr_auc": 0.6884837475836854, + "eval_precision": 0.7102177554438861, + "eval_precision_macro": 0.8257076908850431, + "eval_pred_class_0": 16683, + "eval_pred_class_1": 2985, + "eval_predicted_binding_ratio": 0.15176937156802928, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8157191031066195, + "eval_runtime": 0.1788, + "eval_samples_per_second": 911.642, + "eval_steps_per_second": 5.593, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7644 + }, + { + "epoch": 295.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.9314417858263554, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.696337658071933, + "eval_f1_macro": 0.8203618088536944, + "eval_loss": 0.26788315176963806, + "eval_pr_auc": 0.6885944465147925, + "eval_precision": 0.7095046854082999, + "eval_precision_macro": 0.8253458678840061, + "eval_pred_class_0": 16680, + "eval_pred_class_1": 2988, + "eval_predicted_binding_ratio": 0.15192190359975594, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8156285616688215, + "eval_runtime": 0.2717, + "eval_samples_per_second": 599.832, + "eval_steps_per_second": 3.68, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7670 + }, + { + "epoch": 296.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.9314500389854711, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.696636587366694, + "eval_f1_macro": 0.8205062543343502, + "eval_loss": 0.2678987681865692, + "eval_pr_auc": 0.6885305487751676, + "eval_precision": 0.7090848363393454, + "eval_precision_macro": 0.8252153220919469, + "eval_pred_class_0": 16674, + "eval_pred_class_1": 2994, + "eval_predicted_binding_ratio": 0.15222696766320928, + "eval_recall": 0.6846178652047726, + "eval_recall_macro": 0.816021735161691, + "eval_runtime": 0.2551, + "eval_samples_per_second": 639.002, + "eval_steps_per_second": 3.92, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7696 + }, + { + "epoch": 297.0, + "eval_accuracy": 0.9061928004881025, + "eval_auc": 0.931550575699698, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965959546127282, + "eval_f1_macro": 0.8205577878611678, + "eval_loss": 0.2679198086261749, + "eval_pr_auc": 0.6889351423284887, + "eval_precision": 0.710738255033557, + "eval_precision_macro": 0.8259168264621285, + "eval_pred_class_0": 16688, + "eval_pred_class_1": 2980, + "eval_predicted_binding_ratio": 0.15151515151515152, + "eval_recall": 0.6830054821025475, + "eval_recall_macro": 0.8154871679239726, + "eval_runtime": 0.2409, + "eval_samples_per_second": 676.584, + "eval_steps_per_second": 4.151, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7722 + }, + { + "epoch": 298.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9316071409836368, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6964256300444738, + "eval_f1_macro": 0.82051102635547, + "eval_loss": 0.2680239677429199, + "eval_pr_auc": 0.6888956424322248, + "eval_precision": 0.7117845117845117, + "eval_precision_macro": 0.8263378182350514, + "eval_pred_class_0": 16698, + "eval_pred_class_1": 2970, + "eval_predicted_binding_ratio": 0.15100671140939598, + "eval_recall": 0.6817155756207675, + "eval_recall_macro": 0.8150232975586785, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.263, + "eval_steps_per_second": 3.842, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7748 + }, + { + "epoch": 299.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9315966104197652, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6972235912600624, + "eval_f1_macro": 0.8208966763783243, + "eval_loss": 0.26803261041641235, + "eval_pr_auc": 0.6887732742755047, + "eval_precision": 0.7106496985934361, + "eval_precision_macro": 0.8259818448607991, + "eval_pred_class_0": 16682, + "eval_pred_class_1": 2986, + "eval_predicted_binding_ratio": 0.15182021557860484, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8160717602063305, + "eval_runtime": 0.2484, + "eval_samples_per_second": 656.12, + "eval_steps_per_second": 4.025, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7774 + }, + { + "epoch": 300.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9315960653998236, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6966218432272876, + "eval_f1_macro": 0.8204813289787078, + "eval_loss": 0.26792290806770325, + "eval_pr_auc": 0.6890570542847262, + "eval_precision": 0.7087087087087087, + "eval_precision_macro": 0.8250519729735134, + "eval_pred_class_0": 16671, + "eval_pred_class_1": 2997, + "eval_predicted_binding_ratio": 0.15237949969493594, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8161226125133815, + "eval_runtime": 0.2626, + "eval_samples_per_second": 620.694, + "eval_steps_per_second": 3.808, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7800 + }, + { + "epoch": 301.0, + "eval_accuracy": 0.9061928004881025, + "eval_auc": 0.9316357350655727, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6966957093539372, + "eval_f1_macro": 0.8206059967994839, + "eval_loss": 0.26809555292129517, + "eval_pr_auc": 0.6889378655811479, + "eval_precision": 0.710596914822267, + "eval_precision_macro": 0.8258725914156881, + "eval_pred_class_0": 16686, + "eval_pred_class_1": 2982, + "eval_predicted_binding_ratio": 0.15161683953630262, + "eval_recall": 0.6833279587229926, + "eval_recall_macro": 0.815618225754929, + "eval_runtime": 0.2178, + "eval_samples_per_second": 748.303, + "eval_steps_per_second": 4.591, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7826 + }, + { + "epoch": 302.0, + "eval_accuracy": 0.9061419564775269, + "eval_auc": 0.9317000084886855, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965811965811965, + "eval_f1_macro": 0.8205328694321837, + "eval_loss": 0.26798245310783386, + "eval_pr_auc": 0.6894273728032447, + "eval_precision": 0.7103586992960107, + "eval_precision_macro": 0.8257517200405735, + "eval_pred_class_0": 16685, + "eval_pred_class_1": 2983, + "eval_predicted_binding_ratio": 0.15166768354687818, + "eval_recall": 0.6833279587229926, + "eval_recall_macro": 0.815588045275663, + "eval_runtime": 0.2561, + "eval_samples_per_second": 636.477, + "eval_steps_per_second": 3.905, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7852 + }, + { + "epoch": 303.0, + "eval_accuracy": 0.9060402684563759, + "eval_auc": 0.9316820812256066, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6966513460275772, + "eval_f1_macro": 0.8205311837826491, + "eval_loss": 0.26780617237091064, + "eval_pr_auc": 0.6897785082602487, + "eval_precision": 0.7094617184887997, + "eval_precision_macro": 0.8253790573615671, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8159208578100005, + "eval_runtime": 0.1902, + "eval_samples_per_second": 856.826, + "eval_steps_per_second": 5.257, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7878 + }, + { + "epoch": 304.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9317363691047894, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6963230466185161, + "eval_f1_macro": 0.8203369534620676, + "eval_loss": 0.26783081889152527, + "eval_pr_auc": 0.690108350201664, + "eval_precision": 0.7091273821464393, + "eval_precision_macro": 0.8251819077788622, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.815729439020512, + "eval_runtime": 0.2643, + "eval_samples_per_second": 616.745, + "eval_steps_per_second": 3.784, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7904 + }, + { + "epoch": 305.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9318277475374976, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6962670613385956, + "eval_f1_macro": 0.8203632705580364, + "eval_loss": 0.267810195684433, + "eval_pr_auc": 0.6906198234983021, + "eval_precision": 0.7104026845637584, + "eval_precision_macro": 0.82571907957814, + "eval_pred_class_0": 16688, + "eval_pred_class_1": 2980, + "eval_predicted_binding_ratio": 0.15151515151515152, + "eval_recall": 0.6826830054821026, + "eval_recall_macro": 0.8152957491344841, + "eval_runtime": 0.2457, + "eval_samples_per_second": 663.513, + "eval_steps_per_second": 4.071, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7930 + }, + { + "epoch": 306.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9318495094051658, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965664530967636, + "eval_f1_macro": 0.8205079551116469, + "eval_loss": 0.26770758628845215, + "eval_pr_auc": 0.6907229577841941, + "eval_precision": 0.709979906229069, + "eval_precision_macro": 0.8255870038278783, + "eval_pred_class_0": 16682, + "eval_pred_class_1": 2986, + "eval_predicted_binding_ratio": 0.15182021557860484, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8156889226273535, + "eval_runtime": 0.2472, + "eval_samples_per_second": 659.459, + "eval_steps_per_second": 4.046, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7956 + }, + { + "epoch": 307.0, + "eval_accuracy": 0.9063453325198292, + "eval_auc": 0.9318431735483448, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6973381531383503, + "eval_f1_macro": 0.8209698284488745, + "eval_loss": 0.26784345507621765, + "eval_pr_auc": 0.6903028236900399, + "eval_precision": 0.7108877721943049, + "eval_precision_macro": 0.8261026405178202, + "eval_pred_class_0": 16683, + "eval_pred_class_1": 2985, + "eval_predicted_binding_ratio": 0.15176937156802928, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8161019406855965, + "eval_runtime": 0.2339, + "eval_samples_per_second": 696.845, + "eval_steps_per_second": 4.275, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 7982 + }, + { + "epoch": 307.6923076923077, + "grad_norm": 18753.48046875, + "learning_rate": 3.8700127731844033e-07, + "loss": 0.2071, + "step": 8000 + }, + { + "epoch": 308.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9318915635331595, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6966661192314009, + "eval_f1_macro": 0.8205561173351938, + "eval_loss": 0.2679731547832489, + "eval_pr_auc": 0.6902808443840289, + "eval_precision": 0.7098393574297188, + "eval_precision_macro": 0.8255431799139001, + "eval_pred_class_0": 16680, + "eval_pred_class_1": 2988, + "eval_predicted_binding_ratio": 0.15192190359975594, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.81581998045831, + "eval_runtime": 0.2227, + "eval_samples_per_second": 731.952, + "eval_steps_per_second": 4.491, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8008 + }, + { + "epoch": 309.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9319105808361218, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6975217462662071, + "eval_f1_macro": 0.821040751603759, + "eval_loss": 0.2678290605545044, + "eval_pr_auc": 0.6904266123808676, + "eval_precision": 0.7102272727272727, + "eval_precision_macro": 0.8258500239865676, + "eval_pred_class_0": 16676, + "eval_pred_class_1": 2992, + "eval_predicted_binding_ratio": 0.15212527964205816, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.8164649336992, + "eval_runtime": 0.2365, + "eval_samples_per_second": 689.086, + "eval_steps_per_second": 4.228, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8034 + }, + { + "epoch": 310.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9319563819762139, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6977201902575036, + "eval_f1_macro": 0.821136637744354, + "eval_loss": 0.2677942216396332, + "eval_pr_auc": 0.6905770539125178, + "eval_precision": 0.7099465954606141, + "eval_precision_macro": 0.8257626451391362, + "eval_pred_class_0": 16672, + "eval_pred_class_1": 2996, + "eval_predicted_binding_ratio": 0.15232865568436038, + "eval_recall": 0.6859077716865527, + "eval_recall_macro": 0.816727049361113, + "eval_runtime": 0.2422, + "eval_samples_per_second": 672.991, + "eval_steps_per_second": 4.129, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8060 + }, + { + "epoch": 311.0, + "eval_accuracy": 0.9064470205409803, + "eval_auc": 0.9320261250637406, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6975673898750822, + "eval_f1_macro": 0.8211161862162611, + "eval_loss": 0.2678627669811249, + "eval_pr_auc": 0.6908896136415948, + "eval_precision": 0.7113643982567884, + "eval_precision_macro": 0.8263444706297427, + "eval_pred_class_0": 16685, + "eval_pred_class_1": 2983, + "eval_predicted_binding_ratio": 0.15166768354687818, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8161623016441286, + "eval_runtime": 0.264, + "eval_samples_per_second": 617.505, + "eval_steps_per_second": 3.788, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8086 + }, + { + "epoch": 312.0, + "eval_accuracy": 0.9060402684563759, + "eval_auc": 0.9320056478859348, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.696551724137931, + "eval_f1_macro": 0.8204830448879512, + "eval_loss": 0.2678248882293701, + "eval_pr_auc": 0.6908301522653787, + "eval_precision": 0.7096018735362998, + "eval_precision_macro": 0.8254226766806146, + "eval_pred_class_0": 16679, + "eval_pred_class_1": 2989, + "eval_predicted_binding_ratio": 0.1519727476103315, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.815789799979044, + "eval_runtime": 0.2616, + "eval_samples_per_second": 623.086, + "eval_steps_per_second": 3.823, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8112 + }, + { + "epoch": 313.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.9319988935316585, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965370096832431, + "eval_f1_macro": 0.8204581387495119, + "eval_loss": 0.2678254544734955, + "eval_pr_auc": 0.6908227024589884, + "eval_precision": 0.7092245989304813, + "eval_precision_macro": 0.8252587374599636, + "eval_pred_class_0": 16676, + "eval_pred_class_1": 2992, + "eval_predicted_binding_ratio": 0.15212527964205816, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8158906773307344, + "eval_runtime": 0.2589, + "eval_samples_per_second": 629.545, + "eval_steps_per_second": 3.862, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8138 + }, + { + "epoch": 314.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9320198184044164, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6971634694212165, + "eval_f1_macro": 0.8207964351950081, + "eval_loss": 0.2678711414337158, + "eval_pr_auc": 0.6909501280556708, + "eval_precision": 0.7091394262841895, + "eval_precision_macro": 0.8253255619723288, + "eval_pred_class_0": 16670, + "eval_pred_class_1": 2998, + "eval_predicted_binding_ratio": 0.15243034370551148, + "eval_recall": 0.6855852950661077, + "eval_recall_macro": 0.8164752696130925, + "eval_runtime": 0.2654, + "eval_samples_per_second": 614.201, + "eval_steps_per_second": 3.768, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8164 + }, + { + "epoch": 315.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9320753325784678, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6963230466185161, + "eval_f1_macro": 0.8203369534620676, + "eval_loss": 0.2678229808807373, + "eval_pr_auc": 0.6914105766155315, + "eval_precision": 0.7091273821464393, + "eval_precision_macro": 0.8251819077788622, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.815729439020512, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.375, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8190 + }, + { + "epoch": 316.0, + "eval_accuracy": 0.9061928004881025, + "eval_auc": 0.9321298151076297, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6967953985209532, + "eval_f1_macro": 0.8206541727499956, + "eval_loss": 0.26796844601631165, + "eval_pr_auc": 0.6912288183485439, + "eval_precision": 0.710455764075067, + "eval_precision_macro": 0.8258284574391159, + "eval_pred_class_0": 16684, + "eval_pred_class_1": 2984, + "eval_predicted_binding_ratio": 0.15171852755745374, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8157492835858855, + "eval_runtime": 0.2237, + "eval_samples_per_second": 728.567, + "eval_steps_per_second": 4.47, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8216 + }, + { + "epoch": 317.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9321722877330785, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6956235603817045, + "eval_f1_macro": 0.8199989231339035, + "eval_loss": 0.2681267261505127, + "eval_pr_auc": 0.6912998271961284, + "eval_precision": 0.7101108498488411, + "eval_precision_macro": 0.8254885924997606, + "eval_pred_class_0": 16691, + "eval_pred_class_1": 2977, + "eval_predicted_binding_ratio": 0.15136261948342486, + "eval_recall": 0.6817155756207675, + "eval_recall_macro": 0.8148120342038165, + "eval_runtime": 0.1877, + "eval_samples_per_second": 868.412, + "eval_steps_per_second": 5.328, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8242 + }, + { + "epoch": 318.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9321751880177678, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965664530967636, + "eval_f1_macro": 0.8205079551116469, + "eval_loss": 0.26805615425109863, + "eval_pr_auc": 0.6913470747613989, + "eval_precision": 0.709979906229069, + "eval_precision_macro": 0.8255870038278783, + "eval_pred_class_0": 16682, + "eval_pred_class_1": 2986, + "eval_predicted_binding_ratio": 0.15182021557860484, + "eval_recall": 0.6836504353434376, + "eval_recall_macro": 0.8156889226273535, + "eval_runtime": 0.2426, + "eval_samples_per_second": 671.932, + "eval_steps_per_second": 4.122, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8268 + }, + { + "epoch": 319.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.9321843949617812, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6961380443714051, + "eval_f1_macro": 0.8202653471082613, + "eval_loss": 0.267932653427124, + "eval_pr_auc": 0.6916487257323465, + "eval_precision": 0.7097855227882037, + "eval_precision_macro": 0.8254333991308556, + "eval_pred_class_0": 16684, + "eval_pred_class_1": 2984, + "eval_predicted_binding_ratio": 0.15171852755745374, + "eval_recall": 0.6830054821025475, + "eval_recall_macro": 0.8153664460069084, + "eval_runtime": 0.2569, + "eval_samples_per_second": 634.38, + "eval_steps_per_second": 3.892, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8294 + }, + { + "epoch": 320.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.932188093311385, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6964373666064686, + "eval_f1_macro": 0.8204099902666875, + "eval_loss": 0.2679634094238281, + "eval_pr_auc": 0.6915655068510385, + "eval_precision": 0.7093645484949833, + "eval_precision_macro": 0.8253022526621696, + "eval_pred_class_0": 16678, + "eval_pred_class_1": 2990, + "eval_predicted_binding_ratio": 0.15202359162090706, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.815759619499778, + "eval_runtime": 0.2483, + "eval_samples_per_second": 656.448, + "eval_steps_per_second": 4.027, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8320 + }, + { + "epoch": 321.0, + "eval_accuracy": 0.9058877364246491, + "eval_auc": 0.9321906237611137, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965076242006887, + "eval_f1_macro": 0.8204083386821658, + "eval_loss": 0.26790735125541687, + "eval_pr_auc": 0.6917736045731879, + "eval_precision": 0.7084723148765844, + "eval_precision_macro": 0.8249320182661266, + "eval_pred_class_0": 16670, + "eval_pred_class_1": 2998, + "eval_predicted_binding_ratio": 0.15243034370551148, + "eval_recall": 0.6849403418252177, + "eval_recall_macro": 0.8160924320341154, + "eval_runtime": 0.2593, + "eval_samples_per_second": 628.581, + "eval_steps_per_second": 3.856, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8346 + }, + { + "epoch": 322.0, + "eval_accuracy": 0.9058368924140736, + "eval_auc": 0.932275423024527, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6955950032873109, + "eval_f1_macro": 0.8199495526481063, + "eval_loss": 0.2680682837963104, + "eval_pr_auc": 0.6919499023976591, + "eval_precision": 0.709353000335233, + "eval_precision_macro": 0.8251589694514043, + "eval_pred_class_0": 16685, + "eval_pred_class_1": 2983, + "eval_predicted_binding_ratio": 0.15166768354687818, + "eval_recall": 0.6823605288616575, + "eval_recall_macro": 0.8150137889071974, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.703, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8372 + }, + { + "epoch": 323.0, + "eval_accuracy": 0.9059894244458003, + "eval_auc": 0.9322442887603632, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6965370096832431, + "eval_f1_macro": 0.8204581387495119, + "eval_loss": 0.26799651980400085, + "eval_pr_auc": 0.6918309607756195, + "eval_precision": 0.7092245989304813, + "eval_precision_macro": 0.8252587374599636, + "eval_pred_class_0": 16676, + "eval_pred_class_1": 2992, + "eval_predicted_binding_ratio": 0.15212527964205816, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8158906773307344, + "eval_runtime": 0.2522, + "eval_samples_per_second": 646.368, + "eval_steps_per_second": 3.965, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8398 + }, + { + "epoch": 324.0, + "eval_accuracy": 0.9058368924140736, + "eval_auc": 0.9322356073712934, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6960945191992124, + "eval_f1_macro": 0.8201909332654507, + "eval_loss": 0.2680003046989441, + "eval_pr_auc": 0.6918708088537314, + "eval_precision": 0.7086535248914133, + "eval_precision_macro": 0.8249414550993799, + "eval_pred_class_0": 16675, + "eval_pred_class_1": 2993, + "eval_predicted_binding_ratio": 0.15217612365263372, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.81566907806198, + "eval_runtime": 0.1979, + "eval_samples_per_second": 823.473, + "eval_steps_per_second": 5.052, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8424 + }, + { + "epoch": 325.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9322670238779269, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6961235216819974, + "eval_f1_macro": 0.8202405385118361, + "eval_loss": 0.26794886589050293, + "eval_pr_auc": 0.6921960622354616, + "eval_precision": 0.7094074322062269, + "eval_precision_macro": 0.8252690299332196, + "eval_pred_class_0": 16681, + "eval_pred_class_1": 2987, + "eval_predicted_binding_ratio": 0.1518710595891804, + "eval_recall": 0.6833279587229926, + "eval_recall_macro": 0.815467323358599, + "eval_runtime": 0.2683, + "eval_samples_per_second": 607.5, + "eval_steps_per_second": 3.727, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8450 + }, + { + "epoch": 326.0, + "eval_accuracy": 0.9059385804352247, + "eval_auc": 0.9322930096501425, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6963230466185161, + "eval_f1_macro": 0.8203369534620676, + "eval_loss": 0.2680268883705139, + "eval_pr_auc": 0.6921477872574622, + "eval_precision": 0.7091273821464393, + "eval_precision_macro": 0.8251819077788622, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6839729119638827, + "eval_recall_macro": 0.815729439020512, + "eval_runtime": 0.1733, + "eval_samples_per_second": 940.367, + "eval_steps_per_second": 5.769, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8476 + }, + { + "epoch": 326.9230769230769, + "grad_norm": 17241.076171875, + "learning_rate": 3.2282309449959705e-07, + "loss": 0.2047, + "step": 8500 + }, + { + "epoch": 327.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9323445529646195, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6967657199146281, + "eval_f1_macro": 0.820604246632002, + "eval_loss": 0.26804205775260925, + "eval_pr_auc": 0.6924439463998024, + "eval_precision": 0.7096989966555184, + "eval_precision_macro": 0.8254994563562998, + "eval_pred_class_0": 16678, + "eval_pred_class_1": 2990, + "eval_predicted_binding_ratio": 0.15202359162090706, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8159510382892665, + "eval_runtime": 0.1771, + "eval_samples_per_second": 920.47, + "eval_steps_per_second": 5.647, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8502 + }, + { + "epoch": 328.0, + "eval_accuracy": 0.9060911124669514, + "eval_auc": 0.9323743441439272, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6967657199146281, + "eval_f1_macro": 0.820604246632002, + "eval_loss": 0.26802849769592285, + "eval_pr_auc": 0.6925977669253861, + "eval_precision": 0.7096989966555184, + "eval_precision_macro": 0.8254994563562998, + "eval_pred_class_0": 16678, + "eval_pred_class_1": 2990, + "eval_predicted_binding_ratio": 0.15202359162090706, + "eval_recall": 0.6842953885843276, + "eval_recall_macro": 0.8159510382892665, + "eval_runtime": 0.1678, + "eval_samples_per_second": 971.421, + "eval_steps_per_second": 5.96, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8528 + }, + { + "epoch": 329.0, + "eval_accuracy": 0.9063453325198292, + "eval_auc": 0.9323637649175607, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6983295119554537, + "eval_f1_macro": 0.8214488366277419, + "eval_loss": 0.2680566608905792, + "eval_pr_auc": 0.6924500045026715, + "eval_precision": 0.7094841930116472, + "eval_precision_macro": 0.825665699698526, + "eval_pred_class_0": 16663, + "eval_pred_class_1": 3005, + "eval_predicted_binding_ratio": 0.15278625177954036, + "eval_recall": 0.6875201547887778, + "eval_recall_macro": 0.8174125189951615, + "eval_runtime": 0.2683, + "eval_samples_per_second": 607.54, + "eval_steps_per_second": 3.727, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8554 + }, + { + "epoch": 330.0, + "eval_accuracy": 0.906243644498678, + "eval_auc": 0.9324242037360844, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6974072858549393, + "eval_f1_macro": 0.8209676462967013, + "eval_loss": 0.26804018020629883, + "eval_pr_auc": 0.6928556723686659, + "eval_precision": 0.7099899766120948, + "eval_precision_macro": 0.8257296209897056, + "eval_pred_class_0": 16675, + "eval_pred_class_1": 2993, + "eval_predicted_binding_ratio": 0.15217612365263372, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.8164347532199341, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.585, + "eval_steps_per_second": 3.869, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8580 + }, + { + "epoch": 331.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9323838917254041, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6981162981162982, + "eval_f1_macro": 0.8213280175544326, + "eval_loss": 0.2678423821926117, + "eval_pr_auc": 0.6929480254197629, + "eval_precision": 0.7093874833555259, + "eval_precision_macro": 0.8255890849326837, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6871976781683328, + "eval_recall_macro": 0.817251280684939, + "eval_runtime": 0.2474, + "eval_samples_per_second": 658.814, + "eval_steps_per_second": 4.042, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8606 + }, + { + "epoch": 332.0, + "eval_accuracy": 0.9065487085621314, + "eval_auc": 0.9323530591687079, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6993784756297023, + "eval_f1_macro": 0.8220268454242666, + "eval_loss": 0.26779934763908386, + "eval_pr_auc": 0.6928104051729911, + "eval_precision": 0.7095917690009956, + "eval_precision_macro": 0.8258856473344816, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183195878979646, + "eval_runtime": 0.2656, + "eval_samples_per_second": 613.815, + "eval_steps_per_second": 3.766, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8632 + }, + { + "epoch": 333.0, + "eval_accuracy": 0.9063961765304047, + "eval_auc": 0.932432009200248, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6983450761920367, + "eval_f1_macro": 0.8214741659959972, + "eval_loss": 0.2679860591888428, + "eval_pr_auc": 0.6929996618872077, + "eval_precision": 0.7098600932711525, + "eval_precision_macro": 0.8258288825890143, + "eval_pred_class_0": 16666, + "eval_pred_class_1": 3002, + "eval_predicted_binding_ratio": 0.1526337197478137, + "eval_recall": 0.6871976781683328, + "eval_recall_macro": 0.817311641643471, + "eval_runtime": 0.1802, + "eval_samples_per_second": 904.69, + "eval_steps_per_second": 5.55, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8658 + }, + { + "epoch": 334.0, + "eval_accuracy": 0.9063961765304047, + "eval_auc": 0.9325279911049631, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6975521603417119, + "eval_f1_macro": 0.8210910370116632, + "eval_loss": 0.26808273792266846, + "eval_pr_auc": 0.6933573725062909, + "eval_precision": 0.7109845947756196, + "eval_precision_macro": 0.8261792653772595, + "eval_pred_class_0": 16682, + "eval_pred_class_1": 2986, + "eval_predicted_binding_ratio": 0.15182021557860484, + "eval_recall": 0.6846178652047726, + "eval_recall_macro": 0.816263178995819, + "eval_runtime": 0.2613, + "eval_samples_per_second": 623.721, + "eval_steps_per_second": 3.827, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8684 + }, + { + "epoch": 335.0, + "eval_accuracy": 0.9063453325198292, + "eval_auc": 0.932545276023111, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6976362442547603, + "eval_f1_macro": 0.8211138747443938, + "eval_loss": 0.26816996932029724, + "eval_pr_auc": 0.6932853419412803, + "eval_precision": 0.710464727515881, + "eval_precision_macro": 0.8259705061096825, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.816495114178466, + "eval_runtime": 0.254, + "eval_samples_per_second": 641.649, + "eval_steps_per_second": 3.936, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8710 + }, + { + "epoch": 336.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9325305604846879, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6982772764561116, + "eval_f1_macro": 0.821477015533191, + "eval_loss": 0.2680180072784424, + "eval_pr_auc": 0.6935243427052671, + "eval_precision": 0.7107548430193721, + "eval_precision_macro": 0.8262002594609874, + "eval_pred_class_0": 16674, + "eval_pred_class_1": 2994, + "eval_predicted_binding_ratio": 0.15222696766320928, + "eval_recall": 0.6862302483069977, + "eval_recall_macro": 0.8169788291091336, + "eval_runtime": 0.264, + "eval_samples_per_second": 617.515, + "eval_steps_per_second": 3.788, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8736 + }, + { + "epoch": 337.0, + "eval_accuracy": 0.9063453325198292, + "eval_auc": 0.932509878924404, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.698032786885246, + "eval_f1_macro": 0.8213054775682699, + "eval_loss": 0.2678954601287842, + "eval_pr_auc": 0.6935428752602653, + "eval_precision": 0.7099033011003668, + "eval_precision_macro": 0.8257957323787274, + "eval_pred_class_0": 16669, + "eval_pred_class_1": 2999, + "eval_predicted_binding_ratio": 0.15248118771608704, + "eval_recall": 0.6865527249274428, + "eval_recall_macro": 0.817019345502292, + "eval_runtime": 0.2614, + "eval_samples_per_second": 623.491, + "eval_steps_per_second": 3.825, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8762 + }, + { + "epoch": 338.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9325608674864403, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6989176779271893, + "eval_f1_macro": 0.8218398486513014, + "eval_loss": 0.26806220412254333, + "eval_pr_auc": 0.6935170525264562, + "eval_precision": 0.7110443777110443, + "eval_precision_macro": 0.8264297528888735, + "eval_pred_class_0": 16671, + "eval_pred_class_1": 2997, + "eval_predicted_binding_ratio": 0.15237949969493594, + "eval_recall": 0.6871976781683328, + "eval_recall_macro": 0.8174625440398011, + "eval_runtime": 0.1767, + "eval_samples_per_second": 922.3, + "eval_steps_per_second": 5.658, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8788 + }, + { + "epoch": 339.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9325807801793065, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6990163934426229, + "eval_f1_macro": 0.8218875444165816, + "eval_loss": 0.26803991198539734, + "eval_pr_auc": 0.6936355510877774, + "eval_precision": 0.7109036345448483, + "eval_precision_macro": 0.8263858865027319, + "eval_pred_class_0": 16669, + "eval_pred_class_1": 2999, + "eval_predicted_binding_ratio": 0.15248118771608704, + "eval_recall": 0.6875201547887778, + "eval_recall_macro": 0.8175936018707576, + "eval_runtime": 0.257, + "eval_samples_per_second": 634.235, + "eval_steps_per_second": 3.891, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8814 + }, + { + "epoch": 340.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9326051308916972, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6983762506150566, + "eval_f1_macro": 0.8215248382050282, + "eval_loss": 0.26792144775390625, + "eval_pr_auc": 0.6939264625162457, + "eval_precision": 0.7106141522029372, + "eval_precision_macro": 0.8261564043164398, + "eval_pred_class_0": 16672, + "eval_pred_class_1": 2996, + "eval_predicted_binding_ratio": 0.15232865568436038, + "eval_recall": 0.6865527249274428, + "eval_recall_macro": 0.8171098869400901, + "eval_runtime": 0.2744, + "eval_samples_per_second": 593.925, + "eval_steps_per_second": 3.644, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8840 + }, + { + "epoch": 341.0, + "eval_accuracy": 0.9063961765304047, + "eval_auc": 0.9325905710732574, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6980482204362801, + "eval_f1_macro": 0.821330737974691, + "eval_loss": 0.26793381571769714, + "eval_pr_auc": 0.6938155556406347, + "eval_precision": 0.7102803738317757, + "eval_precision_macro": 0.825959524727788, + "eval_pred_class_0": 16672, + "eval_pred_class_1": 2996, + "eval_predicted_binding_ratio": 0.15232865568436038, + "eval_recall": 0.6862302483069977, + "eval_recall_macro": 0.8169184681506015, + "eval_runtime": 0.2601, + "eval_samples_per_second": 626.704, + "eval_steps_per_second": 3.845, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8866 + }, + { + "epoch": 342.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9325911160931989, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997542997542997, + "eval_f1_macro": 0.8222974802915219, + "eval_loss": 0.26800957322120667, + "eval_pr_auc": 0.6936597261010234, + "eval_precision": 0.711051930758988, + "eval_precision_macro": 0.8265713326382553, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8182083746323816, + "eval_runtime": 0.1766, + "eval_samples_per_second": 923.118, + "eval_steps_per_second": 5.663, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8892 + }, + { + "epoch": 343.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9326288587241547, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6982772764561116, + "eval_f1_macro": 0.821477015533191, + "eval_loss": 0.2680290937423706, + "eval_pr_auc": 0.6938544134850919, + "eval_precision": 0.7107548430193721, + "eval_precision_macro": 0.8262002594609874, + "eval_pred_class_0": 16674, + "eval_pred_class_1": 2994, + "eval_predicted_binding_ratio": 0.15222696766320928, + "eval_recall": 0.6862302483069977, + "eval_recall_macro": 0.8169788291091336, + "eval_runtime": 0.2553, + "eval_samples_per_second": 638.353, + "eval_steps_per_second": 3.916, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8918 + }, + { + "epoch": 344.0, + "eval_accuracy": 0.9064470205409803, + "eval_auc": 0.9326493359019604, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6982617251557888, + "eval_f1_macro": 0.8214517001734176, + "eval_loss": 0.2680736482143402, + "eval_pr_auc": 0.6938105747317047, + "eval_precision": 0.710377043710377, + "eval_precision_macro": 0.8260361014844849, + "eval_pred_class_0": 16671, + "eval_pred_class_1": 2997, + "eval_predicted_binding_ratio": 0.15237949969493594, + "eval_recall": 0.6865527249274428, + "eval_recall_macro": 0.8170797064608241, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.605, + "eval_steps_per_second": 3.869, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8944 + }, + { + "epoch": 345.0, + "eval_accuracy": 0.9063961765304047, + "eval_auc": 0.9326923535473509, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6977507798391068, + "eval_f1_macro": 0.8211870157279457, + "eval_loss": 0.2681424021720886, + "eval_pr_auc": 0.6938928037843046, + "eval_precision": 0.7107023411371237, + "eval_precision_macro": 0.8260910674386901, + "eval_pred_class_0": 16678, + "eval_pred_class_1": 2990, + "eval_predicted_binding_ratio": 0.15202359162090706, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.816525294657732, + "eval_runtime": 0.2453, + "eval_samples_per_second": 664.523, + "eval_steps_per_second": 4.077, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8970 + }, + { + "epoch": 346.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9327347288478101, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6979799638692724, + "eval_f1_macro": 0.821333351261192, + "eval_loss": 0.2682046592235565, + "eval_pr_auc": 0.6939995353512864, + "eval_precision": 0.7111780455153949, + "eval_precision_macro": 0.8263324280334768, + "eval_pred_class_0": 16680, + "eval_pred_class_1": 2988, + "eval_predicted_binding_ratio": 0.15192190359975594, + "eval_recall": 0.6852628184456627, + "eval_recall_macro": 0.8165856556162641, + "eval_runtime": 0.2656, + "eval_samples_per_second": 613.642, + "eval_steps_per_second": 3.765, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 8996 + }, + { + "epoch": 346.15384615384613, + "grad_norm": 18666.783203125, + "learning_rate": 2.618336781094791e-07, + "loss": 0.2031, + "step": 9000 + }, + { + "epoch": 347.0, + "eval_accuracy": 0.9062944885092536, + "eval_auc": 0.9326942611171465, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6983139630054018, + "eval_f1_macro": 0.8214235117341392, + "eval_loss": 0.2681069076061249, + "eval_pr_auc": 0.6938781737586003, + "eval_precision": 0.7091090425531915, + "eval_precision_macro": 0.8255029006283365, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6878426314092229, + "eval_recall_macro": 0.817513396346852, + "eval_runtime": 0.1933, + "eval_samples_per_second": 843.456, + "eval_steps_per_second": 5.175, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9022 + }, + { + "epoch": 348.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9326940859321652, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6992961204779833, + "eval_f1_macro": 0.8220048784892098, + "eval_loss": 0.26795056462287903, + "eval_pr_auc": 0.6940722215736992, + "eval_precision": 0.7101063829787234, + "eval_precision_macro": 0.8260916068555082, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8180876527153176, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.287, + "eval_steps_per_second": 3.842, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9048 + }, + { + "epoch": 349.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9327572303853989, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6991310050828005, + "eval_f1_macro": 0.8219607247335354, + "eval_loss": 0.267974317073822, + "eval_pr_auc": 0.6943572217621294, + "eval_precision": 0.7111407605070047, + "eval_precision_macro": 0.8265061930909349, + "eval_pred_class_0": 16670, + "eval_pred_class_1": 2998, + "eval_predicted_binding_ratio": 0.15243034370551148, + "eval_recall": 0.6875201547887778, + "eval_recall_macro": 0.8176237823500236, + "eval_runtime": 0.2638, + "eval_samples_per_second": 617.819, + "eval_steps_per_second": 3.79, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9074 + }, + { + "epoch": 350.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9327859996523161, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6985064828491712, + "eval_f1_macro": 0.8216233644580062, + "eval_loss": 0.2680947780609131, + "eval_pr_auc": 0.6943020020881897, + "eval_precision": 0.7112299465240641, + "eval_precision_macro": 0.8264413105131714, + "eval_pred_class_0": 16676, + "eval_pred_class_1": 2992, + "eval_predicted_binding_ratio": 0.15212527964205816, + "eval_recall": 0.6862302483069977, + "eval_recall_macro": 0.8170391900676656, + "eval_runtime": 0.2408, + "eval_samples_per_second": 676.86, + "eval_steps_per_second": 4.153, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9100 + }, + { + "epoch": 351.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9328183504788495, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6986211424819435, + "eval_f1_macro": 0.8216965657061384, + "eval_loss": 0.2681223750114441, + "eval_pr_auc": 0.694450730809475, + "eval_precision": 0.7114677365429622, + "eval_precision_macro": 0.8265619548577976, + "eval_pred_class_0": 16677, + "eval_pred_class_1": 2991, + "eval_predicted_binding_ratio": 0.15207443563148262, + "eval_recall": 0.6862302483069977, + "eval_recall_macro": 0.8170693705469316, + "eval_runtime": 0.2563, + "eval_samples_per_second": 635.919, + "eval_steps_per_second": 3.901, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9126 + }, + { + "epoch": 352.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9328099415997506, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6991310050828005, + "eval_f1_macro": 0.8219607247335354, + "eval_loss": 0.26799651980400085, + "eval_pr_auc": 0.6945293388795055, + "eval_precision": 0.7111407605070047, + "eval_precision_macro": 0.8265061930909349, + "eval_pred_class_0": 16670, + "eval_pred_class_1": 2998, + "eval_predicted_binding_ratio": 0.15243034370551148, + "eval_recall": 0.6875201547887778, + "eval_recall_macro": 0.8176237823500236, + "eval_runtime": 0.1729, + "eval_samples_per_second": 942.612, + "eval_steps_per_second": 5.783, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9152 + }, + { + "epoch": 353.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9328013186056745, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7000819000819001, + "eval_f1_macro": 0.8224913728389398, + "eval_loss": 0.26793336868286133, + "eval_pr_auc": 0.6945633927009858, + "eval_precision": 0.7113848202396804, + "eval_precision_macro": 0.8267677821793697, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8183997934218701, + "eval_runtime": 0.2553, + "eval_samples_per_second": 638.431, + "eval_steps_per_second": 3.917, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9178 + }, + { + "epoch": 354.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.932783060437631, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.26802781224250793, + "eval_pr_auc": 0.694307607889245, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.039, + "eval_steps_per_second": 3.828, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9204 + }, + { + "epoch": 355.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9327856687473517, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997875469848014, + "eval_f1_macro": 0.8222422697443199, + "eval_loss": 0.2679577171802521, + "eval_pr_auc": 0.6944152708065687, + "eval_precision": 0.7094102054340623, + "eval_precision_macro": 0.8258762738882024, + "eval_pred_class_0": 16650, + "eval_pred_class_1": 3018, + "eval_predicted_binding_ratio": 0.15344722391702256, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8187429418701, + "eval_runtime": 0.1868, + "eval_samples_per_second": 872.379, + "eval_steps_per_second": 5.352, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9230 + }, + { + "epoch": 356.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9328543023299973, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7000490918016691, + "eval_f1_macro": 0.8224398957879677, + "eval_loss": 0.26801130175590515, + "eval_pr_auc": 0.6946173571124205, + "eval_precision": 0.7106312292358804, + "eval_precision_macro": 0.8264405996101362, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8186015481252511, + "eval_runtime": 0.1679, + "eval_samples_per_second": 971.014, + "eval_steps_per_second": 5.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9256 + }, + { + "epoch": 357.0, + "eval_accuracy": 0.9069554606467358, + "eval_auc": 0.9328912663610364, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002947920078612, + "eval_f1_macro": 0.8226120363891247, + "eval_loss": 0.26803824305534363, + "eval_pr_auc": 0.6948368351435844, + "eval_precision": 0.7114808652246256, + "eval_precision_macro": 0.8268440754137292, + "eval_pred_class_0": 16663, + "eval_pred_class_1": 3005, + "eval_predicted_binding_ratio": 0.15278625177954036, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8185610317320926, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.12, + "eval_steps_per_second": 3.841, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9282 + }, + { + "epoch": 358.0, + "eval_accuracy": 0.9069554606467358, + "eval_auc": 0.9328766286826047, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003929273084479, + "eval_f1_macro": 0.8226594466805872, + "eval_loss": 0.2680445909500122, + "eval_pr_auc": 0.6947543185413181, + "eval_precision": 0.711340206185567, + "eval_precision_macro": 0.8268002873554328, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8186920895630492, + "eval_runtime": 0.2229, + "eval_samples_per_second": 731.38, + "eval_steps_per_second": 4.487, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9308 + }, + { + "epoch": 359.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9328791980623294, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002617801047121, + "eval_f1_macro": 0.8225604590386311, + "eval_loss": 0.2679673135280609, + "eval_pr_auc": 0.694877850403851, + "eval_precision": 0.7107273331119229, + "eval_precision_macro": 0.8265169354519211, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187627864354736, + "eval_runtime": 0.1766, + "eval_samples_per_second": 922.941, + "eval_steps_per_second": 5.662, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9334 + }, + { + "epoch": 360.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9328811640271188, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700228832951945, + "eval_f1_macro": 0.8225089013937882, + "eval_loss": 0.2680214047431946, + "eval_pr_auc": 0.6947522638420174, + "eval_precision": 0.7099767981438515, + "eval_precision_macro": 0.8261913298268353, + "eval_pred_class_0": 16651, + "eval_pred_class_1": 3017, + "eval_predicted_binding_ratio": 0.15339637990644703, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189645411388546, + "eval_runtime": 0.2235, + "eval_samples_per_second": 729.222, + "eval_steps_per_second": 4.474, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9360 + }, + { + "epoch": 361.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.932900882070006, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7000163478829492, + "eval_f1_macro": 0.8223884382480462, + "eval_loss": 0.26796379685401917, + "eval_pr_auc": 0.6949176089109205, + "eval_precision": 0.7098806366047745, + "eval_precision_macro": 0.8261149519800236, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188033028286321, + "eval_runtime": 0.1694, + "eval_samples_per_second": 962.09, + "eval_steps_per_second": 5.902, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9386 + }, + { + "epoch": 362.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9329447561753046, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7005723630417008, + "eval_f1_macro": 0.8227283114988764, + "eval_loss": 0.26795387268066406, + "eval_pr_auc": 0.6950891938157696, + "eval_precision": 0.7106834771068348, + "eval_precision_macro": 0.8265498567232265, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190550825766527, + "eval_runtime": 0.2571, + "eval_samples_per_second": 633.968, + "eval_steps_per_second": 3.889, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9412 + }, + { + "epoch": 363.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9329731556072616, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7001636661211129, + "eval_f1_macro": 0.8225130616165066, + "eval_loss": 0.26798126101493835, + "eval_pr_auc": 0.6952376733836106, + "eval_precision": 0.7108673978065803, + "eval_precision_macro": 0.8265604171937038, + "eval_pred_class_0": 16659, + "eval_pred_class_1": 3009, + "eval_predicted_binding_ratio": 0.15298962782184258, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818631728604517, + "eval_runtime": 0.2635, + "eval_samples_per_second": 618.549, + "eval_steps_per_second": 3.795, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9438 + }, + { + "epoch": 364.0, + "eval_accuracy": 0.9070063046573114, + "eval_auc": 0.9329669754704237, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7008994276369583, + "eval_f1_macro": 0.8229219452383643, + "eval_loss": 0.26792433857917786, + "eval_pr_auc": 0.6952423959524336, + "eval_precision": 0.7110152621101526, + "eval_precision_macro": 0.8267457720422265, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.8192465013661412, + "eval_runtime": 0.2599, + "eval_samples_per_second": 627.085, + "eval_steps_per_second": 3.847, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9464 + }, + { + "epoch": 365.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9329833552661686, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003598298985934, + "eval_f1_macro": 0.8226078241660808, + "eval_loss": 0.2679993808269501, + "eval_pr_auc": 0.6952272078548911, + "eval_precision": 0.7105874543644208, + "eval_precision_macro": 0.8264735530603251, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188938442664302, + "eval_runtime": 0.2598, + "eval_samples_per_second": 627.317, + "eval_steps_per_second": 3.849, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9490 + }, + { + "epoch": 365.38461538461536, + "grad_norm": 18768.416015625, + "learning_rate": 2.0513069380006943e-07, + "loss": 0.2014, + "step": 9500 + }, + { + "epoch": 366.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9330138179879044, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003763704794632, + "eval_f1_macro": 0.822633632944773, + "eval_loss": 0.26798829436302185, + "eval_pr_auc": 0.6953770669982303, + "eval_precision": 0.7109634551495017, + "eval_precision_macro": 0.8266367281750631, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187929669147396, + "eval_runtime": 0.198, + "eval_samples_per_second": 823.426, + "eval_steps_per_second": 5.052, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9516 + }, + { + "epoch": 367.0, + "eval_accuracy": 0.9069554606467358, + "eval_auc": 0.9330281442263691, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7005890052356021, + "eval_f1_macro": 0.8227541703278901, + "eval_loss": 0.2679848372936249, + "eval_pr_auc": 0.6954512195878219, + "eval_precision": 0.7110594486881435, + "eval_precision_macro": 0.8267130106501293, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189542052249621, + "eval_runtime": 0.265, + "eval_samples_per_second": 615.184, + "eval_steps_per_second": 3.774, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9542 + }, + { + "epoch": 368.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9330502564639999, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004743988221822, + "eval_f1_macro": 0.8226809883524872, + "eval_loss": 0.2679852545261383, + "eval_pr_auc": 0.6955032656447977, + "eval_precision": 0.7108233731739708, + "eval_precision_macro": 0.8265932427829508, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189240247456961, + "eval_runtime": 0.2572, + "eval_samples_per_second": 633.651, + "eval_steps_per_second": 3.887, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9568 + }, + { + "epoch": 369.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9330669768972081, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004743988221822, + "eval_f1_macro": 0.8226809883524872, + "eval_loss": 0.2680181562900543, + "eval_pr_auc": 0.6955153529738297, + "eval_precision": 0.7108233731739708, + "eval_precision_macro": 0.8265932427829508, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189240247456961, + "eval_runtime": 0.2378, + "eval_samples_per_second": 685.421, + "eval_steps_per_second": 4.205, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9594 + }, + { + "epoch": 370.0, + "eval_accuracy": 0.9070063046573114, + "eval_auc": 0.9330907631246593, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7008015704236872, + "eval_f1_macro": 0.8228746737830142, + "eval_loss": 0.26799651980400085, + "eval_pr_auc": 0.6956635287744017, + "eval_precision": 0.7111553784860558, + "eval_precision_macro": 0.8267892646512892, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8191154435351846, + "eval_runtime": 0.1746, + "eval_samples_per_second": 933.529, + "eval_steps_per_second": 5.727, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9620 + }, + { + "epoch": 371.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9331252940309591, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6999672453324599, + "eval_f1_macro": 0.8224181697622275, + "eval_loss": 0.26801252365112305, + "eval_pr_auc": 0.6958269523241747, + "eval_precision": 0.7111480865224625, + "eval_precision_macro": 0.8266476794611952, + "eval_pred_class_0": 16663, + "eval_pred_class_1": 3005, + "eval_predicted_binding_ratio": 0.15278625177954036, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8183696129426041, + "eval_runtime": 0.2596, + "eval_samples_per_second": 627.791, + "eval_steps_per_second": 3.851, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9646 + }, + { + "epoch": 372.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9331262672808548, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7005723630417008, + "eval_f1_macro": 0.8227283114988764, + "eval_loss": 0.26803725957870483, + "eval_pr_auc": 0.6957878915651574, + "eval_precision": 0.7106834771068348, + "eval_precision_macro": 0.8265498567232265, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190550825766527, + "eval_runtime": 0.2654, + "eval_samples_per_second": 614.266, + "eval_steps_per_second": 3.769, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9672 + }, + { + "epoch": 373.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9331573528825239, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699607329842932, + "eval_f1_macro": 0.8221730364601127, + "eval_loss": 0.26807889342308044, + "eval_pr_auc": 0.6959044832644976, + "eval_precision": 0.7100631019594819, + "eval_precision_macro": 0.8261247850555049, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183799488564967, + "eval_runtime": 0.2659, + "eval_samples_per_second": 613.113, + "eval_steps_per_second": 3.761, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9698 + }, + { + "epoch": 374.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9331759224905339, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6996235063021771, + "eval_f1_macro": 0.8221986674075668, + "eval_loss": 0.2681734561920166, + "eval_pr_auc": 0.695850524773773, + "eval_precision": 0.710438829787234, + "eval_precision_macro": 0.8262878422645654, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.818279071504806, + "eval_runtime": 0.2603, + "eval_samples_per_second": 626.284, + "eval_steps_per_second": 3.842, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9724 + }, + { + "epoch": 375.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933143698186487, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700228832951945, + "eval_f1_macro": 0.8225089013937882, + "eval_loss": 0.2681812345981598, + "eval_pr_auc": 0.6956723886102156, + "eval_precision": 0.7099767981438515, + "eval_precision_macro": 0.8261913298268353, + "eval_pred_class_0": 16651, + "eval_pred_class_1": 3017, + "eval_predicted_binding_ratio": 0.15339637990644703, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189645411388546, + "eval_runtime": 0.1783, + "eval_samples_per_second": 914.221, + "eval_steps_per_second": 5.609, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9750 + }, + { + "epoch": 376.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9331405935193198, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002123835974514, + "eval_f1_macro": 0.8224831299290885, + "eval_loss": 0.2681320309638977, + "eval_pr_auc": 0.6957066245846253, + "eval_precision": 0.7096026490066225, + "eval_precision_macro": 0.8260290996114323, + "eval_pred_class_0": 16648, + "eval_pred_class_1": 3020, + "eval_predicted_binding_ratio": 0.1535489119381737, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.819065418490545, + "eval_runtime": 0.2617, + "eval_samples_per_second": 622.75, + "eval_steps_per_second": 3.821, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9776 + }, + { + "epoch": 377.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9331510656881976, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7010443864229765, + "eval_f1_macro": 0.8229384784439624, + "eval_loss": 0.2682173550128937, + "eval_pr_auc": 0.6955658216411763, + "eval_precision": 0.709613478691774, + "eval_precision_macro": 0.8261726428372638, + "eval_pred_class_0": 16641, + "eval_pred_class_1": 3027, + "eval_predicted_binding_ratio": 0.15390482001220257, + "eval_recall": 0.6926797807158981, + "eval_recall_macro": 0.8198112490831255, + "eval_runtime": 0.2446, + "eval_samples_per_second": 666.285, + "eval_steps_per_second": 4.088, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9802 + }, + { + "epoch": 378.0, + "eval_accuracy": 0.9065487085621314, + "eval_auc": 0.9331855381995037, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994767822105952, + "eval_f1_macro": 0.8220743333087894, + "eval_loss": 0.2681441009044647, + "eval_pr_auc": 0.6959256270686769, + "eval_precision": 0.709452736318408, + "eval_precision_macro": 0.8258426835378145, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8184506457289211, + "eval_runtime": 0.2608, + "eval_samples_per_second": 625.04, + "eval_steps_per_second": 3.835, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9828 + }, + { + "epoch": 379.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9331895674540718, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003267973856209, + "eval_f1_macro": 0.8225562214288413, + "eval_loss": 0.26809969544410706, + "eval_pr_auc": 0.6959934558189067, + "eval_precision": 0.7098376946008612, + "eval_precision_macro": 0.8261483505739005, + "eval_pred_class_0": 16649, + "eval_pred_class_1": 3019, + "eval_predicted_binding_ratio": 0.15349806792759813, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.8190955989698111, + "eval_runtime": 0.2612, + "eval_samples_per_second": 624.005, + "eval_steps_per_second": 3.828, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9854 + }, + { + "epoch": 380.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9332175581210724, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994928840176673, + "eval_f1_macro": 0.8220999320609059, + "eval_loss": 0.26810789108276367, + "eval_pr_auc": 0.6960299382483204, + "eval_precision": 0.7098273572377158, + "eval_precision_macro": 0.8260051771779358, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183497683772306, + "eval_runtime": 0.1782, + "eval_samples_per_second": 914.547, + "eval_steps_per_second": 5.611, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9880 + }, + { + "epoch": 381.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9332336167443518, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699607329842932, + "eval_f1_macro": 0.8221730364601127, + "eval_loss": 0.2680502235889435, + "eval_pr_auc": 0.6962912618518755, + "eval_precision": 0.7100631019594819, + "eval_precision_macro": 0.8261247850555049, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183799488564967, + "eval_runtime": 0.2534, + "eval_samples_per_second": 643.169, + "eval_steps_per_second": 3.946, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9906 + }, + { + "epoch": 382.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9332394951737218, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994106090373281, + "eval_f1_macro": 0.8220780022434744, + "eval_loss": 0.26803064346313477, + "eval_pr_auc": 0.696271279785013, + "eval_precision": 0.71034253408713, + "eval_precision_macro": 0.8262114206958068, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8181178331945835, + "eval_runtime": 0.1775, + "eval_samples_per_second": 918.497, + "eval_steps_per_second": 5.635, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9932 + }, + { + "epoch": 383.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9332396703587031, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.2680654227733612, + "eval_pr_auc": 0.6962410474404584, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.2478, + "eval_samples_per_second": 657.85, + "eval_steps_per_second": 4.036, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9958 + }, + { + "epoch": 384.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9332586000691747, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995090016366612, + "eval_f1_macro": 0.8221255355501671, + "eval_loss": 0.26804468035697937, + "eval_pr_auc": 0.6963737898708651, + "eval_precision": 0.7102027251578598, + "eval_precision_macro": 0.8261680532566416, + "eval_pred_class_0": 16659, + "eval_pred_class_1": 3009, + "eval_predicted_binding_ratio": 0.15298962782184258, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8182488910255401, + "eval_runtime": 0.1642, + "eval_samples_per_second": 992.895, + "eval_steps_per_second": 6.091, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 9984 + }, + { + "epoch": 384.61538461538464, + "grad_norm": 19506.416015625, + "learning_rate": 1.5373466155541264e-07, + "loss": 0.1999, + "step": 10000 + }, + { + "epoch": 385.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9332885664334637, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6990990990990991, + "eval_f1_macro": 0.8219096951966862, + "eval_loss": 0.26817384362220764, + "eval_pr_auc": 0.6963813810367415, + "eval_precision": 0.7103861517976032, + "eval_precision_macro": 0.8261784335560267, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6881651080296678, + "eval_recall_macro": 0.8178255370534045, + "eval_runtime": 0.2609, + "eval_samples_per_second": 624.846, + "eval_steps_per_second": 3.833, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10010 + }, + { + "epoch": 386.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9332890919884074, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995251350908793, + "eval_f1_macro": 0.8221511437890823, + "eval_loss": 0.2681432366371155, + "eval_pr_auc": 0.6963686032935126, + "eval_precision": 0.7105788423153693, + "eval_precision_macro": 0.8263313128873689, + "eval_pred_class_0": 16662, + "eval_pred_class_1": 3006, + "eval_predicted_binding_ratio": 0.15283709579011592, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8181480136738496, + "eval_runtime": 0.2677, + "eval_samples_per_second": 609.003, + "eval_steps_per_second": 3.736, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10036 + }, + { + "epoch": 387.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9332816368942063, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6992961204779833, + "eval_f1_macro": 0.8220048784892098, + "eval_loss": 0.2681044936180115, + "eval_pr_auc": 0.6963340299375516, + "eval_precision": 0.7101063829787234, + "eval_precision_macro": 0.8260916068555082, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8180876527153176, + "eval_runtime": 0.2503, + "eval_samples_per_second": 651.154, + "eval_steps_per_second": 3.995, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10062 + }, + { + "epoch": 388.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9332853547088078, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699607329842932, + "eval_f1_macro": 0.8221730364601127, + "eval_loss": 0.26804089546203613, + "eval_pr_auc": 0.6964477494759991, + "eval_precision": 0.7100631019594819, + "eval_precision_macro": 0.8261247850555049, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183799488564967, + "eval_runtime": 0.2636, + "eval_samples_per_second": 618.349, + "eval_steps_per_second": 3.794, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10088 + }, + { + "epoch": 389.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9333026785569515, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997218131238749, + "eval_f1_macro": 0.8222461586311625, + "eval_loss": 0.26805874705314636, + "eval_pr_auc": 0.6965416515768459, + "eval_precision": 0.7102990033222591, + "eval_precision_macro": 0.8262444710452093, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184101293357626, + "eval_runtime": 0.1895, + "eval_samples_per_second": 860.093, + "eval_steps_per_second": 5.277, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10114 + }, + { + "epoch": 390.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9332940944928713, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26800334453582764, + "eval_pr_auc": 0.6965549091166009, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.2601, + "eval_samples_per_second": 626.7, + "eval_steps_per_second": 3.845, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10140 + }, + { + "epoch": 391.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9332915835081403, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699591169255928, + "eval_f1_macro": 0.8221474102804127, + "eval_loss": 0.26801303029060364, + "eval_pr_auc": 0.69649915263674, + "eval_precision": 0.7096881220968813, + "eval_precision_macro": 0.8259621107662262, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8184808262081871, + "eval_runtime": 0.2322, + "eval_samples_per_second": 702.068, + "eval_steps_per_second": 4.307, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10166 + }, + { + "epoch": 392.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9333394576705105, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2680598199367523, + "eval_pr_auc": 0.6966844521188784, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2619, + "eval_samples_per_second": 622.281, + "eval_steps_per_second": 3.818, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10192 + }, + { + "epoch": 393.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9333311363839021, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26812389492988586, + "eval_pr_auc": 0.696579843318821, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.2606, + "eval_samples_per_second": 625.459, + "eval_steps_per_second": 3.837, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10218 + }, + { + "epoch": 394.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9333747574442278, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6990990990990991, + "eval_f1_macro": 0.8219096951966862, + "eval_loss": 0.26815417408943176, + "eval_pr_auc": 0.6966758102563304, + "eval_precision": 0.7103861517976032, + "eval_precision_macro": 0.8261784335560267, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6881651080296678, + "eval_recall_macro": 0.8178255370534045, + "eval_runtime": 0.2489, + "eval_samples_per_second": 654.797, + "eval_steps_per_second": 4.017, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10244 + }, + { + "epoch": 395.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.93337473797923, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994106090373281, + "eval_f1_macro": 0.8220780022434744, + "eval_loss": 0.26812419295310974, + "eval_pr_auc": 0.6967071460749926, + "eval_precision": 0.71034253408713, + "eval_precision_macro": 0.8262114206958068, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8181178331945835, + "eval_runtime": 0.2546, + "eval_samples_per_second": 640.225, + "eval_steps_per_second": 3.928, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10270 + }, + { + "epoch": 396.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9333609762257046, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6966270582247817, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.1654, + "eval_samples_per_second": 985.392, + "eval_steps_per_second": 6.045, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10296 + }, + { + "epoch": 397.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9333962857319209, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997218131238749, + "eval_f1_macro": 0.8222461586311625, + "eval_loss": 0.268099308013916, + "eval_pr_auc": 0.6968255966064625, + "eval_precision": 0.7102990033222591, + "eval_precision_macro": 0.8262444710452093, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184101293357626, + "eval_runtime": 0.1974, + "eval_samples_per_second": 825.595, + "eval_steps_per_second": 5.065, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10322 + }, + { + "epoch": 398.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9334254248337986, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6987714987714988, + "eval_f1_macro": 0.8217158026492684, + "eval_loss": 0.26809167861938477, + "eval_pr_auc": 0.6970114900505864, + "eval_precision": 0.7100532623169108, + "eval_precision_macro": 0.8259819840149124, + "eval_pred_class_0": 16664, + "eval_pred_class_1": 3004, + "eval_predicted_binding_ratio": 0.15273540776896483, + "eval_recall": 0.6878426314092229, + "eval_recall_macro": 0.8176341182639161, + "eval_runtime": 0.1847, + "eval_samples_per_second": 882.621, + "eval_steps_per_second": 5.415, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10348 + }, + { + "epoch": 399.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9334154587548664, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994928840176673, + "eval_f1_macro": 0.8220999320609059, + "eval_loss": 0.26815035939216614, + "eval_pr_auc": 0.6968381832516852, + "eval_precision": 0.7098273572377158, + "eval_precision_macro": 0.8260051771779358, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183497683772306, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.281, + "eval_steps_per_second": 3.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10374 + }, + { + "epoch": 400.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9334343592678412, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6991976420501065, + "eval_f1_macro": 0.821957303073866, + "eval_loss": 0.26815110445022583, + "eval_pr_auc": 0.6969254623478301, + "eval_precision": 0.7102461743180306, + "eval_precision_macro": 0.826134970486347, + "eval_pred_class_0": 16662, + "eval_pred_class_1": 3006, + "eval_predicted_binding_ratio": 0.15283709579011592, + "eval_recall": 0.6884875846501128, + "eval_recall_macro": 0.817956594884361, + "eval_runtime": 0.2607, + "eval_samples_per_second": 625.319, + "eval_steps_per_second": 3.836, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10400 + }, + { + "epoch": 401.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9334508655860725, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6992961204779833, + "eval_f1_macro": 0.8220048784892098, + "eval_loss": 0.2681412398815155, + "eval_pr_auc": 0.6970213943546584, + "eval_precision": 0.7101063829787234, + "eval_precision_macro": 0.8260916068555082, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8180876527153176, + "eval_runtime": 0.2502, + "eval_samples_per_second": 651.598, + "eval_steps_per_second": 3.998, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10426 + }, + { + "epoch": 402.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9334578048578289, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6996235063021771, + "eval_f1_macro": 0.8221986674075668, + "eval_loss": 0.26816073060035706, + "eval_pr_auc": 0.697032431219364, + "eval_precision": 0.710438829787234, + "eval_precision_macro": 0.8262878422645654, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.818279071504806, + "eval_runtime": 0.2131, + "eval_samples_per_second": 764.767, + "eval_steps_per_second": 4.692, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10452 + }, + { + "epoch": 403.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9334644910846125, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6993945344460808, + "eval_f1_macro": 0.8220524214743572, + "eval_loss": 0.2681373655796051, + "eval_pr_auc": 0.6971117657586055, + "eval_precision": 0.7099667774086379, + "eval_precision_macro": 0.8260483424802825, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8182187105462742, + "eval_runtime": 0.1917, + "eval_samples_per_second": 850.439, + "eval_steps_per_second": 5.217, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10478 + }, + { + "epoch": 403.84615384615387, + "grad_norm": 20065.328125, + "learning_rate": 1.0857058873879127e-07, + "loss": 0.1991, + "step": 10500 + }, + { + "epoch": 404.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9334760435608745, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6994106090373281, + "eval_f1_macro": 0.8220780022434744, + "eval_loss": 0.26816511154174805, + "eval_pr_auc": 0.6971952135976213, + "eval_precision": 0.71034253408713, + "eval_precision_macro": 0.8262114206958068, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6888100612705579, + "eval_recall_macro": 0.8181178331945835, + "eval_runtime": 0.2491, + "eval_samples_per_second": 654.311, + "eval_steps_per_second": 4.014, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10504 + }, + { + "epoch": 405.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9334837419675497, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997218131238749, + "eval_f1_macro": 0.8222461586311625, + "eval_loss": 0.26816821098327637, + "eval_pr_auc": 0.6972179050703514, + "eval_precision": 0.7102990033222591, + "eval_precision_macro": 0.8262444710452093, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184101293357626, + "eval_runtime": 0.2552, + "eval_samples_per_second": 638.823, + "eval_steps_per_second": 3.919, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10530 + }, + { + "epoch": 406.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9334893478869489, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997218131238749, + "eval_f1_macro": 0.8222461586311625, + "eval_loss": 0.2681950330734253, + "eval_pr_auc": 0.6972103120395237, + "eval_precision": 0.7102990033222591, + "eval_precision_macro": 0.8262444710452093, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184101293357626, + "eval_runtime": 0.2003, + "eval_samples_per_second": 813.636, + "eval_steps_per_second": 4.992, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10556 + }, + { + "epoch": 407.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9334909050867821, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998363338788871, + "eval_f1_macro": 0.822319298583337, + "eval_loss": 0.2681744396686554, + "eval_pr_auc": 0.6972816477778223, + "eval_precision": 0.71053506148222, + "eval_precision_macro": 0.8263642352251727, + "eval_pred_class_0": 16659, + "eval_pred_class_1": 3009, + "eval_predicted_binding_ratio": 0.15298962782184258, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184403098150286, + "eval_runtime": 0.2497, + "eval_samples_per_second": 652.723, + "eval_steps_per_second": 4.004, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10582 + }, + { + "epoch": 408.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.933494973271346, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6995090016366612, + "eval_f1_macro": 0.8221255355501671, + "eval_loss": 0.2681480348110199, + "eval_pr_auc": 0.697380910091478, + "eval_precision": 0.7102027251578598, + "eval_precision_macro": 0.8261680532566416, + "eval_pred_class_0": 16659, + "eval_pred_class_1": 3009, + "eval_predicted_binding_ratio": 0.15298962782184258, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8182488910255401, + "eval_runtime": 0.2536, + "eval_samples_per_second": 642.704, + "eval_steps_per_second": 3.943, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10608 + }, + { + "epoch": 409.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9334896009319218, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2680969536304474, + "eval_pr_auc": 0.6974108729960042, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2261, + "eval_samples_per_second": 721.003, + "eval_steps_per_second": 4.423, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10634 + }, + { + "epoch": 410.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9334926569365942, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699607329842932, + "eval_f1_macro": 0.8221730364601127, + "eval_loss": 0.26808932423591614, + "eval_pr_auc": 0.6974463532877748, + "eval_precision": 0.7100631019594819, + "eval_precision_macro": 0.8261247850555049, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8183799488564967, + "eval_runtime": 0.2604, + "eval_samples_per_second": 625.923, + "eval_steps_per_second": 3.84, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10660 + }, + { + "epoch": 411.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9334912943867403, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2680690288543701, + "eval_pr_auc": 0.6974656777279113, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.181, + "eval_samples_per_second": 900.801, + "eval_steps_per_second": 5.526, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10686 + }, + { + "epoch": 412.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9334969197711376, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7000490918016691, + "eval_f1_macro": 0.8224398957879677, + "eval_loss": 0.26803234219551086, + "eval_pr_auc": 0.6975509824558107, + "eval_precision": 0.7106312292358804, + "eval_precision_macro": 0.8264405996101362, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8186015481252511, + "eval_runtime": 0.1785, + "eval_samples_per_second": 913.403, + "eval_steps_per_second": 5.604, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10712 + }, + { + "epoch": 413.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9334973090710958, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7000490918016691, + "eval_f1_macro": 0.8224398957879677, + "eval_loss": 0.26804181933403015, + "eval_pr_auc": 0.697539016898834, + "eval_precision": 0.7106312292358804, + "eval_precision_macro": 0.8264405996101362, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8186015481252511, + "eval_runtime": 0.1858, + "eval_samples_per_second": 877.163, + "eval_steps_per_second": 5.381, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10738 + }, + { + "epoch": 414.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9335163069090602, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998363338788871, + "eval_f1_macro": 0.822319298583337, + "eval_loss": 0.2680352032184601, + "eval_pr_auc": 0.697653341121327, + "eval_precision": 0.71053506148222, + "eval_precision_macro": 0.8263642352251727, + "eval_pred_class_0": 16659, + "eval_pred_class_1": 3009, + "eval_predicted_binding_ratio": 0.15298962782184258, + "eval_recall": 0.689455014511448, + "eval_recall_macro": 0.8184403098150286, + "eval_runtime": 0.239, + "eval_samples_per_second": 682.004, + "eval_steps_per_second": 4.184, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10764 + }, + { + "epoch": 415.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9335094357647963, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004743988221822, + "eval_f1_macro": 0.8226809883524872, + "eval_loss": 0.2680439054965973, + "eval_pr_auc": 0.6975754183405896, + "eval_precision": 0.7108233731739708, + "eval_precision_macro": 0.8265932427829508, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189240247456961, + "eval_runtime": 0.2547, + "eval_samples_per_second": 640.063, + "eval_steps_per_second": 3.927, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10790 + }, + { + "epoch": 416.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9335170855089767, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004743988221822, + "eval_f1_macro": 0.8226809883524872, + "eval_loss": 0.26802244782447815, + "eval_pr_auc": 0.6976835126920541, + "eval_precision": 0.7108233731739708, + "eval_precision_macro": 0.8265932427829508, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189240247456961, + "eval_runtime": 0.1858, + "eval_samples_per_second": 877.122, + "eval_steps_per_second": 5.381, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10816 + }, + { + "epoch": 417.0, + "eval_accuracy": 0.9069046166361603, + "eval_auc": 0.9335191877287514, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004743988221822, + "eval_f1_macro": 0.8226809883524872, + "eval_loss": 0.26805901527404785, + "eval_pr_auc": 0.6976076287719296, + "eval_precision": 0.7108233731739708, + "eval_precision_macro": 0.8265932427829508, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8189240247456961, + "eval_runtime": 0.2586, + "eval_samples_per_second": 630.253, + "eval_steps_per_second": 3.867, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10842 + }, + { + "epoch": 418.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9335267206829443, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002617801047121, + "eval_f1_macro": 0.8225604590386311, + "eval_loss": 0.26806166768074036, + "eval_pr_auc": 0.6976721144908643, + "eval_precision": 0.7107273331119229, + "eval_precision_macro": 0.8265169354519211, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187627864354736, + "eval_runtime": 0.1992, + "eval_samples_per_second": 818.364, + "eval_steps_per_second": 5.021, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10868 + }, + { + "epoch": 419.0, + "eval_accuracy": 0.9069554606467358, + "eval_auc": 0.9335306331475249, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7006869479882237, + "eval_f1_macro": 0.8228014837466855, + "eval_loss": 0.2680812180042267, + "eval_pr_auc": 0.6976481882085733, + "eval_precision": 0.7109193494855626, + "eval_precision_macro": 0.8266695216356063, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190852630559187, + "eval_runtime": 0.2498, + "eval_samples_per_second": 652.627, + "eval_steps_per_second": 4.004, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10894 + }, + { + "epoch": 420.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9335429739562026, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26813384890556335, + "eval_pr_auc": 0.6976080200264206, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2232, + "eval_samples_per_second": 730.143, + "eval_steps_per_second": 4.479, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10920 + }, + { + "epoch": 421.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9335431686061818, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6999182338511856, + "eval_f1_macro": 0.8223410440199006, + "eval_loss": 0.2681698799133301, + "eval_pr_auc": 0.6975308484626278, + "eval_precision": 0.710019907100199, + "eval_precision_macro": 0.8261580260852261, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8186722449976755, + "eval_runtime": 0.1913, + "eval_samples_per_second": 852.153, + "eval_steps_per_second": 5.228, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10946 + }, + { + "epoch": 422.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9335561517597906, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.2681558430194855, + "eval_pr_auc": 0.6975926126749412, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.252, + "eval_samples_per_second": 646.818, + "eval_steps_per_second": 3.968, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10972 + }, + { + "epoch": 423.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9335701957557857, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6989687346537895, + "eval_f1_macro": 0.821811089570853, + "eval_loss": 0.2681851089000702, + "eval_pr_auc": 0.6976584248764129, + "eval_precision": 0.7097739361702128, + "eval_precision_macro": 0.825895371446451, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6884875846501128, + "eval_recall_macro": 0.8178962339258291, + "eval_runtime": 0.2518, + "eval_samples_per_second": 647.463, + "eval_steps_per_second": 3.972, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 10998 + }, + { + "epoch": 423.0769230769231, + "grad_norm": 19880.513671875, + "learning_rate": 7.045132214180816e-08, + "loss": 0.198, + "step": 11000 + }, + { + "epoch": 424.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.933567850223537, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002452984464431, + "eval_f1_macro": 0.8225346777593885, + "eval_loss": 0.2681238353252411, + "eval_pr_auc": 0.6977041251052366, + "eval_precision": 0.7103516921035169, + "eval_precision_macro": 0.8263539414042262, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188636637871641, + "eval_runtime": 0.1802, + "eval_samples_per_second": 904.736, + "eval_steps_per_second": 5.551, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11024 + }, + { + "epoch": 425.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9335591688344673, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26810285449028015, + "eval_pr_auc": 0.6976771787125668, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2422, + "eval_samples_per_second": 673.038, + "eval_steps_per_second": 4.129, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11050 + }, + { + "epoch": 426.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9335456990559106, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700228832951945, + "eval_f1_macro": 0.8225089013937882, + "eval_loss": 0.2680869400501251, + "eval_pr_auc": 0.6976467269760291, + "eval_precision": 0.7099767981438515, + "eval_precision_macro": 0.8261913298268353, + "eval_pred_class_0": 16651, + "eval_pred_class_1": 3017, + "eval_predicted_binding_ratio": 0.15339637990644703, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189645411388546, + "eval_runtime": 0.1727, + "eval_samples_per_second": 943.897, + "eval_steps_per_second": 5.791, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11076 + }, + { + "epoch": 427.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9335531541501119, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004412485700278, + "eval_f1_macro": 0.8226293306702985, + "eval_loss": 0.26812946796417236, + "eval_pr_auc": 0.6976292182478663, + "eval_precision": 0.7100728959575878, + "eval_precision_macro": 0.8262676792100252, + "eval_pred_class_0": 16650, + "eval_pred_class_1": 3018, + "eval_predicted_binding_ratio": 0.15344722391702256, + "eval_recall": 0.691067397613673, + "eval_recall_macro": 0.8191257794490772, + "eval_runtime": 0.2563, + "eval_samples_per_second": 635.952, + "eval_steps_per_second": 3.902, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11102 + }, + { + "epoch": 428.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9335772907475254, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002452984464431, + "eval_f1_macro": 0.8225346777593885, + "eval_loss": 0.26813971996307373, + "eval_pr_auc": 0.6977601654966087, + "eval_precision": 0.7103516921035169, + "eval_precision_macro": 0.8263539414042262, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188636637871641, + "eval_runtime": 0.2428, + "eval_samples_per_second": 671.434, + "eval_steps_per_second": 4.119, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11128 + }, + { + "epoch": 429.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.9335880938213678, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6989687346537895, + "eval_f1_macro": 0.821811089570853, + "eval_loss": 0.26816368103027344, + "eval_pr_auc": 0.6977910817778257, + "eval_precision": 0.7097739361702128, + "eval_precision_macro": 0.825895371446451, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6884875846501128, + "eval_recall_macro": 0.8178962339258291, + "eval_runtime": 0.1821, + "eval_samples_per_second": 895.286, + "eval_steps_per_second": 5.493, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11154 + }, + { + "epoch": 430.0, + "eval_accuracy": 0.906599552572707, + "eval_auc": 0.9335897288811925, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6993945344460808, + "eval_f1_macro": 0.8220524214743572, + "eval_loss": 0.2681582272052765, + "eval_pr_auc": 0.6978031299323104, + "eval_precision": 0.7099667774086379, + "eval_precision_macro": 0.8260483424802825, + "eval_pred_class_0": 16658, + "eval_pred_class_1": 3010, + "eval_predicted_binding_ratio": 0.15304047183241815, + "eval_recall": 0.6891325378910029, + "eval_recall_macro": 0.8182187105462742, + "eval_runtime": 0.259, + "eval_samples_per_second": 629.36, + "eval_steps_per_second": 3.861, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11180 + }, + { + "epoch": 431.0, + "eval_accuracy": 0.9065487085621314, + "eval_auc": 0.9336081038392237, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6990831696136215, + "eval_f1_macro": 0.8218841874311036, + "eval_loss": 0.2681788206100464, + "eval_pr_auc": 0.6978648412555615, + "eval_precision": 0.7100099767209843, + "eval_precision_macro": 0.8260151318092648, + "eval_pred_class_0": 16661, + "eval_pred_class_1": 3007, + "eval_predicted_binding_ratio": 0.15288793980069149, + "eval_recall": 0.6884875846501128, + "eval_recall_macro": 0.8179264144050951, + "eval_runtime": 0.1932, + "eval_samples_per_second": 843.696, + "eval_steps_per_second": 5.176, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11206 + }, + { + "epoch": 432.0, + "eval_accuracy": 0.9064978645515558, + "eval_auc": 0.933607091659332, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6989687346537895, + "eval_f1_macro": 0.821811089570853, + "eval_loss": 0.26817184686660767, + "eval_pr_auc": 0.6978628373729787, + "eval_precision": 0.7097739361702128, + "eval_precision_macro": 0.825895371446451, + "eval_pred_class_0": 16660, + "eval_pred_class_1": 3008, + "eval_predicted_binding_ratio": 0.15293878381126702, + "eval_recall": 0.6884875846501128, + "eval_recall_macro": 0.8178962339258291, + "eval_runtime": 0.2181, + "eval_samples_per_second": 747.26, + "eval_steps_per_second": 4.584, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11232 + }, + { + "epoch": 433.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336009699174881, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002452984464431, + "eval_f1_macro": 0.8225346777593885, + "eval_loss": 0.26814383268356323, + "eval_pr_auc": 0.6978726222778764, + "eval_precision": 0.7103516921035169, + "eval_precision_macro": 0.8263539414042262, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188636637871641, + "eval_runtime": 0.2581, + "eval_samples_per_second": 631.607, + "eval_steps_per_second": 3.875, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11258 + }, + { + "epoch": 434.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9335997338901205, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7002452984464431, + "eval_f1_macro": 0.8225346777593885, + "eval_loss": 0.26813551783561707, + "eval_pr_auc": 0.6978611289947059, + "eval_precision": 0.7103516921035169, + "eval_precision_macro": 0.8263539414042262, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188636637871641, + "eval_runtime": 0.1828, + "eval_samples_per_second": 891.596, + "eval_steps_per_second": 5.47, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11284 + }, + { + "epoch": 435.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9335949455006336, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7001308044473512, + "eval_f1_macro": 0.8224615491231337, + "eval_loss": 0.2681216299533844, + "eval_pr_auc": 0.6978496657032133, + "eval_precision": 0.7101160862354893, + "eval_precision_macro": 0.8262344077367322, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188334833078981, + "eval_runtime": 0.2595, + "eval_samples_per_second": 628.033, + "eval_steps_per_second": 3.853, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11310 + }, + { + "epoch": 436.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933600045330087, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7001308044473512, + "eval_f1_macro": 0.8224615491231337, + "eval_loss": 0.2681162655353546, + "eval_pr_auc": 0.6978775373770459, + "eval_precision": 0.7101160862354893, + "eval_precision_macro": 0.8262344077367322, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188334833078981, + "eval_runtime": 0.2578, + "eval_samples_per_second": 632.163, + "eval_steps_per_second": 3.878, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11336 + }, + { + "epoch": 437.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9336053398095198, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.26813197135925293, + "eval_pr_auc": 0.6979041519553001, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.1888, + "eval_samples_per_second": 863.236, + "eval_steps_per_second": 5.296, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11362 + }, + { + "epoch": 438.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.933606196269428, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.26813068985939026, + "eval_pr_auc": 0.6979019651404079, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.2467, + "eval_samples_per_second": 660.854, + "eval_steps_per_second": 4.054, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11388 + }, + { + "epoch": 439.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336083958141924, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.26813721656799316, + "eval_pr_auc": 0.6979009956604231, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.1795, + "eval_samples_per_second": 908.065, + "eval_steps_per_second": 5.571, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11414 + }, + { + "epoch": 440.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336056512494865, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.2681269645690918, + "eval_pr_auc": 0.697889844022227, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.2577, + "eval_samples_per_second": 632.569, + "eval_steps_per_second": 3.881, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11440 + }, + { + "epoch": 441.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336110917164033, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.2681290805339813, + "eval_pr_auc": 0.6979179015521837, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.2643, + "eval_samples_per_second": 616.824, + "eval_steps_per_second": 3.784, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11466 + }, + { + "epoch": 442.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336152864234539, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.2681342363357544, + "eval_pr_auc": 0.697937589363129, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.1746, + "eval_samples_per_second": 933.511, + "eval_steps_per_second": 5.727, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11492 + }, + { + "epoch": 442.3076923076923, + "grad_norm": 19259.90625, + "learning_rate": 4.0062918659231006e-08, + "loss": 0.1984, + "step": 11500 + }, + { + "epoch": 443.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336222548927073, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6999182338511856, + "eval_f1_macro": 0.8223410440199006, + "eval_loss": 0.26813840866088867, + "eval_pr_auc": 0.6979755820701472, + "eval_precision": 0.710019907100199, + "eval_precision_macro": 0.8261580260852261, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8186722449976755, + "eval_runtime": 0.189, + "eval_samples_per_second": 862.533, + "eval_steps_per_second": 5.292, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11518 + }, + { + "epoch": 444.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336241624625029, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6999182338511856, + "eval_f1_macro": 0.8223410440199006, + "eval_loss": 0.2681417763233185, + "eval_pr_auc": 0.6980183846984902, + "eval_precision": 0.710019907100199, + "eval_precision_macro": 0.8261580260852261, + "eval_pred_class_0": 16654, + "eval_pred_class_1": 3014, + "eval_predicted_binding_ratio": 0.15324384787472037, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8186722449976755, + "eval_runtime": 0.1783, + "eval_samples_per_second": 913.982, + "eval_steps_per_second": 5.607, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11544 + }, + { + "epoch": 445.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.933623228142603, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.26812103390693665, + "eval_pr_auc": 0.6980203083536239, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.2463, + "eval_samples_per_second": 661.813, + "eval_steps_per_second": 4.06, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11570 + }, + { + "epoch": 446.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.933622994562628, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.26812058687210083, + "eval_pr_auc": 0.6980222288630917, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.25, + "eval_samples_per_second": 651.994, + "eval_steps_per_second": 4.0, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11596 + }, + { + "epoch": 447.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336267026447306, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.2681138813495636, + "eval_pr_auc": 0.6980542679927909, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.1865, + "eval_samples_per_second": 873.899, + "eval_steps_per_second": 5.361, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11622 + }, + { + "epoch": 448.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336341674714307, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26811888813972473, + "eval_pr_auc": 0.6980920829430033, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2363, + "eval_samples_per_second": 689.772, + "eval_steps_per_second": 4.232, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11648 + }, + { + "epoch": 449.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336351017913307, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7001472272206772, + "eval_f1_macro": 0.8224873029219602, + "eval_loss": 0.26812025904655457, + "eval_pr_auc": 0.6980982354073687, + "eval_precision": 0.7104913678618858, + "eval_precision_macro": 0.8263972209146124, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187326059562077, + "eval_runtime": 0.2566, + "eval_samples_per_second": 635.262, + "eval_steps_per_second": 3.897, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11674 + }, + { + "epoch": 450.0, + "eval_accuracy": 0.9068029286150091, + "eval_auc": 0.9336350044663411, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003433055419324, + "eval_f1_macro": 0.8225820203316995, + "eval_loss": 0.2681255340576172, + "eval_pr_auc": 0.69809285211945, + "eval_precision": 0.7102122015915119, + "eval_precision_macro": 0.8263107608966447, + "eval_pred_class_0": 16652, + "eval_pred_class_1": 3016, + "eval_predicted_binding_ratio": 0.15334553589587147, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8189947216181206, + "eval_runtime": 0.1865, + "eval_samples_per_second": 874.141, + "eval_steps_per_second": 5.363, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11700 + }, + { + "epoch": 451.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9336387320134417, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.2681383192539215, + "eval_pr_auc": 0.6981051991963814, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.2522, + "eval_samples_per_second": 646.252, + "eval_steps_per_second": 3.965, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11726 + }, + { + "epoch": 452.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336409802207007, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7003598298985934, + "eval_f1_macro": 0.8226078241660808, + "eval_loss": 0.26812514662742615, + "eval_pr_auc": 0.6981289571890097, + "eval_precision": 0.7105874543644208, + "eval_precision_macro": 0.8264735530603251, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690422444372783, + "eval_recall_macro": 0.8188938442664302, + "eval_runtime": 0.1751, + "eval_samples_per_second": 930.856, + "eval_steps_per_second": 5.711, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11752 + }, + { + "epoch": 453.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336424400955443, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26812946796417236, + "eval_pr_auc": 0.6981354214954091, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2533, + "eval_samples_per_second": 643.502, + "eval_steps_per_second": 3.948, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11778 + }, + { + "epoch": 454.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336481628049311, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2681373655796051, + "eval_pr_auc": 0.6981529233150263, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2333, + "eval_samples_per_second": 698.566, + "eval_steps_per_second": 4.286, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11804 + }, + { + "epoch": 455.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336448537552857, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26815658807754517, + "eval_pr_auc": 0.6981319681162118, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.1829, + "eval_samples_per_second": 891.289, + "eval_steps_per_second": 5.468, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11830 + }, + { + "epoch": 456.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933646138445148, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.268153578042984, + "eval_pr_auc": 0.6981406301220767, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2528, + "eval_samples_per_second": 644.693, + "eval_steps_per_second": 3.955, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11856 + }, + { + "epoch": 457.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.933646138445148, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.268160343170166, + "eval_pr_auc": 0.6981424953255787, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.2524, + "eval_samples_per_second": 645.855, + "eval_steps_per_second": 3.962, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11882 + }, + { + "epoch": 458.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9336505959296704, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.26815977692604065, + "eval_pr_auc": 0.6981608628032375, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.1785, + "eval_samples_per_second": 913.407, + "eval_steps_per_second": 5.604, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11908 + }, + { + "epoch": 459.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336521336645056, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26815950870513916, + "eval_pr_auc": 0.6981611753342029, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.1894, + "eval_samples_per_second": 860.719, + "eval_steps_per_second": 5.28, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11934 + }, + { + "epoch": 460.0, + "eval_accuracy": 0.9066503965832825, + "eval_auc": 0.9336499146547433, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6997055937193327, + "eval_f1_macro": 0.8222205050048714, + "eval_loss": 0.2681548595428467, + "eval_pr_auc": 0.6981527417806164, + "eval_precision": 0.7099236641221374, + "eval_precision_macro": 0.8260816159097628, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.818511006687453, + "eval_runtime": 0.2094, + "eval_samples_per_second": 778.376, + "eval_steps_per_second": 4.775, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11960 + }, + { + "epoch": 461.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336533599593742, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2681526839733124, + "eval_pr_auc": 0.6981686335311912, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2574, + "eval_samples_per_second": 633.254, + "eval_steps_per_second": 3.885, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 11986 + }, + { + "epoch": 461.53846153846155, + "grad_norm": 19181.365234375, + "learning_rate": 1.7952297882945e-08, + "loss": 0.1977, + "step": 12000 + }, + { + "epoch": 462.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336556178991323, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26815271377563477, + "eval_pr_auc": 0.6981807528411922, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.2521, + "eval_samples_per_second": 646.614, + "eval_steps_per_second": 3.967, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12012 + }, + { + "epoch": 463.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336526397544513, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.2681511640548706, + "eval_pr_auc": 0.6981676703517014, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.2048, + "eval_samples_per_second": 795.899, + "eval_steps_per_second": 4.883, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12038 + }, + { + "epoch": 464.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336516859695536, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26814842224121094, + "eval_pr_auc": 0.698161790632896, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.2527, + "eval_samples_per_second": 645.01, + "eval_steps_per_second": 3.957, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12064 + }, + { + "epoch": 465.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336534183543679, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.26814672350883484, + "eval_pr_auc": 0.6981725600302674, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.1868, + "eval_samples_per_second": 872.776, + "eval_steps_per_second": 5.354, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12090 + }, + { + "epoch": 466.0, + "eval_accuracy": 0.906701240593858, + "eval_auc": 0.9336528344044305, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.6998200556191723, + "eval_f1_macro": 0.822293617491433, + "eval_loss": 0.2681439220905304, + "eval_pr_auc": 0.6981719802867735, + "eval_precision": 0.7101593625498008, + "eval_precision_macro": 0.8262011990462741, + "eval_pred_class_0": 16656, + "eval_pred_class_1": 3012, + "eval_predicted_binding_ratio": 0.15314215985356924, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185411871667191, + "eval_runtime": 0.2538, + "eval_samples_per_second": 642.256, + "eval_steps_per_second": 3.94, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12116 + }, + { + "epoch": 467.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336571167039716, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26814955472946167, + "eval_pr_auc": 0.6981930050756842, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.251, + "eval_samples_per_second": 649.332, + "eval_steps_per_second": 3.984, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12142 + }, + { + "epoch": 468.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336574573414352, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.26814743876457214, + "eval_pr_auc": 0.6981957415820915, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.1759, + "eval_samples_per_second": 926.564, + "eval_steps_per_second": 5.684, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12168 + }, + { + "epoch": 469.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336602894986317, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.699934554973822, + "eval_f1_macro": 0.8223667477493719, + "eval_loss": 0.2681457996368408, + "eval_pr_auc": 0.6982076318844164, + "eval_precision": 0.7103952175357025, + "eval_precision_macro": 0.8263208602537131, + "eval_pred_class_0": 16657, + "eval_pred_class_1": 3011, + "eval_predicted_binding_ratio": 0.1530913158429937, + "eval_recall": 0.6897774911318929, + "eval_recall_macro": 0.8185713676459851, + "eval_runtime": 0.1997, + "eval_samples_per_second": 816.139, + "eval_steps_per_second": 5.007, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12194 + }, + { + "epoch": 470.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336571945639633, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.2681419849395752, + "eval_pr_auc": 0.6981934072595471, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2541, + "eval_samples_per_second": 641.578, + "eval_steps_per_second": 3.936, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12220 + }, + { + "epoch": 471.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336578855713892, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.2681434154510498, + "eval_pr_auc": 0.6981968133129176, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2585, + "eval_samples_per_second": 630.471, + "eval_steps_per_second": 3.868, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12246 + }, + { + "epoch": 472.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336576422589153, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26814115047454834, + "eval_pr_auc": 0.6981937092453367, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2611, + "eval_samples_per_second": 624.347, + "eval_steps_per_second": 3.83, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12272 + }, + { + "epoch": 473.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933659540096212, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26814183592796326, + "eval_pr_auc": 0.698207050020266, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.1807, + "eval_samples_per_second": 902.095, + "eval_steps_per_second": 5.534, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12298 + }, + { + "epoch": 474.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.933661126493542, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813554763793945, + "eval_pr_auc": 0.6982130837277154, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2369, + "eval_samples_per_second": 688.198, + "eval_steps_per_second": 4.222, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12324 + }, + { + "epoch": 475.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336606204035962, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813259720802307, + "eval_pr_auc": 0.6982118437878516, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.1655, + "eval_samples_per_second": 984.988, + "eval_steps_per_second": 6.043, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12350 + }, + { + "epoch": 476.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.933660883181068, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813384890556335, + "eval_pr_auc": 0.6982130674055568, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2472, + "eval_samples_per_second": 659.287, + "eval_steps_per_second": 4.045, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12376 + }, + { + "epoch": 477.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336603284286276, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.2681376338005066, + "eval_pr_auc": 0.6982114384730127, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.234, + "eval_samples_per_second": 696.525, + "eval_steps_per_second": 4.273, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12402 + }, + { + "epoch": 478.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336606982635879, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26814013719558716, + "eval_pr_auc": 0.6982139708732891, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2492, + "eval_samples_per_second": 654.084, + "eval_steps_per_second": 4.013, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12428 + }, + { + "epoch": 479.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336606593335921, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813676953315735, + "eval_pr_auc": 0.6982155837128797, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2089, + "eval_samples_per_second": 780.114, + "eval_steps_per_second": 4.786, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12454 + }, + { + "epoch": 480.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336606009385984, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813599467277527, + "eval_pr_auc": 0.6982237203295948, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.239, + "eval_samples_per_second": 682.114, + "eval_steps_per_second": 4.185, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12480 + }, + { + "epoch": 480.7692307692308, + "grad_norm": 19666.140625, + "learning_rate": 4.5173988392051e-09, + "loss": 0.1976, + "step": 12500 + }, + { + "epoch": 481.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336608247860743, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.26813769340515137, + "eval_pr_auc": 0.6982221169303999, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2452, + "eval_samples_per_second": 664.649, + "eval_steps_per_second": 4.078, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12506 + }, + { + "epoch": 482.0, + "eval_accuracy": 0.9068537726255848, + "eval_auc": 0.9336611459585399, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.7004578155657293, + "eval_f1_macro": 0.822655157030306, + "eval_loss": 0.2681364417076111, + "eval_pr_auc": 0.6982243970162039, + "eval_precision": 0.7104477611940299, + "eval_precision_macro": 0.8264302698361911, + "eval_pred_class_0": 16653, + "eval_pred_class_1": 3015, + "eval_predicted_binding_ratio": 0.1532946918852959, + "eval_recall": 0.690744920993228, + "eval_recall_macro": 0.8190249020973867, + "eval_runtime": 0.2509, + "eval_samples_per_second": 649.641, + "eval_steps_per_second": 3.986, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12532 + }, + { + "epoch": 483.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336621192084356, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.2681383192539215, + "eval_pr_auc": 0.6982286439182355, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.229, + "eval_samples_per_second": 711.694, + "eval_steps_per_second": 4.366, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12558 + }, + { + "epoch": 484.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336621970684273, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.2681386172771454, + "eval_pr_auc": 0.6982328773712362, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2516, + "eval_samples_per_second": 647.789, + "eval_steps_per_second": 3.974, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12584 + }, + { + "epoch": 485.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336618856284606, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.2681376338005066, + "eval_pr_auc": 0.6982308470568848, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.4219, + "eval_samples_per_second": 386.39, + "eval_steps_per_second": 2.37, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12610 + }, + { + "epoch": 486.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336622749284189, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813745498657227, + "eval_pr_auc": 0.6982319234202713, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2599, + "eval_samples_per_second": 627.256, + "eval_steps_per_second": 3.848, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12636 + }, + { + "epoch": 487.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336626836933752, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813769340515137, + "eval_pr_auc": 0.6982353364927889, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2613, + "eval_samples_per_second": 623.782, + "eval_steps_per_second": 3.827, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12662 + }, + { + "epoch": 488.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336625766358866, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813805103302, + "eval_pr_auc": 0.6982371615828771, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2497, + "eval_samples_per_second": 652.662, + "eval_steps_per_second": 4.004, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12688 + }, + { + "epoch": 489.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336626642283772, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982364881625377, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2636, + "eval_samples_per_second": 618.289, + "eval_steps_per_second": 3.793, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12714 + }, + { + "epoch": 490.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336626642283772, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982365330396263, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2579, + "eval_samples_per_second": 632.058, + "eval_steps_per_second": 3.878, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12740 + }, + { + "epoch": 491.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933662722623371, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982369774278672, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2559, + "eval_samples_per_second": 636.959, + "eval_steps_per_second": 3.908, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12766 + }, + { + "epoch": 492.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336627420883689, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982367884435748, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.1891, + "eval_samples_per_second": 862.094, + "eval_steps_per_second": 5.289, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12792 + }, + { + "epoch": 493.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336628199483605, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982375574473259, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.1955, + "eval_samples_per_second": 833.803, + "eval_steps_per_second": 5.115, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12818 + }, + { + "epoch": 494.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336627810183648, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982374868010095, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.254, + "eval_samples_per_second": 641.752, + "eval_steps_per_second": 3.937, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12844 + }, + { + "epoch": 495.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336628296808595, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982373741196756, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2245, + "eval_samples_per_second": 726.009, + "eval_steps_per_second": 4.454, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12870 + }, + { + "epoch": 496.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336627712858657, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982369260304812, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2507, + "eval_samples_per_second": 650.217, + "eval_steps_per_second": 3.989, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12896 + }, + { + "epoch": 497.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336627615533667, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.698237407972925, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2463, + "eval_samples_per_second": 661.887, + "eval_steps_per_second": 4.061, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12922 + }, + { + "epoch": 498.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.933662722623371, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982366928868454, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2494, + "eval_samples_per_second": 653.517, + "eval_steps_per_second": 4.009, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12948 + }, + { + "epoch": 499.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336627712858656, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982368906715606, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.1725, + "eval_samples_per_second": 945.076, + "eval_steps_per_second": 5.798, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 12974 + }, + { + "epoch": 500.0, + "grad_norm": 38080.09765625, + "learning_rate": 1.8024699288687884e-14, + "loss": 0.1978, + "step": 13000 + }, + { + "epoch": 500.0, + "eval_accuracy": 0.9067520846044336, + "eval_auc": 0.9336628296808596, + "eval_binding_site_ratio": 0.1576672767947936, + "eval_f1": 0.700032711808963, + "eval_f1_macro": 0.822414164585476, + "eval_loss": 0.26813796162605286, + "eval_pr_auc": 0.6982374485795344, + "eval_precision": 0.7102555592432791, + "eval_precision_macro": 0.826277584485044, + "eval_pred_class_0": 16655, + "eval_pred_class_1": 3013, + "eval_predicted_binding_ratio": 0.1531930038641448, + "eval_recall": 0.690099967752338, + "eval_recall_macro": 0.8187024254769416, + "eval_runtime": 0.2652, + "eval_samples_per_second": 614.65, + "eval_steps_per_second": 3.771, + "eval_total_tokens": 19668, + "eval_true_class_0": 16567, + "eval_true_class_1": 3101, + "step": 13000 + } + ], + "logging_steps": 500, + "max_steps": 13000, + "num_input_tokens_seen": 0, + "num_train_epochs": 500, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.0635606344403768e+16, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/training_args.bin b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..43cdbe5adfb2a2b6cd48f66b6b5e6b0cc84c9bc3 --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fce922531bcc60b40ec3cfe0214120623a297c18ab37c3a2e94007f715374c7 +size 5368 diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/logs/events.out.tfevents.1772413682.amax.578393.0 b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/logs/events.out.tfevents.1772413682.amax.578393.0 new file mode 100644 index 0000000000000000000000000000000000000000..a778a181d44016e664db550883a9510ae556b80b --- /dev/null +++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/logs/events.out.tfevents.1772413682.amax.578393.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aab41102b2a53546cd1c6d1346cd03639b21aeb356f6085991a15eecf0dfac4 +size 583846 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/config.json b/pretrain_glome_nano_model_tiny/checkpoint-205000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b4e7a54fdf8bdda8d2a7ac6356523b75cecb2eb5 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/config.json @@ -0,0 +1,44 @@ +{ + "architectures": [ + "GloMeModelForMaskedLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "bos_token_id": 28, + "classifier_activation": "gelu", + "classifier_bias": false, + "classifier_dropout": 0.1, + "classifier_pooling": "cls", + "cls_token_id": 28, + "compress_block_size": 16, + "compress_block_sliding_stride": 16, + "decoder_bias": true, + "dice_weight": 0.0, + "embedding_dropout": 0.1, + "eos_token_id": 29, + "hidden_activation": "gelu", + "hidden_size": 320, + "inner_rank": 32, + "intermediate_size": 1280, + "kv_heads": 10, + "mask_token_id": 31, + "mlp_bias": false, + "mlp_dropout": 0.1, + "model_size": "tiny", + "model_type": "glome", + "norm_bias": false, + "norm_eps": 1e-05, + "num_attention_heads": 20, + "num_hidden_layers": 6, + "num_selected_blocks": 8, + "num_slots": 64, + "pad_token_id": 30, + "reference_compile": null, + "selection_block_size": 16, + "sep_token_id": 29, + "sliding_window_size": 0, + "torch_dtype": "float32", + "transformers_version": "4.52.3", + "unk_token_id": 27, + "vocab_size": 36 +} diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/merges.txt b/pretrain_glome_nano_model_tiny/checkpoint-205000/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..5e7f1fd94996c8e2b65adea828af1b398eace61f --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/merges.txt @@ -0,0 +1 @@ +#version: 0.2 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/model.safetensors b/pretrain_glome_nano_model_tiny/checkpoint-205000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5f413b39994d9f383e707cb449461b09827efab --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e526e0b7181592a81fb1b7ba28a8fffc5ff6de77e1f50b6b1e7e912c0d0cc7a +size 61429032 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/optimizer.pt b/pretrain_glome_nano_model_tiny/checkpoint-205000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..46bf7d75a50b4bcfd7c9c41e90be697354b2dc7e --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64144aae45faeb97e156e7fe6411b0a74ba7a067c6bd585b9ac7aba62ad82e3a +size 122968954 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/rng_state.pth b/pretrain_glome_nano_model_tiny/checkpoint-205000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c573b36e0144e3072bdb18dc2bb9ffeb5a9dd7dd --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e927d415513ff83ac7aadcb06dbe2ca6fbfb935313cba0db936929660fa0c453 +size 14244 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/scaler.pt b/pretrain_glome_nano_model_tiny/checkpoint-205000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363 +size 988 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/scheduler.pt b/pretrain_glome_nano_model_tiny/checkpoint-205000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..49bac9878e7ca3459e2bbb6a4ff0e0d90965f8f9 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac77fb48a5120ab5d787e0d509c5ec923d039324ed23c2788fb412f6b926c15c +size 1064 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/special_tokens_map.json b/pretrain_glome_nano_model_tiny/checkpoint-205000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..14f7c9ed7b0bde6d23ee7b6a24ac2996789d1a0b --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/special_tokens_map.json @@ -0,0 +1,51 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "cls_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "sep_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } +} diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/tokenizer.json b/pretrain_glome_nano_model_tiny/checkpoint-205000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..0ece9b8e6fa70a006c5c10c47e30c9cff4ff95f0 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/tokenizer.json @@ -0,0 +1,123 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 27, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": true + }, + { + "id": 28, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": true + }, + { + "id": 29, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": true + }, + { + "id": 30, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": true + }, + { + "id": 31, + "content": "", + "single_word": false, + "lstrip": true, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": true + }, + "post_processor": { + "type": "RobertaProcessing", + "sep": [ + "", + 29 + ], + "cls": [ + "", + 28 + ], + "trim_offsets": true, + "add_prefix_space": false + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": "", + "end_of_word_suffix": "", + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "A": 0, + "R": 1, + "N": 2, + "D": 3, + "C": 4, + "Q": 5, + "E": 6, + "G": 7, + "H": 8, + "I": 9, + "L": 10, + "K": 11, + "M": 12, + "F": 13, + "P": 14, + "S": 15, + "T": 16, + "W": 17, + "Y": 18, + "V": 19, + "X": 20, + "B": 21, + "U": 22, + "Z": 23, + "O": 24, + ".": 25, + "-": 26, + "": 27, + "": 28, + "": 29, + "": 30, + "": 31 + }, + "merges": [] + } +} \ No newline at end of file diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/tokenizer_config.json b/pretrain_glome_nano_model_tiny/checkpoint-205000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1c77f0533c6d3bd60b0a23b8adfacc351923d671 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/tokenizer_config.json @@ -0,0 +1,58 @@ +{ + "add_prefix_space": false, + "added_tokens_decoder": { + "27": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "28": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "29": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "30": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "31": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "errors": "replace", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "tokenizer_class": "RobertaTokenizer", + "trim_offsets": true, + "unk_token": "" +} diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/trainer_state.json b/pretrain_glome_nano_model_tiny/checkpoint-205000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cb345b0ed08918599e022176ae23a35ff7788a5d --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/trainer_state.json @@ -0,0 +1,29071 @@ +{ + "best_global_step": null, + "best_metric": 0.386392205953598, + "best_model_checkpoint": null, + "epoch": 1.057676928712575, + "eval_steps": 5000, + "global_step": 205000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0002579699826128232, + "grad_norm": 314643.03125, + "learning_rate": 4.9e-07, + "loss": 3.7018, + "step": 50 + }, + { + "epoch": 0.0005159399652256464, + "grad_norm": 286448.65625, + "learning_rate": 9.9e-07, + "loss": 3.6146, + "step": 100 + }, + { + "epoch": 0.0007739099478384695, + "grad_norm": 214582.28125, + "learning_rate": 1.4900000000000001e-06, + "loss": 3.4562, + "step": 150 + }, + { + "epoch": 0.0010318799304512927, + "grad_norm": 137193.9375, + "learning_rate": 1.99e-06, + "loss": 3.2558, + "step": 200 + }, + { + "epoch": 0.0012898499130641159, + "grad_norm": 82222.84375, + "learning_rate": 2.49e-06, + "loss": 3.0641, + "step": 250 + }, + { + "epoch": 0.001547819895676939, + "grad_norm": 56772.109375, + "learning_rate": 2.99e-06, + "loss": 2.9123, + "step": 300 + }, + { + "epoch": 0.0018057898782897622, + "grad_norm": 46737.6796875, + "learning_rate": 3.49e-06, + "loss": 2.8131, + "step": 350 + }, + { + "epoch": 0.0020637598609025854, + "grad_norm": 38769.04296875, + "learning_rate": 3.99e-06, + "loss": 2.7526, + "step": 400 + }, + { + "epoch": 0.0023217298435154084, + "grad_norm": 34701.5546875, + "learning_rate": 4.49e-06, + "loss": 2.7178, + "step": 450 + }, + { + "epoch": 0.0025796998261282318, + "grad_norm": 30332.826171875, + "learning_rate": 4.9900000000000005e-06, + "loss": 2.6967, + "step": 500 + }, + { + "epoch": 0.0028376698087410547, + "grad_norm": 27192.7421875, + "learning_rate": 5.49e-06, + "loss": 2.6798, + "step": 550 + }, + { + "epoch": 0.003095639791353878, + "grad_norm": 36959.33984375, + "learning_rate": 5.99e-06, + "loss": 2.668, + "step": 600 + }, + { + "epoch": 0.003353609773966701, + "grad_norm": 30939.365234375, + "learning_rate": 6.4900000000000005e-06, + "loss": 2.6572, + "step": 650 + }, + { + "epoch": 0.0036115797565795245, + "grad_norm": 26976.78125, + "learning_rate": 6.990000000000001e-06, + "loss": 2.6397, + "step": 700 + }, + { + "epoch": 0.0038695497391923474, + "grad_norm": 32308.447265625, + "learning_rate": 7.4899999999999994e-06, + "loss": 2.6021, + "step": 750 + }, + { + "epoch": 0.004127519721805171, + "grad_norm": 33838.8046875, + "learning_rate": 7.99e-06, + "loss": 2.5058, + "step": 800 + }, + { + "epoch": 0.004385489704417994, + "grad_norm": 49298.84765625, + "learning_rate": 8.49e-06, + "loss": 2.4095, + "step": 850 + }, + { + "epoch": 0.004643459687030817, + "grad_norm": 36241.56640625, + "learning_rate": 8.99e-06, + "loss": 2.2961, + "step": 900 + }, + { + "epoch": 0.004901429669643641, + "grad_norm": 37884.82421875, + "learning_rate": 9.49e-06, + "loss": 2.1526, + "step": 950 + }, + { + "epoch": 0.0051593996522564635, + "grad_norm": 36827.66796875, + "learning_rate": 9.990000000000001e-06, + "loss": 2.0338, + "step": 1000 + }, + { + "epoch": 0.0054173696348692865, + "grad_norm": 41398.421875, + "learning_rate": 1.049e-05, + "loss": 1.939, + "step": 1050 + }, + { + "epoch": 0.0056753396174821094, + "grad_norm": 42648.38671875, + "learning_rate": 1.099e-05, + "loss": 1.8325, + "step": 1100 + }, + { + "epoch": 0.005933309600094933, + "grad_norm": 40172.9453125, + "learning_rate": 1.149e-05, + "loss": 1.7643, + "step": 1150 + }, + { + "epoch": 0.006191279582707756, + "grad_norm": 35860.8515625, + "learning_rate": 1.199e-05, + "loss": 1.6742, + "step": 1200 + }, + { + "epoch": 0.006449249565320579, + "grad_norm": 44456.93359375, + "learning_rate": 1.249e-05, + "loss": 1.6026, + "step": 1250 + }, + { + "epoch": 0.006707219547933402, + "grad_norm": 36839.08984375, + "learning_rate": 1.299e-05, + "loss": 1.521, + "step": 1300 + }, + { + "epoch": 0.006965189530546226, + "grad_norm": 44026.68359375, + "learning_rate": 1.349e-05, + "loss": 1.4436, + "step": 1350 + }, + { + "epoch": 0.007223159513159049, + "grad_norm": 35557.578125, + "learning_rate": 1.399e-05, + "loss": 1.3773, + "step": 1400 + }, + { + "epoch": 0.007481129495771872, + "grad_norm": 38767.60546875, + "learning_rate": 1.449e-05, + "loss": 1.3023, + "step": 1450 + }, + { + "epoch": 0.007739099478384695, + "grad_norm": 36654.796875, + "learning_rate": 1.499e-05, + "loss": 1.2627, + "step": 1500 + }, + { + "epoch": 0.007997069460997519, + "grad_norm": 41690.328125, + "learning_rate": 1.5490000000000002e-05, + "loss": 1.2063, + "step": 1550 + }, + { + "epoch": 0.008255039443610342, + "grad_norm": 38743.59375, + "learning_rate": 1.599e-05, + "loss": 1.1626, + "step": 1600 + }, + { + "epoch": 0.008513009426223165, + "grad_norm": 41839.7890625, + "learning_rate": 1.649e-05, + "loss": 1.1225, + "step": 1650 + }, + { + "epoch": 0.008770979408835988, + "grad_norm": 42897.0703125, + "learning_rate": 1.699e-05, + "loss": 1.0864, + "step": 1700 + }, + { + "epoch": 0.00902894939144881, + "grad_norm": 37412.30859375, + "learning_rate": 1.749e-05, + "loss": 1.0613, + "step": 1750 + }, + { + "epoch": 0.009286919374061633, + "grad_norm": 37235.484375, + "learning_rate": 1.7990000000000002e-05, + "loss": 1.0354, + "step": 1800 + }, + { + "epoch": 0.009544889356674458, + "grad_norm": 39117.6328125, + "learning_rate": 1.849e-05, + "loss": 1.0059, + "step": 1850 + }, + { + "epoch": 0.009802859339287281, + "grad_norm": 37297.6875, + "learning_rate": 1.8990000000000003e-05, + "loss": 0.9795, + "step": 1900 + }, + { + "epoch": 0.010060829321900104, + "grad_norm": 33772.24609375, + "learning_rate": 1.949e-05, + "loss": 0.9639, + "step": 1950 + }, + { + "epoch": 0.010318799304512927, + "grad_norm": 39775.046875, + "learning_rate": 1.999e-05, + "loss": 0.9386, + "step": 2000 + }, + { + "epoch": 0.01057676928712575, + "grad_norm": 38412.2109375, + "learning_rate": 2.0490000000000002e-05, + "loss": 0.9212, + "step": 2050 + }, + { + "epoch": 0.010834739269738573, + "grad_norm": 39548.98046875, + "learning_rate": 2.099e-05, + "loss": 0.9112, + "step": 2100 + }, + { + "epoch": 0.011092709252351396, + "grad_norm": 38127.77734375, + "learning_rate": 2.1490000000000003e-05, + "loss": 0.8866, + "step": 2150 + }, + { + "epoch": 0.011350679234964219, + "grad_norm": 39877.0390625, + "learning_rate": 2.199e-05, + "loss": 0.8806, + "step": 2200 + }, + { + "epoch": 0.011608649217577044, + "grad_norm": 34642.28515625, + "learning_rate": 2.249e-05, + "loss": 0.8645, + "step": 2250 + }, + { + "epoch": 0.011866619200189867, + "grad_norm": 38508.0078125, + "learning_rate": 2.2990000000000002e-05, + "loss": 0.8609, + "step": 2300 + }, + { + "epoch": 0.01212458918280269, + "grad_norm": 33287.765625, + "learning_rate": 2.349e-05, + "loss": 0.8443, + "step": 2350 + }, + { + "epoch": 0.012382559165415512, + "grad_norm": 35477.5546875, + "learning_rate": 2.3990000000000002e-05, + "loss": 0.839, + "step": 2400 + }, + { + "epoch": 0.012640529148028335, + "grad_norm": 32204.408203125, + "learning_rate": 2.449e-05, + "loss": 0.8204, + "step": 2450 + }, + { + "epoch": 0.012898499130641158, + "grad_norm": 35113.59765625, + "learning_rate": 2.4990000000000003e-05, + "loss": 0.8214, + "step": 2500 + }, + { + "epoch": 0.013156469113253981, + "grad_norm": 36591.2421875, + "learning_rate": 2.549e-05, + "loss": 0.8066, + "step": 2550 + }, + { + "epoch": 0.013414439095866804, + "grad_norm": 37926.3125, + "learning_rate": 2.5990000000000004e-05, + "loss": 0.7993, + "step": 2600 + }, + { + "epoch": 0.013672409078479627, + "grad_norm": 35413.01171875, + "learning_rate": 2.6490000000000002e-05, + "loss": 0.8012, + "step": 2650 + }, + { + "epoch": 0.013930379061092452, + "grad_norm": 33275.1796875, + "learning_rate": 2.6989999999999997e-05, + "loss": 0.7879, + "step": 2700 + }, + { + "epoch": 0.014188349043705275, + "grad_norm": 35463.87109375, + "learning_rate": 2.749e-05, + "loss": 0.7808, + "step": 2750 + }, + { + "epoch": 0.014446319026318098, + "grad_norm": 33143.234375, + "learning_rate": 2.7989999999999998e-05, + "loss": 0.7813, + "step": 2800 + }, + { + "epoch": 0.01470428900893092, + "grad_norm": 32908.71484375, + "learning_rate": 2.849e-05, + "loss": 0.7725, + "step": 2850 + }, + { + "epoch": 0.014962258991543744, + "grad_norm": 36443.578125, + "learning_rate": 2.8990000000000002e-05, + "loss": 0.761, + "step": 2900 + }, + { + "epoch": 0.015220228974156567, + "grad_norm": 32331.728515625, + "learning_rate": 2.949e-05, + "loss": 0.7588, + "step": 2950 + }, + { + "epoch": 0.01547819895676939, + "grad_norm": 33401.546875, + "learning_rate": 2.9990000000000003e-05, + "loss": 0.7462, + "step": 3000 + }, + { + "epoch": 0.015736168939382213, + "grad_norm": 32041.26171875, + "learning_rate": 3.049e-05, + "loss": 0.7449, + "step": 3050 + }, + { + "epoch": 0.015994138921995037, + "grad_norm": 32035.814453125, + "learning_rate": 3.099e-05, + "loss": 0.7373, + "step": 3100 + }, + { + "epoch": 0.01625210890460786, + "grad_norm": 31430.421875, + "learning_rate": 3.1490000000000005e-05, + "loss": 0.7371, + "step": 3150 + }, + { + "epoch": 0.016510078887220683, + "grad_norm": 30911.267578125, + "learning_rate": 3.1990000000000004e-05, + "loss": 0.7315, + "step": 3200 + }, + { + "epoch": 0.016768048869833505, + "grad_norm": 31906.193359375, + "learning_rate": 3.249e-05, + "loss": 0.7405, + "step": 3250 + }, + { + "epoch": 0.01702601885244633, + "grad_norm": 30320.1640625, + "learning_rate": 3.299e-05, + "loss": 0.7323, + "step": 3300 + }, + { + "epoch": 0.017283988835059154, + "grad_norm": 32357.072265625, + "learning_rate": 3.349e-05, + "loss": 0.7244, + "step": 3350 + }, + { + "epoch": 0.017541958817671975, + "grad_norm": 34023.2109375, + "learning_rate": 3.399e-05, + "loss": 0.7214, + "step": 3400 + }, + { + "epoch": 0.0177999288002848, + "grad_norm": 33940.8046875, + "learning_rate": 3.449e-05, + "loss": 0.7158, + "step": 3450 + }, + { + "epoch": 0.01805789878289762, + "grad_norm": 31701.14453125, + "learning_rate": 3.499e-05, + "loss": 0.7102, + "step": 3500 + }, + { + "epoch": 0.018315868765510446, + "grad_norm": 32291.861328125, + "learning_rate": 3.549e-05, + "loss": 0.7104, + "step": 3550 + }, + { + "epoch": 0.018573838748123267, + "grad_norm": 28074.177734375, + "learning_rate": 3.599e-05, + "loss": 0.7001, + "step": 3600 + }, + { + "epoch": 0.01883180873073609, + "grad_norm": 29823.787109375, + "learning_rate": 3.6490000000000005e-05, + "loss": 0.7029, + "step": 3650 + }, + { + "epoch": 0.019089778713348916, + "grad_norm": 29792.24609375, + "learning_rate": 3.699e-05, + "loss": 0.6949, + "step": 3700 + }, + { + "epoch": 0.019347748695961738, + "grad_norm": 31345.296875, + "learning_rate": 3.749e-05, + "loss": 0.6989, + "step": 3750 + }, + { + "epoch": 0.019605718678574562, + "grad_norm": 33923.0625, + "learning_rate": 3.799e-05, + "loss": 0.6984, + "step": 3800 + }, + { + "epoch": 0.019863688661187383, + "grad_norm": 30762.97265625, + "learning_rate": 3.8490000000000006e-05, + "loss": 0.6931, + "step": 3850 + }, + { + "epoch": 0.020121658643800208, + "grad_norm": 30794.13671875, + "learning_rate": 3.8990000000000004e-05, + "loss": 0.6923, + "step": 3900 + }, + { + "epoch": 0.02037962862641303, + "grad_norm": 29854.923828125, + "learning_rate": 3.9489999999999996e-05, + "loss": 0.6895, + "step": 3950 + }, + { + "epoch": 0.020637598609025854, + "grad_norm": 27336.958984375, + "learning_rate": 3.999e-05, + "loss": 0.6853, + "step": 4000 + }, + { + "epoch": 0.020895568591638675, + "grad_norm": 31836.81640625, + "learning_rate": 4.049e-05, + "loss": 0.6821, + "step": 4050 + }, + { + "epoch": 0.0211535385742515, + "grad_norm": 28508.548828125, + "learning_rate": 4.099e-05, + "loss": 0.6857, + "step": 4100 + }, + { + "epoch": 0.021411508556864325, + "grad_norm": 30309.2421875, + "learning_rate": 4.1490000000000004e-05, + "loss": 0.6791, + "step": 4150 + }, + { + "epoch": 0.021669478539477146, + "grad_norm": 31035.0703125, + "learning_rate": 4.199e-05, + "loss": 0.6762, + "step": 4200 + }, + { + "epoch": 0.02192744852208997, + "grad_norm": 30893.951171875, + "learning_rate": 4.249e-05, + "loss": 0.6739, + "step": 4250 + }, + { + "epoch": 0.022185418504702792, + "grad_norm": 28317.12890625, + "learning_rate": 4.299e-05, + "loss": 0.6635, + "step": 4300 + }, + { + "epoch": 0.022443388487315617, + "grad_norm": 27140.29296875, + "learning_rate": 4.3490000000000005e-05, + "loss": 0.6694, + "step": 4350 + }, + { + "epoch": 0.022701358469928438, + "grad_norm": 27948.32421875, + "learning_rate": 4.3990000000000004e-05, + "loss": 0.6667, + "step": 4400 + }, + { + "epoch": 0.022959328452541262, + "grad_norm": 27243.44140625, + "learning_rate": 4.449e-05, + "loss": 0.6689, + "step": 4450 + }, + { + "epoch": 0.023217298435154087, + "grad_norm": 29163.98828125, + "learning_rate": 4.499e-05, + "loss": 0.6639, + "step": 4500 + }, + { + "epoch": 0.02347526841776691, + "grad_norm": 27801.79296875, + "learning_rate": 4.549000000000001e-05, + "loss": 0.6612, + "step": 4550 + }, + { + "epoch": 0.023733238400379733, + "grad_norm": 28201.7265625, + "learning_rate": 4.599e-05, + "loss": 0.6608, + "step": 4600 + }, + { + "epoch": 0.023991208382992554, + "grad_norm": 28875.06640625, + "learning_rate": 4.649e-05, + "loss": 0.6642, + "step": 4650 + }, + { + "epoch": 0.02424917836560538, + "grad_norm": 25467.376953125, + "learning_rate": 4.699e-05, + "loss": 0.6513, + "step": 4700 + }, + { + "epoch": 0.0245071483482182, + "grad_norm": 27359.97265625, + "learning_rate": 4.749e-05, + "loss": 0.6554, + "step": 4750 + }, + { + "epoch": 0.024765118330831025, + "grad_norm": 30614.15234375, + "learning_rate": 4.799e-05, + "loss": 0.6574, + "step": 4800 + }, + { + "epoch": 0.025023088313443846, + "grad_norm": 29069.677734375, + "learning_rate": 4.8490000000000005e-05, + "loss": 0.6562, + "step": 4850 + }, + { + "epoch": 0.02528105829605667, + "grad_norm": 27337.37109375, + "learning_rate": 4.8990000000000004e-05, + "loss": 0.6507, + "step": 4900 + }, + { + "epoch": 0.025539028278669496, + "grad_norm": 26784.7265625, + "learning_rate": 4.949e-05, + "loss": 0.64, + "step": 4950 + }, + { + "epoch": 0.025796998261282317, + "grad_norm": 27480.509765625, + "learning_rate": 4.999e-05, + "loss": 0.6515, + "step": 5000 + }, + { + "epoch": 0.025796998261282317, + "eval_loss": 0.6312834024429321, + "eval_runtime": 3280.995, + "eval_samples_per_second": 945.177, + "eval_steps_per_second": 1.846, + "step": 5000 + }, + { + "epoch": 0.02605496824389514, + "grad_norm": 27871.740234375, + "learning_rate": 5.0490000000000006e-05, + "loss": 0.6424, + "step": 5050 + }, + { + "epoch": 0.026312938226507963, + "grad_norm": 31187.00390625, + "learning_rate": 5.0990000000000005e-05, + "loss": 0.643, + "step": 5100 + }, + { + "epoch": 0.026570908209120787, + "grad_norm": 25956.521484375, + "learning_rate": 5.149e-05, + "loss": 0.65, + "step": 5150 + }, + { + "epoch": 0.02682887819173361, + "grad_norm": 25967.70703125, + "learning_rate": 5.199000000000001e-05, + "loss": 0.6466, + "step": 5200 + }, + { + "epoch": 0.027086848174346433, + "grad_norm": 25310.275390625, + "learning_rate": 5.249000000000001e-05, + "loss": 0.6429, + "step": 5250 + }, + { + "epoch": 0.027344818156959255, + "grad_norm": 24740.033203125, + "learning_rate": 5.2990000000000006e-05, + "loss": 0.6415, + "step": 5300 + }, + { + "epoch": 0.02760278813957208, + "grad_norm": 30795.58984375, + "learning_rate": 5.3490000000000005e-05, + "loss": 0.6424, + "step": 5350 + }, + { + "epoch": 0.027860758122184904, + "grad_norm": 30625.59375, + "learning_rate": 5.399000000000001e-05, + "loss": 0.6361, + "step": 5400 + }, + { + "epoch": 0.028118728104797725, + "grad_norm": 27036.14453125, + "learning_rate": 5.449000000000001e-05, + "loss": 0.6351, + "step": 5450 + }, + { + "epoch": 0.02837669808741055, + "grad_norm": 26934.447265625, + "learning_rate": 5.499000000000001e-05, + "loss": 0.6304, + "step": 5500 + }, + { + "epoch": 0.02863466807002337, + "grad_norm": 25540.291015625, + "learning_rate": 5.549e-05, + "loss": 0.6304, + "step": 5550 + }, + { + "epoch": 0.028892638052636196, + "grad_norm": 26574.9375, + "learning_rate": 5.599e-05, + "loss": 0.6444, + "step": 5600 + }, + { + "epoch": 0.029150608035249017, + "grad_norm": 26941.955078125, + "learning_rate": 5.6489999999999996e-05, + "loss": 0.6373, + "step": 5650 + }, + { + "epoch": 0.02940857801786184, + "grad_norm": 26957.7734375, + "learning_rate": 5.699e-05, + "loss": 0.6363, + "step": 5700 + }, + { + "epoch": 0.029666548000474666, + "grad_norm": 24377.55859375, + "learning_rate": 5.749e-05, + "loss": 0.6213, + "step": 5750 + }, + { + "epoch": 0.029924517983087488, + "grad_norm": 25600.697265625, + "learning_rate": 5.799e-05, + "loss": 0.6362, + "step": 5800 + }, + { + "epoch": 0.030182487965700312, + "grad_norm": 23841.47265625, + "learning_rate": 5.849e-05, + "loss": 0.6274, + "step": 5850 + }, + { + "epoch": 0.030440457948313134, + "grad_norm": 23847.73046875, + "learning_rate": 5.899e-05, + "loss": 0.624, + "step": 5900 + }, + { + "epoch": 0.030698427930925958, + "grad_norm": 25549.033203125, + "learning_rate": 5.949e-05, + "loss": 0.627, + "step": 5950 + }, + { + "epoch": 0.03095639791353878, + "grad_norm": 25286.8046875, + "learning_rate": 5.999e-05, + "loss": 0.6272, + "step": 6000 + }, + { + "epoch": 0.031214367896151604, + "grad_norm": 25137.384765625, + "learning_rate": 6.0490000000000005e-05, + "loss": 0.622, + "step": 6050 + }, + { + "epoch": 0.031472337878764425, + "grad_norm": 23606.23828125, + "learning_rate": 6.0990000000000004e-05, + "loss": 0.6262, + "step": 6100 + }, + { + "epoch": 0.031730307861377254, + "grad_norm": 32101.404296875, + "learning_rate": 6.149000000000001e-05, + "loss": 0.619, + "step": 6150 + }, + { + "epoch": 0.031988277843990075, + "grad_norm": 23683.73046875, + "learning_rate": 6.199000000000001e-05, + "loss": 0.6129, + "step": 6200 + }, + { + "epoch": 0.032246247826602896, + "grad_norm": 25243.49609375, + "learning_rate": 6.249e-05, + "loss": 0.6194, + "step": 6250 + }, + { + "epoch": 0.03250421780921572, + "grad_norm": 28690.10546875, + "learning_rate": 6.299e-05, + "loss": 0.6199, + "step": 6300 + }, + { + "epoch": 0.032762187791828545, + "grad_norm": 24198.47265625, + "learning_rate": 6.349e-05, + "loss": 0.6077, + "step": 6350 + }, + { + "epoch": 0.03302015777444137, + "grad_norm": 24742.998046875, + "learning_rate": 6.399e-05, + "loss": 0.6168, + "step": 6400 + }, + { + "epoch": 0.03327812775705419, + "grad_norm": 27489.93359375, + "learning_rate": 6.449e-05, + "loss": 0.6136, + "step": 6450 + }, + { + "epoch": 0.03353609773966701, + "grad_norm": 28733.7265625, + "learning_rate": 6.499000000000001e-05, + "loss": 0.6184, + "step": 6500 + }, + { + "epoch": 0.03379406772227984, + "grad_norm": 23810.544921875, + "learning_rate": 6.549000000000001e-05, + "loss": 0.6167, + "step": 6550 + }, + { + "epoch": 0.03405203770489266, + "grad_norm": 25503.98828125, + "learning_rate": 6.599000000000001e-05, + "loss": 0.6184, + "step": 6600 + }, + { + "epoch": 0.03431000768750548, + "grad_norm": 24550.26171875, + "learning_rate": 6.649000000000001e-05, + "loss": 0.6146, + "step": 6650 + }, + { + "epoch": 0.03456797767011831, + "grad_norm": 22774.71875, + "learning_rate": 6.699000000000001e-05, + "loss": 0.6132, + "step": 6700 + }, + { + "epoch": 0.03482594765273113, + "grad_norm": 23878.90625, + "learning_rate": 6.749e-05, + "loss": 0.6127, + "step": 6750 + }, + { + "epoch": 0.03508391763534395, + "grad_norm": 28744.9921875, + "learning_rate": 6.799e-05, + "loss": 0.6203, + "step": 6800 + }, + { + "epoch": 0.03534188761795677, + "grad_norm": 24239.826171875, + "learning_rate": 6.849e-05, + "loss": 0.6069, + "step": 6850 + }, + { + "epoch": 0.0355998576005696, + "grad_norm": 27030.513671875, + "learning_rate": 6.899e-05, + "loss": 0.614, + "step": 6900 + }, + { + "epoch": 0.03585782758318242, + "grad_norm": 22872.59375, + "learning_rate": 6.949e-05, + "loss": 0.6068, + "step": 6950 + }, + { + "epoch": 0.03611579756579524, + "grad_norm": 23280.333984375, + "learning_rate": 6.999e-05, + "loss": 0.6064, + "step": 7000 + }, + { + "epoch": 0.03637376754840807, + "grad_norm": 24819.060546875, + "learning_rate": 7.049e-05, + "loss": 0.606, + "step": 7050 + }, + { + "epoch": 0.03663173753102089, + "grad_norm": 23739.595703125, + "learning_rate": 7.099e-05, + "loss": 0.6065, + "step": 7100 + }, + { + "epoch": 0.03688970751363371, + "grad_norm": 24261.28515625, + "learning_rate": 7.149e-05, + "loss": 0.6037, + "step": 7150 + }, + { + "epoch": 0.037147677496246534, + "grad_norm": 24133.744140625, + "learning_rate": 7.199000000000001e-05, + "loss": 0.6097, + "step": 7200 + }, + { + "epoch": 0.03740564747885936, + "grad_norm": 22903.197265625, + "learning_rate": 7.249e-05, + "loss": 0.6048, + "step": 7250 + }, + { + "epoch": 0.03766361746147218, + "grad_norm": 23503.970703125, + "learning_rate": 7.299e-05, + "loss": 0.6039, + "step": 7300 + }, + { + "epoch": 0.037921587444085005, + "grad_norm": 20935.388671875, + "learning_rate": 7.349e-05, + "loss": 0.6016, + "step": 7350 + }, + { + "epoch": 0.03817955742669783, + "grad_norm": 22991.720703125, + "learning_rate": 7.399e-05, + "loss": 0.6111, + "step": 7400 + }, + { + "epoch": 0.038437527409310654, + "grad_norm": 21915.90234375, + "learning_rate": 7.449e-05, + "loss": 0.5969, + "step": 7450 + }, + { + "epoch": 0.038695497391923475, + "grad_norm": 22474.25390625, + "learning_rate": 7.499e-05, + "loss": 0.6068, + "step": 7500 + }, + { + "epoch": 0.038953467374536296, + "grad_norm": 24122.150390625, + "learning_rate": 7.549000000000001e-05, + "loss": 0.6037, + "step": 7550 + }, + { + "epoch": 0.039211437357149125, + "grad_norm": 22262.220703125, + "learning_rate": 7.599000000000001e-05, + "loss": 0.5946, + "step": 7600 + }, + { + "epoch": 0.039469407339761946, + "grad_norm": 23959.7265625, + "learning_rate": 7.649000000000001e-05, + "loss": 0.598, + "step": 7650 + }, + { + "epoch": 0.03972737732237477, + "grad_norm": 21918.5859375, + "learning_rate": 7.699e-05, + "loss": 0.5959, + "step": 7700 + }, + { + "epoch": 0.03998534730498759, + "grad_norm": 23740.5390625, + "learning_rate": 7.749e-05, + "loss": 0.594, + "step": 7750 + }, + { + "epoch": 0.040243317287600416, + "grad_norm": 23406.4296875, + "learning_rate": 7.799e-05, + "loss": 0.6048, + "step": 7800 + }, + { + "epoch": 0.04050128727021324, + "grad_norm": 23423.201171875, + "learning_rate": 7.849e-05, + "loss": 0.5944, + "step": 7850 + }, + { + "epoch": 0.04075925725282606, + "grad_norm": 23187.76171875, + "learning_rate": 7.899000000000001e-05, + "loss": 0.5944, + "step": 7900 + }, + { + "epoch": 0.04101722723543889, + "grad_norm": 25532.4375, + "learning_rate": 7.949000000000001e-05, + "loss": 0.5978, + "step": 7950 + }, + { + "epoch": 0.04127519721805171, + "grad_norm": 23045.28515625, + "learning_rate": 7.999000000000001e-05, + "loss": 0.5968, + "step": 8000 + }, + { + "epoch": 0.04153316720066453, + "grad_norm": 22853.826171875, + "learning_rate": 8.049e-05, + "loss": 0.5915, + "step": 8050 + }, + { + "epoch": 0.04179113718327735, + "grad_norm": 21853.658203125, + "learning_rate": 8.099e-05, + "loss": 0.5932, + "step": 8100 + }, + { + "epoch": 0.04204910716589018, + "grad_norm": 22395.74609375, + "learning_rate": 8.149e-05, + "loss": 0.5925, + "step": 8150 + }, + { + "epoch": 0.042307077148503, + "grad_norm": 23933.40625, + "learning_rate": 8.199e-05, + "loss": 0.5878, + "step": 8200 + }, + { + "epoch": 0.04256504713111582, + "grad_norm": 21773.087890625, + "learning_rate": 8.249e-05, + "loss": 0.5916, + "step": 8250 + }, + { + "epoch": 0.04282301711372865, + "grad_norm": 22665.11328125, + "learning_rate": 8.299e-05, + "loss": 0.5906, + "step": 8300 + }, + { + "epoch": 0.04308098709634147, + "grad_norm": 22157.091796875, + "learning_rate": 8.349e-05, + "loss": 0.5873, + "step": 8350 + }, + { + "epoch": 0.04333895707895429, + "grad_norm": 21506.8125, + "learning_rate": 8.399e-05, + "loss": 0.5927, + "step": 8400 + }, + { + "epoch": 0.04359692706156711, + "grad_norm": 22143.341796875, + "learning_rate": 8.449e-05, + "loss": 0.5828, + "step": 8450 + }, + { + "epoch": 0.04385489704417994, + "grad_norm": 23341.23828125, + "learning_rate": 8.499e-05, + "loss": 0.5885, + "step": 8500 + }, + { + "epoch": 0.04411286702679276, + "grad_norm": 21876.96484375, + "learning_rate": 8.549000000000001e-05, + "loss": 0.5913, + "step": 8550 + }, + { + "epoch": 0.044370837009405584, + "grad_norm": 22307.29296875, + "learning_rate": 8.599000000000001e-05, + "loss": 0.583, + "step": 8600 + }, + { + "epoch": 0.04462880699201841, + "grad_norm": 22859.017578125, + "learning_rate": 8.649000000000001e-05, + "loss": 0.5889, + "step": 8650 + }, + { + "epoch": 0.04488677697463123, + "grad_norm": 22058.24609375, + "learning_rate": 8.699e-05, + "loss": 0.5848, + "step": 8700 + }, + { + "epoch": 0.045144746957244054, + "grad_norm": 22116.837890625, + "learning_rate": 8.749e-05, + "loss": 0.5858, + "step": 8750 + }, + { + "epoch": 0.045402716939856876, + "grad_norm": 23110.17578125, + "learning_rate": 8.799e-05, + "loss": 0.5855, + "step": 8800 + }, + { + "epoch": 0.045660686922469704, + "grad_norm": 24173.064453125, + "learning_rate": 8.849e-05, + "loss": 0.5878, + "step": 8850 + }, + { + "epoch": 0.045918656905082525, + "grad_norm": 21521.48046875, + "learning_rate": 8.899e-05, + "loss": 0.5914, + "step": 8900 + }, + { + "epoch": 0.046176626887695346, + "grad_norm": 24516.0, + "learning_rate": 8.949000000000001e-05, + "loss": 0.5849, + "step": 8950 + }, + { + "epoch": 0.046434596870308174, + "grad_norm": 22074.9609375, + "learning_rate": 8.999000000000001e-05, + "loss": 0.5848, + "step": 9000 + }, + { + "epoch": 0.046692566852920996, + "grad_norm": 21495.4140625, + "learning_rate": 9.049000000000001e-05, + "loss": 0.579, + "step": 9050 + }, + { + "epoch": 0.04695053683553382, + "grad_norm": 23548.224609375, + "learning_rate": 9.099000000000001e-05, + "loss": 0.5826, + "step": 9100 + }, + { + "epoch": 0.04720850681814664, + "grad_norm": 22144.51953125, + "learning_rate": 9.149e-05, + "loss": 0.5879, + "step": 9150 + }, + { + "epoch": 0.047466476800759466, + "grad_norm": 20656.185546875, + "learning_rate": 9.199e-05, + "loss": 0.5806, + "step": 9200 + }, + { + "epoch": 0.04772444678337229, + "grad_norm": 21228.814453125, + "learning_rate": 9.249e-05, + "loss": 0.5858, + "step": 9250 + }, + { + "epoch": 0.04798241676598511, + "grad_norm": 20801.869140625, + "learning_rate": 9.299e-05, + "loss": 0.5816, + "step": 9300 + }, + { + "epoch": 0.04824038674859793, + "grad_norm": 24044.283203125, + "learning_rate": 9.349e-05, + "loss": 0.5811, + "step": 9350 + }, + { + "epoch": 0.04849835673121076, + "grad_norm": 22395.47265625, + "learning_rate": 9.399e-05, + "loss": 0.5782, + "step": 9400 + }, + { + "epoch": 0.04875632671382358, + "grad_norm": 22353.078125, + "learning_rate": 9.449e-05, + "loss": 0.5758, + "step": 9450 + }, + { + "epoch": 0.0490142966964364, + "grad_norm": 22520.72265625, + "learning_rate": 9.499e-05, + "loss": 0.5752, + "step": 9500 + }, + { + "epoch": 0.04927226667904923, + "grad_norm": 22016.951171875, + "learning_rate": 9.549e-05, + "loss": 0.5764, + "step": 9550 + }, + { + "epoch": 0.04953023666166205, + "grad_norm": 20046.615234375, + "learning_rate": 9.599000000000001e-05, + "loss": 0.5759, + "step": 9600 + }, + { + "epoch": 0.04978820664427487, + "grad_norm": 21346.029296875, + "learning_rate": 9.649e-05, + "loss": 0.5798, + "step": 9650 + }, + { + "epoch": 0.05004617662688769, + "grad_norm": 22449.796875, + "learning_rate": 9.699e-05, + "loss": 0.5829, + "step": 9700 + }, + { + "epoch": 0.05030414660950052, + "grad_norm": 20538.751953125, + "learning_rate": 9.749e-05, + "loss": 0.5809, + "step": 9750 + }, + { + "epoch": 0.05056211659211334, + "grad_norm": 21123.19921875, + "learning_rate": 9.799e-05, + "loss": 0.5726, + "step": 9800 + }, + { + "epoch": 0.05082008657472616, + "grad_norm": 20853.08203125, + "learning_rate": 9.849e-05, + "loss": 0.5726, + "step": 9850 + }, + { + "epoch": 0.05107805655733899, + "grad_norm": 22160.841796875, + "learning_rate": 9.899e-05, + "loss": 0.5783, + "step": 9900 + }, + { + "epoch": 0.05133602653995181, + "grad_norm": 19711.109375, + "learning_rate": 9.949000000000001e-05, + "loss": 0.5722, + "step": 9950 + }, + { + "epoch": 0.051593996522564634, + "grad_norm": 21442.310546875, + "learning_rate": 9.999000000000001e-05, + "loss": 0.5773, + "step": 10000 + }, + { + "epoch": 0.051593996522564634, + "eval_loss": 0.5661358833312988, + "eval_runtime": 3272.6524, + "eval_samples_per_second": 947.586, + "eval_steps_per_second": 1.851, + "step": 10000 + }, + { + "epoch": 0.051851966505177455, + "grad_norm": 21442.943359375, + "learning_rate": 9.999998718392692e-05, + "loss": 0.5727, + "step": 10050 + }, + { + "epoch": 0.05210993648779028, + "grad_norm": 21711.177734375, + "learning_rate": 9.999994768416664e-05, + "loss": 0.5707, + "step": 10100 + }, + { + "epoch": 0.052367906470403104, + "grad_norm": 21793.666015625, + "learning_rate": 9.999988149540251e-05, + "loss": 0.5727, + "step": 10150 + }, + { + "epoch": 0.052625876453015925, + "grad_norm": 18847.970703125, + "learning_rate": 9.999978861766983e-05, + "loss": 0.5726, + "step": 10200 + }, + { + "epoch": 0.052883846435628754, + "grad_norm": 22870.91796875, + "learning_rate": 9.999966905101816e-05, + "loss": 0.5751, + "step": 10250 + }, + { + "epoch": 0.053141816418241575, + "grad_norm": 23970.431640625, + "learning_rate": 9.999952279551135e-05, + "loss": 0.5745, + "step": 10300 + }, + { + "epoch": 0.053399786400854396, + "grad_norm": 19482.65625, + "learning_rate": 9.999934985122746e-05, + "loss": 0.5734, + "step": 10350 + }, + { + "epoch": 0.05365775638346722, + "grad_norm": 19720.65625, + "learning_rate": 9.999915021825879e-05, + "loss": 0.5697, + "step": 10400 + }, + { + "epoch": 0.053915726366080045, + "grad_norm": 21484.8203125, + "learning_rate": 9.99989238967119e-05, + "loss": 0.5678, + "step": 10450 + }, + { + "epoch": 0.05417369634869287, + "grad_norm": 20198.669921875, + "learning_rate": 9.999867088670762e-05, + "loss": 0.5731, + "step": 10500 + }, + { + "epoch": 0.05443166633130569, + "grad_norm": 19887.86328125, + "learning_rate": 9.999839118838099e-05, + "loss": 0.5711, + "step": 10550 + }, + { + "epoch": 0.05468963631391851, + "grad_norm": 21250.41796875, + "learning_rate": 9.999808480188131e-05, + "loss": 0.5653, + "step": 10600 + }, + { + "epoch": 0.05494760629653134, + "grad_norm": 21179.904296875, + "learning_rate": 9.999775172737211e-05, + "loss": 0.5666, + "step": 10650 + }, + { + "epoch": 0.05520557627914416, + "grad_norm": 21106.083984375, + "learning_rate": 9.999739196503119e-05, + "loss": 0.5656, + "step": 10700 + }, + { + "epoch": 0.05546354626175698, + "grad_norm": 19393.994140625, + "learning_rate": 9.999700551505057e-05, + "loss": 0.566, + "step": 10750 + }, + { + "epoch": 0.05572151624436981, + "grad_norm": 22788.060546875, + "learning_rate": 9.999659237763656e-05, + "loss": 0.5681, + "step": 10800 + }, + { + "epoch": 0.05597948622698263, + "grad_norm": 20106.75390625, + "learning_rate": 9.999615255300966e-05, + "loss": 0.5668, + "step": 10850 + }, + { + "epoch": 0.05623745620959545, + "grad_norm": 22390.466796875, + "learning_rate": 9.999568604140464e-05, + "loss": 0.5665, + "step": 10900 + }, + { + "epoch": 0.05649542619220827, + "grad_norm": 21145.044921875, + "learning_rate": 9.999519284307053e-05, + "loss": 0.5645, + "step": 10950 + }, + { + "epoch": 0.0567533961748211, + "grad_norm": 22501.64453125, + "learning_rate": 9.999467295827059e-05, + "loss": 0.5663, + "step": 11000 + }, + { + "epoch": 0.05701136615743392, + "grad_norm": 21079.431640625, + "learning_rate": 9.999412638728229e-05, + "loss": 0.5605, + "step": 11050 + }, + { + "epoch": 0.05726933614004674, + "grad_norm": 21501.4375, + "learning_rate": 9.999355313039742e-05, + "loss": 0.5643, + "step": 11100 + }, + { + "epoch": 0.05752730612265957, + "grad_norm": 22092.6328125, + "learning_rate": 9.999295318792194e-05, + "loss": 0.5602, + "step": 11150 + }, + { + "epoch": 0.05778527610527239, + "grad_norm": 19948.81640625, + "learning_rate": 9.999232656017613e-05, + "loss": 0.5649, + "step": 11200 + }, + { + "epoch": 0.05804324608788521, + "grad_norm": 20543.5859375, + "learning_rate": 9.999167324749443e-05, + "loss": 0.5598, + "step": 11250 + }, + { + "epoch": 0.058301216070498034, + "grad_norm": 20948.060546875, + "learning_rate": 9.99909932502256e-05, + "loss": 0.5631, + "step": 11300 + }, + { + "epoch": 0.05855918605311086, + "grad_norm": 20384.732421875, + "learning_rate": 9.999028656873257e-05, + "loss": 0.5592, + "step": 11350 + }, + { + "epoch": 0.05881715603572368, + "grad_norm": 20027.615234375, + "learning_rate": 9.99895532033926e-05, + "loss": 0.5658, + "step": 11400 + }, + { + "epoch": 0.059075126018336505, + "grad_norm": 20702.263671875, + "learning_rate": 9.99887931545971e-05, + "loss": 0.56, + "step": 11450 + }, + { + "epoch": 0.05933309600094933, + "grad_norm": 21589.52734375, + "learning_rate": 9.99880064227518e-05, + "loss": 0.5595, + "step": 11500 + }, + { + "epoch": 0.059591065983562154, + "grad_norm": 20375.181640625, + "learning_rate": 9.998719300827663e-05, + "loss": 0.5627, + "step": 11550 + }, + { + "epoch": 0.059849035966174975, + "grad_norm": 20207.677734375, + "learning_rate": 9.998635291160577e-05, + "loss": 0.5615, + "step": 11600 + }, + { + "epoch": 0.060107005948787796, + "grad_norm": 20898.291015625, + "learning_rate": 9.998548613318767e-05, + "loss": 0.5594, + "step": 11650 + }, + { + "epoch": 0.060364975931400625, + "grad_norm": 20133.822265625, + "learning_rate": 9.998459267348497e-05, + "loss": 0.5631, + "step": 11700 + }, + { + "epoch": 0.060622945914013446, + "grad_norm": 19021.533203125, + "learning_rate": 9.99836725329746e-05, + "loss": 0.5576, + "step": 11750 + }, + { + "epoch": 0.06088091589662627, + "grad_norm": 19088.32421875, + "learning_rate": 9.998272571214772e-05, + "loss": 0.5619, + "step": 11800 + }, + { + "epoch": 0.061138885879239095, + "grad_norm": 19742.841796875, + "learning_rate": 9.99817522115097e-05, + "loss": 0.5626, + "step": 11850 + }, + { + "epoch": 0.061396855861851916, + "grad_norm": 21584.271484375, + "learning_rate": 9.99807520315802e-05, + "loss": 0.555, + "step": 11900 + }, + { + "epoch": 0.06165482584446474, + "grad_norm": 19766.76953125, + "learning_rate": 9.997972517289309e-05, + "loss": 0.5584, + "step": 11950 + }, + { + "epoch": 0.06191279582707756, + "grad_norm": 19821.556640625, + "learning_rate": 9.997867163599646e-05, + "loss": 0.5623, + "step": 12000 + }, + { + "epoch": 0.06217076580969039, + "grad_norm": 19488.490234375, + "learning_rate": 9.997759142145271e-05, + "loss": 0.5591, + "step": 12050 + }, + { + "epoch": 0.06242873579230321, + "grad_norm": 20093.806640625, + "learning_rate": 9.997648452983842e-05, + "loss": 0.5597, + "step": 12100 + }, + { + "epoch": 0.06268670577491603, + "grad_norm": 20202.154296875, + "learning_rate": 9.997535096174441e-05, + "loss": 0.5542, + "step": 12150 + }, + { + "epoch": 0.06294467575752885, + "grad_norm": 19978.154296875, + "learning_rate": 9.99741907177758e-05, + "loss": 0.5629, + "step": 12200 + }, + { + "epoch": 0.06320264574014167, + "grad_norm": 19697.005859375, + "learning_rate": 9.997300379855186e-05, + "loss": 0.5571, + "step": 12250 + }, + { + "epoch": 0.06346061572275451, + "grad_norm": 20384.287109375, + "learning_rate": 9.997179020470618e-05, + "loss": 0.5526, + "step": 12300 + }, + { + "epoch": 0.06371858570536733, + "grad_norm": 18652.044921875, + "learning_rate": 9.997054993688651e-05, + "loss": 0.5531, + "step": 12350 + }, + { + "epoch": 0.06397655568798015, + "grad_norm": 20133.990234375, + "learning_rate": 9.996928299575493e-05, + "loss": 0.5561, + "step": 12400 + }, + { + "epoch": 0.06423452567059297, + "grad_norm": 20575.875, + "learning_rate": 9.996798938198766e-05, + "loss": 0.5559, + "step": 12450 + }, + { + "epoch": 0.06449249565320579, + "grad_norm": 19524.828125, + "learning_rate": 9.996666909627525e-05, + "loss": 0.5437, + "step": 12500 + }, + { + "epoch": 0.06475046563581861, + "grad_norm": 22106.927734375, + "learning_rate": 9.996532213932242e-05, + "loss": 0.5691, + "step": 12550 + }, + { + "epoch": 0.06500843561843143, + "grad_norm": 18443.4609375, + "learning_rate": 9.996394851184814e-05, + "loss": 0.553, + "step": 12600 + }, + { + "epoch": 0.06526640560104426, + "grad_norm": 21786.943359375, + "learning_rate": 9.996254821458565e-05, + "loss": 0.562, + "step": 12650 + }, + { + "epoch": 0.06552437558365709, + "grad_norm": 22699.578125, + "learning_rate": 9.996112124828241e-05, + "loss": 0.5526, + "step": 12700 + }, + { + "epoch": 0.06578234556626991, + "grad_norm": 18522.822265625, + "learning_rate": 9.995966761370006e-05, + "loss": 0.5525, + "step": 12750 + }, + { + "epoch": 0.06604031554888273, + "grad_norm": 19723.44140625, + "learning_rate": 9.995818731161458e-05, + "loss": 0.5555, + "step": 12800 + }, + { + "epoch": 0.06629828553149555, + "grad_norm": 20643.173828125, + "learning_rate": 9.995668034281606e-05, + "loss": 0.5506, + "step": 12850 + }, + { + "epoch": 0.06655625551410838, + "grad_norm": 19303.68359375, + "learning_rate": 9.995514670810896e-05, + "loss": 0.5599, + "step": 12900 + }, + { + "epoch": 0.0668142254967212, + "grad_norm": 19837.240234375, + "learning_rate": 9.995358640831187e-05, + "loss": 0.5514, + "step": 12950 + }, + { + "epoch": 0.06707219547933402, + "grad_norm": 19212.25390625, + "learning_rate": 9.995199944425764e-05, + "loss": 0.5542, + "step": 13000 + }, + { + "epoch": 0.06733016546194685, + "grad_norm": 19908.70703125, + "learning_rate": 9.995038581679337e-05, + "loss": 0.5421, + "step": 13050 + }, + { + "epoch": 0.06758813544455967, + "grad_norm": 18933.306640625, + "learning_rate": 9.994874552678038e-05, + "loss": 0.549, + "step": 13100 + }, + { + "epoch": 0.0678461054271725, + "grad_norm": 19313.990234375, + "learning_rate": 9.994707857509422e-05, + "loss": 0.5569, + "step": 13150 + }, + { + "epoch": 0.06810407540978532, + "grad_norm": 20800.984375, + "learning_rate": 9.99453849626247e-05, + "loss": 0.5518, + "step": 13200 + }, + { + "epoch": 0.06836204539239814, + "grad_norm": 18623.361328125, + "learning_rate": 9.994366469027583e-05, + "loss": 0.5549, + "step": 13250 + }, + { + "epoch": 0.06862001537501096, + "grad_norm": 19761.654296875, + "learning_rate": 9.994191775896584e-05, + "loss": 0.5467, + "step": 13300 + }, + { + "epoch": 0.06887798535762378, + "grad_norm": 20618.501953125, + "learning_rate": 9.994014416962723e-05, + "loss": 0.5554, + "step": 13350 + }, + { + "epoch": 0.06913595534023662, + "grad_norm": 19279.791015625, + "learning_rate": 9.993834392320668e-05, + "loss": 0.5567, + "step": 13400 + }, + { + "epoch": 0.06939392532284944, + "grad_norm": 18802.34375, + "learning_rate": 9.993651702066516e-05, + "loss": 0.5608, + "step": 13450 + }, + { + "epoch": 0.06965189530546226, + "grad_norm": 20132.15625, + "learning_rate": 9.993466346297779e-05, + "loss": 0.547, + "step": 13500 + }, + { + "epoch": 0.06990986528807508, + "grad_norm": 19165.26171875, + "learning_rate": 9.993278325113403e-05, + "loss": 0.5485, + "step": 13550 + }, + { + "epoch": 0.0701678352706879, + "grad_norm": 18493.01171875, + "learning_rate": 9.993087638613743e-05, + "loss": 0.5455, + "step": 13600 + }, + { + "epoch": 0.07042580525330072, + "grad_norm": 18225.78125, + "learning_rate": 9.992894286900589e-05, + "loss": 0.5499, + "step": 13650 + }, + { + "epoch": 0.07068377523591354, + "grad_norm": 20189.802734375, + "learning_rate": 9.992698270077146e-05, + "loss": 0.5468, + "step": 13700 + }, + { + "epoch": 0.07094174521852638, + "grad_norm": 20861.2734375, + "learning_rate": 9.992499588248043e-05, + "loss": 0.5588, + "step": 13750 + }, + { + "epoch": 0.0711997152011392, + "grad_norm": 19876.689453125, + "learning_rate": 9.992298241519335e-05, + "loss": 0.5486, + "step": 13800 + }, + { + "epoch": 0.07145768518375202, + "grad_norm": 18371.142578125, + "learning_rate": 9.992094229998497e-05, + "loss": 0.5475, + "step": 13850 + }, + { + "epoch": 0.07171565516636484, + "grad_norm": 18274.396484375, + "learning_rate": 9.991887553794423e-05, + "loss": 0.549, + "step": 13900 + }, + { + "epoch": 0.07197362514897766, + "grad_norm": 18204.947265625, + "learning_rate": 9.991678213017437e-05, + "loss": 0.5419, + "step": 13950 + }, + { + "epoch": 0.07223159513159048, + "grad_norm": 18634.162109375, + "learning_rate": 9.991466207779278e-05, + "loss": 0.5528, + "step": 14000 + }, + { + "epoch": 0.0724895651142033, + "grad_norm": 21840.685546875, + "learning_rate": 9.991251538193112e-05, + "loss": 0.5492, + "step": 14050 + }, + { + "epoch": 0.07274753509681614, + "grad_norm": 18888.935546875, + "learning_rate": 9.991034204373524e-05, + "loss": 0.5504, + "step": 14100 + }, + { + "epoch": 0.07300550507942896, + "grad_norm": 19353.263671875, + "learning_rate": 9.990814206436524e-05, + "loss": 0.5425, + "step": 14150 + }, + { + "epoch": 0.07326347506204178, + "grad_norm": 18891.79296875, + "learning_rate": 9.990591544499543e-05, + "loss": 0.551, + "step": 14200 + }, + { + "epoch": 0.0735214450446546, + "grad_norm": 17878.33203125, + "learning_rate": 9.99036621868143e-05, + "loss": 0.5403, + "step": 14250 + }, + { + "epoch": 0.07377941502726743, + "grad_norm": 18997.544921875, + "learning_rate": 9.990138229102465e-05, + "loss": 0.5458, + "step": 14300 + }, + { + "epoch": 0.07403738500988025, + "grad_norm": 22162.03125, + "learning_rate": 9.989907575884341e-05, + "loss": 0.5482, + "step": 14350 + }, + { + "epoch": 0.07429535499249307, + "grad_norm": 17026.828125, + "learning_rate": 9.989674259150177e-05, + "loss": 0.5487, + "step": 14400 + }, + { + "epoch": 0.0745533249751059, + "grad_norm": 18335.169921875, + "learning_rate": 9.989438279024513e-05, + "loss": 0.5459, + "step": 14450 + }, + { + "epoch": 0.07481129495771872, + "grad_norm": 19508.666015625, + "learning_rate": 9.989199635633309e-05, + "loss": 0.5456, + "step": 14500 + }, + { + "epoch": 0.07506926494033155, + "grad_norm": 20281.28515625, + "learning_rate": 9.98895832910395e-05, + "loss": 0.5455, + "step": 14550 + }, + { + "epoch": 0.07532723492294437, + "grad_norm": 20196.259765625, + "learning_rate": 9.98871435956524e-05, + "loss": 0.5474, + "step": 14600 + }, + { + "epoch": 0.07558520490555719, + "grad_norm": 18934.544921875, + "learning_rate": 9.988467727147409e-05, + "loss": 0.546, + "step": 14650 + }, + { + "epoch": 0.07584317488817001, + "grad_norm": 20257.126953125, + "learning_rate": 9.988218431982098e-05, + "loss": 0.5443, + "step": 14700 + }, + { + "epoch": 0.07610114487078283, + "grad_norm": 20330.86328125, + "learning_rate": 9.98796647420238e-05, + "loss": 0.5423, + "step": 14750 + }, + { + "epoch": 0.07635911485339567, + "grad_norm": 19077.765625, + "learning_rate": 9.987711853942745e-05, + "loss": 0.5446, + "step": 14800 + }, + { + "epoch": 0.07661708483600849, + "grad_norm": 20855.169921875, + "learning_rate": 9.987454571339103e-05, + "loss": 0.5427, + "step": 14850 + }, + { + "epoch": 0.07687505481862131, + "grad_norm": 20556.005859375, + "learning_rate": 9.987194626528788e-05, + "loss": 0.5417, + "step": 14900 + }, + { + "epoch": 0.07713302480123413, + "grad_norm": 19028.7421875, + "learning_rate": 9.986932019650553e-05, + "loss": 0.5412, + "step": 14950 + }, + { + "epoch": 0.07739099478384695, + "grad_norm": 18669.166015625, + "learning_rate": 9.986666750844572e-05, + "loss": 0.5404, + "step": 15000 + }, + { + "epoch": 0.07739099478384695, + "eval_loss": 0.5350670218467712, + "eval_runtime": 3217.7876, + "eval_samples_per_second": 963.743, + "eval_steps_per_second": 1.882, + "step": 15000 + }, + { + "epoch": 0.07764896476645977, + "grad_norm": 19965.779296875, + "learning_rate": 9.98639882025244e-05, + "loss": 0.5439, + "step": 15050 + }, + { + "epoch": 0.07790693474907259, + "grad_norm": 18329.9921875, + "learning_rate": 9.986128228017173e-05, + "loss": 0.5425, + "step": 15100 + }, + { + "epoch": 0.07816490473168543, + "grad_norm": 20102.005859375, + "learning_rate": 9.985854974283211e-05, + "loss": 0.5444, + "step": 15150 + }, + { + "epoch": 0.07842287471429825, + "grad_norm": 19234.671875, + "learning_rate": 9.985579059196406e-05, + "loss": 0.5443, + "step": 15200 + }, + { + "epoch": 0.07868084469691107, + "grad_norm": 18324.298828125, + "learning_rate": 9.985300482904041e-05, + "loss": 0.5419, + "step": 15250 + }, + { + "epoch": 0.07893881467952389, + "grad_norm": 18766.2734375, + "learning_rate": 9.985019245554814e-05, + "loss": 0.5412, + "step": 15300 + }, + { + "epoch": 0.07919678466213671, + "grad_norm": 18805.765625, + "learning_rate": 9.984735347298841e-05, + "loss": 0.5443, + "step": 15350 + }, + { + "epoch": 0.07945475464474953, + "grad_norm": 17677.30078125, + "learning_rate": 9.984448788287665e-05, + "loss": 0.5421, + "step": 15400 + }, + { + "epoch": 0.07971272462736236, + "grad_norm": 19851.3515625, + "learning_rate": 9.984159568674243e-05, + "loss": 0.5426, + "step": 15450 + }, + { + "epoch": 0.07997069460997518, + "grad_norm": 18453.05859375, + "learning_rate": 9.983867688612956e-05, + "loss": 0.5445, + "step": 15500 + }, + { + "epoch": 0.08022866459258801, + "grad_norm": 17366.869140625, + "learning_rate": 9.983573148259603e-05, + "loss": 0.5451, + "step": 15550 + }, + { + "epoch": 0.08048663457520083, + "grad_norm": 18628.716796875, + "learning_rate": 9.983275947771407e-05, + "loss": 0.5373, + "step": 15600 + }, + { + "epoch": 0.08074460455781365, + "grad_norm": 19403.87890625, + "learning_rate": 9.982976087307003e-05, + "loss": 0.5489, + "step": 15650 + }, + { + "epoch": 0.08100257454042648, + "grad_norm": 18485.71875, + "learning_rate": 9.982673567026455e-05, + "loss": 0.538, + "step": 15700 + }, + { + "epoch": 0.0812605445230393, + "grad_norm": 19837.1796875, + "learning_rate": 9.982368387091241e-05, + "loss": 0.5356, + "step": 15750 + }, + { + "epoch": 0.08151851450565212, + "grad_norm": 19505.34375, + "learning_rate": 9.982060547664258e-05, + "loss": 0.5356, + "step": 15800 + }, + { + "epoch": 0.08177648448826494, + "grad_norm": 18645.48828125, + "learning_rate": 9.981750048909828e-05, + "loss": 0.5381, + "step": 15850 + }, + { + "epoch": 0.08203445447087777, + "grad_norm": 20191.73828125, + "learning_rate": 9.981436890993689e-05, + "loss": 0.535, + "step": 15900 + }, + { + "epoch": 0.0822924244534906, + "grad_norm": 18908.15625, + "learning_rate": 9.981121074082995e-05, + "loss": 0.5405, + "step": 15950 + }, + { + "epoch": 0.08255039443610342, + "grad_norm": 19517.73828125, + "learning_rate": 9.980802598346326e-05, + "loss": 0.5407, + "step": 16000 + }, + { + "epoch": 0.08280836441871624, + "grad_norm": 18368.16015625, + "learning_rate": 9.980481463953679e-05, + "loss": 0.5391, + "step": 16050 + }, + { + "epoch": 0.08306633440132906, + "grad_norm": 19727.35546875, + "learning_rate": 9.980157671076466e-05, + "loss": 0.537, + "step": 16100 + }, + { + "epoch": 0.08332430438394188, + "grad_norm": 20757.890625, + "learning_rate": 9.979831219887525e-05, + "loss": 0.5408, + "step": 16150 + }, + { + "epoch": 0.0835822743665547, + "grad_norm": 19334.708984375, + "learning_rate": 9.979502110561108e-05, + "loss": 0.5371, + "step": 16200 + }, + { + "epoch": 0.08384024434916754, + "grad_norm": 19338.498046875, + "learning_rate": 9.979170343272886e-05, + "loss": 0.531, + "step": 16250 + }, + { + "epoch": 0.08409821433178036, + "grad_norm": 18722.365234375, + "learning_rate": 9.978835918199949e-05, + "loss": 0.5398, + "step": 16300 + }, + { + "epoch": 0.08435618431439318, + "grad_norm": 18026.109375, + "learning_rate": 9.97849883552081e-05, + "loss": 0.5423, + "step": 16350 + }, + { + "epoch": 0.084614154297006, + "grad_norm": 19646.78125, + "learning_rate": 9.978159095415396e-05, + "loss": 0.5387, + "step": 16400 + }, + { + "epoch": 0.08487212427961882, + "grad_norm": 20091.552734375, + "learning_rate": 9.977816698065052e-05, + "loss": 0.5376, + "step": 16450 + }, + { + "epoch": 0.08513009426223164, + "grad_norm": 20539.73046875, + "learning_rate": 9.977471643652546e-05, + "loss": 0.5333, + "step": 16500 + }, + { + "epoch": 0.08538806424484446, + "grad_norm": 18306.24609375, + "learning_rate": 9.977123932362059e-05, + "loss": 0.5405, + "step": 16550 + }, + { + "epoch": 0.0856460342274573, + "grad_norm": 20133.513671875, + "learning_rate": 9.976773564379193e-05, + "loss": 0.541, + "step": 16600 + }, + { + "epoch": 0.08590400421007012, + "grad_norm": 19533.50390625, + "learning_rate": 9.976420539890969e-05, + "loss": 0.5333, + "step": 16650 + }, + { + "epoch": 0.08616197419268294, + "grad_norm": 19509.087890625, + "learning_rate": 9.976064859085822e-05, + "loss": 0.5347, + "step": 16700 + }, + { + "epoch": 0.08641994417529576, + "grad_norm": 19590.818359375, + "learning_rate": 9.97570652215361e-05, + "loss": 0.5377, + "step": 16750 + }, + { + "epoch": 0.08667791415790858, + "grad_norm": 19510.705078125, + "learning_rate": 9.975345529285605e-05, + "loss": 0.5367, + "step": 16800 + }, + { + "epoch": 0.0869358841405214, + "grad_norm": 20015.8046875, + "learning_rate": 9.974981880674499e-05, + "loss": 0.5386, + "step": 16850 + }, + { + "epoch": 0.08719385412313423, + "grad_norm": 18704.03125, + "learning_rate": 9.974615576514399e-05, + "loss": 0.5361, + "step": 16900 + }, + { + "epoch": 0.08745182410574706, + "grad_norm": 18257.869140625, + "learning_rate": 9.974246617000832e-05, + "loss": 0.5304, + "step": 16950 + }, + { + "epoch": 0.08770979408835988, + "grad_norm": 18150.517578125, + "learning_rate": 9.973875002330743e-05, + "loss": 0.5289, + "step": 17000 + }, + { + "epoch": 0.0879677640709727, + "grad_norm": 18326.041015625, + "learning_rate": 9.97350073270249e-05, + "loss": 0.5347, + "step": 17050 + }, + { + "epoch": 0.08822573405358553, + "grad_norm": 18199.224609375, + "learning_rate": 9.973123808315852e-05, + "loss": 0.5269, + "step": 17100 + }, + { + "epoch": 0.08848370403619835, + "grad_norm": 20351.447265625, + "learning_rate": 9.972744229372025e-05, + "loss": 0.5334, + "step": 17150 + }, + { + "epoch": 0.08874167401881117, + "grad_norm": 19200.703125, + "learning_rate": 9.97236199607362e-05, + "loss": 0.5316, + "step": 17200 + }, + { + "epoch": 0.08899964400142399, + "grad_norm": 18855.7890625, + "learning_rate": 9.971977108624664e-05, + "loss": 0.5342, + "step": 17250 + }, + { + "epoch": 0.08925761398403682, + "grad_norm": 18889.56640625, + "learning_rate": 9.971589567230606e-05, + "loss": 0.5361, + "step": 17300 + }, + { + "epoch": 0.08951558396664965, + "grad_norm": 18003.9921875, + "learning_rate": 9.971199372098304e-05, + "loss": 0.5353, + "step": 17350 + }, + { + "epoch": 0.08977355394926247, + "grad_norm": 19555.30078125, + "learning_rate": 9.970806523436041e-05, + "loss": 0.5306, + "step": 17400 + }, + { + "epoch": 0.09003152393187529, + "grad_norm": 19433.37890625, + "learning_rate": 9.97041102145351e-05, + "loss": 0.5341, + "step": 17450 + }, + { + "epoch": 0.09028949391448811, + "grad_norm": 19238.341796875, + "learning_rate": 9.97001286636182e-05, + "loss": 0.5372, + "step": 17500 + }, + { + "epoch": 0.09054746389710093, + "grad_norm": 18698.78125, + "learning_rate": 9.969612058373502e-05, + "loss": 0.5356, + "step": 17550 + }, + { + "epoch": 0.09080543387971375, + "grad_norm": 17953.580078125, + "learning_rate": 9.969208597702497e-05, + "loss": 0.529, + "step": 17600 + }, + { + "epoch": 0.09106340386232659, + "grad_norm": 17678.716796875, + "learning_rate": 9.968802484564168e-05, + "loss": 0.5329, + "step": 17650 + }, + { + "epoch": 0.09132137384493941, + "grad_norm": 20412.287109375, + "learning_rate": 9.968393719175286e-05, + "loss": 0.534, + "step": 17700 + }, + { + "epoch": 0.09157934382755223, + "grad_norm": 20080.16015625, + "learning_rate": 9.967982301754044e-05, + "loss": 0.5307, + "step": 17750 + }, + { + "epoch": 0.09183731381016505, + "grad_norm": 18570.314453125, + "learning_rate": 9.96756823252005e-05, + "loss": 0.526, + "step": 17800 + }, + { + "epoch": 0.09209528379277787, + "grad_norm": 18329.107421875, + "learning_rate": 9.967151511694324e-05, + "loss": 0.5273, + "step": 17850 + }, + { + "epoch": 0.09235325377539069, + "grad_norm": 19036.18359375, + "learning_rate": 9.966732139499304e-05, + "loss": 0.5275, + "step": 17900 + }, + { + "epoch": 0.09261122375800351, + "grad_norm": 18708.826171875, + "learning_rate": 9.966310116158844e-05, + "loss": 0.5313, + "step": 17950 + }, + { + "epoch": 0.09286919374061635, + "grad_norm": 18660.791015625, + "learning_rate": 9.96588544189821e-05, + "loss": 0.5303, + "step": 18000 + }, + { + "epoch": 0.09312716372322917, + "grad_norm": 19709.181640625, + "learning_rate": 9.965458116944086e-05, + "loss": 0.5347, + "step": 18050 + }, + { + "epoch": 0.09338513370584199, + "grad_norm": 19683.798828125, + "learning_rate": 9.96502814152457e-05, + "loss": 0.5359, + "step": 18100 + }, + { + "epoch": 0.09364310368845481, + "grad_norm": 19533.09765625, + "learning_rate": 9.964595515869175e-05, + "loss": 0.5263, + "step": 18150 + }, + { + "epoch": 0.09390107367106763, + "grad_norm": 20254.892578125, + "learning_rate": 9.964160240208826e-05, + "loss": 0.5307, + "step": 18200 + }, + { + "epoch": 0.09415904365368045, + "grad_norm": 21316.876953125, + "learning_rate": 9.963722314775868e-05, + "loss": 0.5316, + "step": 18250 + }, + { + "epoch": 0.09441701363629328, + "grad_norm": 20027.03515625, + "learning_rate": 9.963281739804054e-05, + "loss": 0.5274, + "step": 18300 + }, + { + "epoch": 0.0946749836189061, + "grad_norm": 18551.994140625, + "learning_rate": 9.962838515528554e-05, + "loss": 0.5339, + "step": 18350 + }, + { + "epoch": 0.09493295360151893, + "grad_norm": 17779.97265625, + "learning_rate": 9.962392642185956e-05, + "loss": 0.5301, + "step": 18400 + }, + { + "epoch": 0.09519092358413175, + "grad_norm": 20620.232421875, + "learning_rate": 9.961944120014256e-05, + "loss": 0.522, + "step": 18450 + }, + { + "epoch": 0.09544889356674457, + "grad_norm": 18669.73046875, + "learning_rate": 9.961492949252868e-05, + "loss": 0.5261, + "step": 18500 + }, + { + "epoch": 0.0957068635493574, + "grad_norm": 19528.4765625, + "learning_rate": 9.961039130142617e-05, + "loss": 0.5276, + "step": 18550 + }, + { + "epoch": 0.09596483353197022, + "grad_norm": 19643.099609375, + "learning_rate": 9.960582662925744e-05, + "loss": 0.5332, + "step": 18600 + }, + { + "epoch": 0.09622280351458304, + "grad_norm": 19024.4375, + "learning_rate": 9.960123547845901e-05, + "loss": 0.529, + "step": 18650 + }, + { + "epoch": 0.09648077349719586, + "grad_norm": 20228.248046875, + "learning_rate": 9.959661785148155e-05, + "loss": 0.5322, + "step": 18700 + }, + { + "epoch": 0.0967387434798087, + "grad_norm": 20120.126953125, + "learning_rate": 9.959197375078986e-05, + "loss": 0.5256, + "step": 18750 + }, + { + "epoch": 0.09699671346242152, + "grad_norm": 19894.423828125, + "learning_rate": 9.95873031788629e-05, + "loss": 0.5257, + "step": 18800 + }, + { + "epoch": 0.09725468344503434, + "grad_norm": 18450.8671875, + "learning_rate": 9.958260613819367e-05, + "loss": 0.5268, + "step": 18850 + }, + { + "epoch": 0.09751265342764716, + "grad_norm": 22775.53125, + "learning_rate": 9.95778826312894e-05, + "loss": 0.5293, + "step": 18900 + }, + { + "epoch": 0.09777062341025998, + "grad_norm": 17769.38671875, + "learning_rate": 9.95731326606714e-05, + "loss": 0.5281, + "step": 18950 + }, + { + "epoch": 0.0980285933928728, + "grad_norm": 20731.322265625, + "learning_rate": 9.956835622887514e-05, + "loss": 0.5327, + "step": 19000 + }, + { + "epoch": 0.09828656337548562, + "grad_norm": 20059.11328125, + "learning_rate": 9.956355333845014e-05, + "loss": 0.5279, + "step": 19050 + }, + { + "epoch": 0.09854453335809846, + "grad_norm": 17477.626953125, + "learning_rate": 9.955872399196012e-05, + "loss": 0.5257, + "step": 19100 + }, + { + "epoch": 0.09880250334071128, + "grad_norm": 20293.232421875, + "learning_rate": 9.955386819198287e-05, + "loss": 0.5258, + "step": 19150 + }, + { + "epoch": 0.0990604733233241, + "grad_norm": 19330.4140625, + "learning_rate": 9.954898594111035e-05, + "loss": 0.5231, + "step": 19200 + }, + { + "epoch": 0.09931844330593692, + "grad_norm": 19410.818359375, + "learning_rate": 9.954407724194858e-05, + "loss": 0.5286, + "step": 19250 + }, + { + "epoch": 0.09957641328854974, + "grad_norm": 18320.552734375, + "learning_rate": 9.953914209711775e-05, + "loss": 0.5287, + "step": 19300 + }, + { + "epoch": 0.09983438327116256, + "grad_norm": 17585.583984375, + "learning_rate": 9.953418050925213e-05, + "loss": 0.5265, + "step": 19350 + }, + { + "epoch": 0.10009235325377538, + "grad_norm": 20318.298828125, + "learning_rate": 9.952919248100012e-05, + "loss": 0.5292, + "step": 19400 + }, + { + "epoch": 0.10035032323638822, + "grad_norm": 20239.33984375, + "learning_rate": 9.952417801502426e-05, + "loss": 0.522, + "step": 19450 + }, + { + "epoch": 0.10060829321900104, + "grad_norm": 18922.158203125, + "learning_rate": 9.951913711400115e-05, + "loss": 0.5275, + "step": 19500 + }, + { + "epoch": 0.10086626320161386, + "grad_norm": 18332.673828125, + "learning_rate": 9.951406978062153e-05, + "loss": 0.5282, + "step": 19550 + }, + { + "epoch": 0.10112423318422668, + "grad_norm": 19321.662109375, + "learning_rate": 9.950897601759024e-05, + "loss": 0.5236, + "step": 19600 + }, + { + "epoch": 0.1013822031668395, + "grad_norm": 19050.42578125, + "learning_rate": 9.950385582762624e-05, + "loss": 0.5269, + "step": 19650 + }, + { + "epoch": 0.10164017314945233, + "grad_norm": 18592.8125, + "learning_rate": 9.949870921346259e-05, + "loss": 0.5294, + "step": 19700 + }, + { + "epoch": 0.10189814313206515, + "grad_norm": 17702.080078125, + "learning_rate": 9.949353617784644e-05, + "loss": 0.5321, + "step": 19750 + }, + { + "epoch": 0.10215611311467798, + "grad_norm": 18935.71875, + "learning_rate": 9.948833672353907e-05, + "loss": 0.5279, + "step": 19800 + }, + { + "epoch": 0.1024140830972908, + "grad_norm": 19814.96484375, + "learning_rate": 9.948311085331585e-05, + "loss": 0.5174, + "step": 19850 + }, + { + "epoch": 0.10267205307990362, + "grad_norm": 18945.4375, + "learning_rate": 9.947785856996623e-05, + "loss": 0.525, + "step": 19900 + }, + { + "epoch": 0.10293002306251645, + "grad_norm": 19162.28125, + "learning_rate": 9.947257987629379e-05, + "loss": 0.5268, + "step": 19950 + }, + { + "epoch": 0.10318799304512927, + "grad_norm": 18814.861328125, + "learning_rate": 9.94672747751162e-05, + "loss": 0.5191, + "step": 20000 + }, + { + "epoch": 0.10318799304512927, + "eval_loss": 0.5160176157951355, + "eval_runtime": 3272.5369, + "eval_samples_per_second": 947.62, + "eval_steps_per_second": 1.851, + "step": 20000 + }, + { + "epoch": 0.10344596302774209, + "grad_norm": 19089.77734375, + "learning_rate": 9.94619432692652e-05, + "loss": 0.5254, + "step": 20050 + }, + { + "epoch": 0.10370393301035491, + "grad_norm": 19005.53125, + "learning_rate": 9.945658536158667e-05, + "loss": 0.525, + "step": 20100 + }, + { + "epoch": 0.10396190299296774, + "grad_norm": 20896.8125, + "learning_rate": 9.945120105494054e-05, + "loss": 0.5173, + "step": 20150 + }, + { + "epoch": 0.10421987297558057, + "grad_norm": 19254.22265625, + "learning_rate": 9.944579035220085e-05, + "loss": 0.5195, + "step": 20200 + }, + { + "epoch": 0.10447784295819339, + "grad_norm": 19317.572265625, + "learning_rate": 9.944035325625573e-05, + "loss": 0.5239, + "step": 20250 + }, + { + "epoch": 0.10473581294080621, + "grad_norm": 18661.330078125, + "learning_rate": 9.94348897700074e-05, + "loss": 0.5243, + "step": 20300 + }, + { + "epoch": 0.10499378292341903, + "grad_norm": 18914.298828125, + "learning_rate": 9.942939989637216e-05, + "loss": 0.5247, + "step": 20350 + }, + { + "epoch": 0.10525175290603185, + "grad_norm": 17788.77734375, + "learning_rate": 9.942388363828041e-05, + "loss": 0.5205, + "step": 20400 + }, + { + "epoch": 0.10550972288864467, + "grad_norm": 17314.578125, + "learning_rate": 9.941834099867659e-05, + "loss": 0.5182, + "step": 20450 + }, + { + "epoch": 0.10576769287125751, + "grad_norm": 18627.068359375, + "learning_rate": 9.941277198051931e-05, + "loss": 0.5208, + "step": 20500 + }, + { + "epoch": 0.10602566285387033, + "grad_norm": 18274.4609375, + "learning_rate": 9.940717658678113e-05, + "loss": 0.5244, + "step": 20550 + }, + { + "epoch": 0.10628363283648315, + "grad_norm": 18668.767578125, + "learning_rate": 9.940155482044884e-05, + "loss": 0.5237, + "step": 20600 + }, + { + "epoch": 0.10654160281909597, + "grad_norm": 17703.703125, + "learning_rate": 9.939590668452316e-05, + "loss": 0.5148, + "step": 20650 + }, + { + "epoch": 0.10679957280170879, + "grad_norm": 18372.7578125, + "learning_rate": 9.939023218201901e-05, + "loss": 0.522, + "step": 20700 + }, + { + "epoch": 0.10705754278432161, + "grad_norm": 18439.521484375, + "learning_rate": 9.93845313159653e-05, + "loss": 0.5177, + "step": 20750 + }, + { + "epoch": 0.10731551276693443, + "grad_norm": 18812.10546875, + "learning_rate": 9.937880408940504e-05, + "loss": 0.5161, + "step": 20800 + }, + { + "epoch": 0.10757348274954727, + "grad_norm": 19163.4296875, + "learning_rate": 9.937305050539534e-05, + "loss": 0.5175, + "step": 20850 + }, + { + "epoch": 0.10783145273216009, + "grad_norm": 19459.3984375, + "learning_rate": 9.936727056700732e-05, + "loss": 0.5257, + "step": 20900 + }, + { + "epoch": 0.10808942271477291, + "grad_norm": 20272.22265625, + "learning_rate": 9.93614642773262e-05, + "loss": 0.5244, + "step": 20950 + }, + { + "epoch": 0.10834739269738573, + "grad_norm": 19995.736328125, + "learning_rate": 9.93556316394513e-05, + "loss": 0.5179, + "step": 21000 + }, + { + "epoch": 0.10860536267999855, + "grad_norm": 20567.369140625, + "learning_rate": 9.934977265649594e-05, + "loss": 0.528, + "step": 21050 + }, + { + "epoch": 0.10886333266261138, + "grad_norm": 19328.57421875, + "learning_rate": 9.934388733158753e-05, + "loss": 0.5249, + "step": 21100 + }, + { + "epoch": 0.1091213026452242, + "grad_norm": 17305.19921875, + "learning_rate": 9.933797566786757e-05, + "loss": 0.5163, + "step": 21150 + }, + { + "epoch": 0.10937927262783702, + "grad_norm": 19983.99609375, + "learning_rate": 9.933203766849155e-05, + "loss": 0.5227, + "step": 21200 + }, + { + "epoch": 0.10963724261044985, + "grad_norm": 18918.16015625, + "learning_rate": 9.93260733366291e-05, + "loss": 0.521, + "step": 21250 + }, + { + "epoch": 0.10989521259306267, + "grad_norm": 19260.40625, + "learning_rate": 9.932008267546384e-05, + "loss": 0.5195, + "step": 21300 + }, + { + "epoch": 0.1101531825756755, + "grad_norm": 16713.015625, + "learning_rate": 9.931406568819348e-05, + "loss": 0.5187, + "step": 21350 + }, + { + "epoch": 0.11041115255828832, + "grad_norm": 19787.67578125, + "learning_rate": 9.930802237802976e-05, + "loss": 0.5152, + "step": 21400 + }, + { + "epoch": 0.11066912254090114, + "grad_norm": 20632.775390625, + "learning_rate": 9.93019527481985e-05, + "loss": 0.5158, + "step": 21450 + }, + { + "epoch": 0.11092709252351396, + "grad_norm": 18545.748046875, + "learning_rate": 9.929585680193951e-05, + "loss": 0.5161, + "step": 21500 + }, + { + "epoch": 0.11118506250612678, + "grad_norm": 18961.138671875, + "learning_rate": 9.928973454250674e-05, + "loss": 0.5192, + "step": 21550 + }, + { + "epoch": 0.11144303248873962, + "grad_norm": 18970.013671875, + "learning_rate": 9.928358597316812e-05, + "loss": 0.5211, + "step": 21600 + }, + { + "epoch": 0.11170100247135244, + "grad_norm": 20800.046875, + "learning_rate": 9.927741109720561e-05, + "loss": 0.5143, + "step": 21650 + }, + { + "epoch": 0.11195897245396526, + "grad_norm": 18738.564453125, + "learning_rate": 9.927120991791528e-05, + "loss": 0.5232, + "step": 21700 + }, + { + "epoch": 0.11221694243657808, + "grad_norm": 18495.798828125, + "learning_rate": 9.926498243860715e-05, + "loss": 0.5176, + "step": 21750 + }, + { + "epoch": 0.1124749124191909, + "grad_norm": 18129.375, + "learning_rate": 9.925872866260537e-05, + "loss": 0.5132, + "step": 21800 + }, + { + "epoch": 0.11273288240180372, + "grad_norm": 19332.751953125, + "learning_rate": 9.925244859324807e-05, + "loss": 0.5135, + "step": 21850 + }, + { + "epoch": 0.11299085238441654, + "grad_norm": 19395.544921875, + "learning_rate": 9.924614223388742e-05, + "loss": 0.5191, + "step": 21900 + }, + { + "epoch": 0.11324882236702938, + "grad_norm": 20292.890625, + "learning_rate": 9.923980958788964e-05, + "loss": 0.5212, + "step": 21950 + }, + { + "epoch": 0.1135067923496422, + "grad_norm": 20309.033203125, + "learning_rate": 9.923345065863498e-05, + "loss": 0.5134, + "step": 22000 + }, + { + "epoch": 0.11376476233225502, + "grad_norm": 17513.578125, + "learning_rate": 9.922706544951772e-05, + "loss": 0.5216, + "step": 22050 + }, + { + "epoch": 0.11402273231486784, + "grad_norm": 18886.10546875, + "learning_rate": 9.922065396394614e-05, + "loss": 0.5219, + "step": 22100 + }, + { + "epoch": 0.11428070229748066, + "grad_norm": 19656.1484375, + "learning_rate": 9.921421620534257e-05, + "loss": 0.5163, + "step": 22150 + }, + { + "epoch": 0.11453867228009348, + "grad_norm": 18463.068359375, + "learning_rate": 9.920775217714338e-05, + "loss": 0.5198, + "step": 22200 + }, + { + "epoch": 0.1147966422627063, + "grad_norm": 20666.400390625, + "learning_rate": 9.920126188279892e-05, + "loss": 0.5164, + "step": 22250 + }, + { + "epoch": 0.11505461224531914, + "grad_norm": 20401.681640625, + "learning_rate": 9.919474532577359e-05, + "loss": 0.5163, + "step": 22300 + }, + { + "epoch": 0.11531258222793196, + "grad_norm": 21289.541015625, + "learning_rate": 9.918820250954581e-05, + "loss": 0.5114, + "step": 22350 + }, + { + "epoch": 0.11557055221054478, + "grad_norm": 17559.50390625, + "learning_rate": 9.918163343760801e-05, + "loss": 0.5156, + "step": 22400 + }, + { + "epoch": 0.1158285221931576, + "grad_norm": 17041.087890625, + "learning_rate": 9.917503811346662e-05, + "loss": 0.5146, + "step": 22450 + }, + { + "epoch": 0.11608649217577043, + "grad_norm": 20508.087890625, + "learning_rate": 9.916841654064212e-05, + "loss": 0.5202, + "step": 22500 + }, + { + "epoch": 0.11634446215838325, + "grad_norm": 21307.646484375, + "learning_rate": 9.916176872266894e-05, + "loss": 0.5108, + "step": 22550 + }, + { + "epoch": 0.11660243214099607, + "grad_norm": 21765.580078125, + "learning_rate": 9.91550946630956e-05, + "loss": 0.5158, + "step": 22600 + }, + { + "epoch": 0.1168604021236089, + "grad_norm": 18173.646484375, + "learning_rate": 9.914839436548454e-05, + "loss": 0.5081, + "step": 22650 + }, + { + "epoch": 0.11711837210622172, + "grad_norm": 19044.880859375, + "learning_rate": 9.914166783341227e-05, + "loss": 0.5144, + "step": 22700 + }, + { + "epoch": 0.11737634208883455, + "grad_norm": 19291.37109375, + "learning_rate": 9.91349150704693e-05, + "loss": 0.5147, + "step": 22750 + }, + { + "epoch": 0.11763431207144737, + "grad_norm": 16757.376953125, + "learning_rate": 9.91281360802601e-05, + "loss": 0.5163, + "step": 22800 + }, + { + "epoch": 0.11789228205406019, + "grad_norm": 18870.287109375, + "learning_rate": 9.912133086640318e-05, + "loss": 0.512, + "step": 22850 + }, + { + "epoch": 0.11815025203667301, + "grad_norm": 20520.115234375, + "learning_rate": 9.911449943253102e-05, + "loss": 0.5175, + "step": 22900 + }, + { + "epoch": 0.11840822201928583, + "grad_norm": 20585.21484375, + "learning_rate": 9.910764178229011e-05, + "loss": 0.5114, + "step": 22950 + }, + { + "epoch": 0.11866619200189867, + "grad_norm": 18660.384765625, + "learning_rate": 9.910075791934092e-05, + "loss": 0.5115, + "step": 23000 + }, + { + "epoch": 0.11892416198451149, + "grad_norm": 19391.318359375, + "learning_rate": 9.909384784735794e-05, + "loss": 0.5198, + "step": 23050 + }, + { + "epoch": 0.11918213196712431, + "grad_norm": 18007.306640625, + "learning_rate": 9.908691157002962e-05, + "loss": 0.5125, + "step": 23100 + }, + { + "epoch": 0.11944010194973713, + "grad_norm": 20804.501953125, + "learning_rate": 9.907994909105842e-05, + "loss": 0.516, + "step": 23150 + }, + { + "epoch": 0.11969807193234995, + "grad_norm": 18307.63671875, + "learning_rate": 9.907296041416076e-05, + "loss": 0.5108, + "step": 23200 + }, + { + "epoch": 0.11995604191496277, + "grad_norm": 19694.552734375, + "learning_rate": 9.906594554306709e-05, + "loss": 0.5092, + "step": 23250 + }, + { + "epoch": 0.12021401189757559, + "grad_norm": 20234.0703125, + "learning_rate": 9.90589044815218e-05, + "loss": 0.515, + "step": 23300 + }, + { + "epoch": 0.12047198188018843, + "grad_norm": 18483.4296875, + "learning_rate": 9.905183723328327e-05, + "loss": 0.5127, + "step": 23350 + }, + { + "epoch": 0.12072995186280125, + "grad_norm": 17447.51953125, + "learning_rate": 9.904474380212384e-05, + "loss": 0.5107, + "step": 23400 + }, + { + "epoch": 0.12098792184541407, + "grad_norm": 18881.7109375, + "learning_rate": 9.903762419182986e-05, + "loss": 0.5177, + "step": 23450 + }, + { + "epoch": 0.12124589182802689, + "grad_norm": 17861.990234375, + "learning_rate": 9.903047840620168e-05, + "loss": 0.5128, + "step": 23500 + }, + { + "epoch": 0.12150386181063971, + "grad_norm": 19111.53515625, + "learning_rate": 9.902330644905351e-05, + "loss": 0.5134, + "step": 23550 + }, + { + "epoch": 0.12176183179325253, + "grad_norm": 18461.107421875, + "learning_rate": 9.901610832421366e-05, + "loss": 0.51, + "step": 23600 + }, + { + "epoch": 0.12201980177586536, + "grad_norm": 18103.701171875, + "learning_rate": 9.900888403552431e-05, + "loss": 0.5131, + "step": 23650 + }, + { + "epoch": 0.12227777175847819, + "grad_norm": 18334.755859375, + "learning_rate": 9.900163358684168e-05, + "loss": 0.511, + "step": 23700 + }, + { + "epoch": 0.12253574174109101, + "grad_norm": 17476.322265625, + "learning_rate": 9.89943569820359e-05, + "loss": 0.5151, + "step": 23750 + }, + { + "epoch": 0.12279371172370383, + "grad_norm": 18698.09765625, + "learning_rate": 9.898705422499107e-05, + "loss": 0.5146, + "step": 23800 + }, + { + "epoch": 0.12305168170631665, + "grad_norm": 18321.80859375, + "learning_rate": 9.897972531960528e-05, + "loss": 0.5109, + "step": 23850 + }, + { + "epoch": 0.12330965168892948, + "grad_norm": 18234.361328125, + "learning_rate": 9.897237026979056e-05, + "loss": 0.5115, + "step": 23900 + }, + { + "epoch": 0.1235676216715423, + "grad_norm": 19737.849609375, + "learning_rate": 9.896498907947287e-05, + "loss": 0.5155, + "step": 23950 + }, + { + "epoch": 0.12382559165415512, + "grad_norm": 19136.279296875, + "learning_rate": 9.895758175259218e-05, + "loss": 0.5162, + "step": 24000 + }, + { + "epoch": 0.12408356163676794, + "grad_norm": 18575.431640625, + "learning_rate": 9.895014829310235e-05, + "loss": 0.5141, + "step": 24050 + }, + { + "epoch": 0.12434153161938077, + "grad_norm": 17589.353515625, + "learning_rate": 9.894268870497121e-05, + "loss": 0.501, + "step": 24100 + }, + { + "epoch": 0.1245995016019936, + "grad_norm": 19781.830078125, + "learning_rate": 9.893520299218057e-05, + "loss": 0.5128, + "step": 24150 + }, + { + "epoch": 0.12485747158460642, + "grad_norm": 17501.150390625, + "learning_rate": 9.892769115872617e-05, + "loss": 0.5113, + "step": 24200 + }, + { + "epoch": 0.12511544156721924, + "grad_norm": 21107.34375, + "learning_rate": 9.892015320861762e-05, + "loss": 0.5041, + "step": 24250 + }, + { + "epoch": 0.12537341154983206, + "grad_norm": 17529.345703125, + "learning_rate": 9.89125891458786e-05, + "loss": 0.5093, + "step": 24300 + }, + { + "epoch": 0.12563138153244488, + "grad_norm": 18061.890625, + "learning_rate": 9.890499897454663e-05, + "loss": 0.5111, + "step": 24350 + }, + { + "epoch": 0.1258893515150577, + "grad_norm": 21213.177734375, + "learning_rate": 9.889738269867318e-05, + "loss": 0.5106, + "step": 24400 + }, + { + "epoch": 0.12614732149767052, + "grad_norm": 17838.625, + "learning_rate": 9.88897403223237e-05, + "loss": 0.5144, + "step": 24450 + }, + { + "epoch": 0.12640529148028334, + "grad_norm": 19047.787109375, + "learning_rate": 9.888207184957752e-05, + "loss": 0.5133, + "step": 24500 + }, + { + "epoch": 0.12666326146289617, + "grad_norm": 17355.26171875, + "learning_rate": 9.887437728452794e-05, + "loss": 0.5054, + "step": 24550 + }, + { + "epoch": 0.12692123144550901, + "grad_norm": 20496.369140625, + "learning_rate": 9.886665663128216e-05, + "loss": 0.51, + "step": 24600 + }, + { + "epoch": 0.12717920142812184, + "grad_norm": 19887.734375, + "learning_rate": 9.885890989396133e-05, + "loss": 0.5049, + "step": 24650 + }, + { + "epoch": 0.12743717141073466, + "grad_norm": 20027.69140625, + "learning_rate": 9.885113707670049e-05, + "loss": 0.5118, + "step": 24700 + }, + { + "epoch": 0.12769514139334748, + "grad_norm": 18888.92578125, + "learning_rate": 9.884333818364861e-05, + "loss": 0.5168, + "step": 24750 + }, + { + "epoch": 0.1279531113759603, + "grad_norm": 20906.673828125, + "learning_rate": 9.883551321896862e-05, + "loss": 0.5109, + "step": 24800 + }, + { + "epoch": 0.12821108135857312, + "grad_norm": 20228.833984375, + "learning_rate": 9.882766218683731e-05, + "loss": 0.5167, + "step": 24850 + }, + { + "epoch": 0.12846905134118594, + "grad_norm": 19832.4609375, + "learning_rate": 9.881978509144543e-05, + "loss": 0.5113, + "step": 24900 + }, + { + "epoch": 0.12872702132379876, + "grad_norm": 18049.193359375, + "learning_rate": 9.881188193699758e-05, + "loss": 0.5121, + "step": 24950 + }, + { + "epoch": 0.12898499130641158, + "grad_norm": 18765.033203125, + "learning_rate": 9.880395272771236e-05, + "loss": 0.5123, + "step": 25000 + }, + { + "epoch": 0.12898499130641158, + "eval_loss": 0.5013377666473389, + "eval_runtime": 3332.4061, + "eval_samples_per_second": 930.595, + "eval_steps_per_second": 1.818, + "step": 25000 + }, + { + "epoch": 0.1292429612890244, + "grad_norm": 18435.787109375, + "learning_rate": 9.879599746782221e-05, + "loss": 0.5096, + "step": 25050 + }, + { + "epoch": 0.12950093127163723, + "grad_norm": 18993.890625, + "learning_rate": 9.878801616157348e-05, + "loss": 0.5091, + "step": 25100 + }, + { + "epoch": 0.12975890125425005, + "grad_norm": 19766.783203125, + "learning_rate": 9.878000881322646e-05, + "loss": 0.5059, + "step": 25150 + }, + { + "epoch": 0.13001687123686287, + "grad_norm": 19316.537109375, + "learning_rate": 9.87719754270553e-05, + "loss": 0.5112, + "step": 25200 + }, + { + "epoch": 0.1302748412194757, + "grad_norm": 19288.64453125, + "learning_rate": 9.876391600734807e-05, + "loss": 0.5031, + "step": 25250 + }, + { + "epoch": 0.1305328112020885, + "grad_norm": 18962.7734375, + "learning_rate": 9.875583055840673e-05, + "loss": 0.5113, + "step": 25300 + }, + { + "epoch": 0.13079078118470136, + "grad_norm": 19399.21875, + "learning_rate": 9.874771908454714e-05, + "loss": 0.5177, + "step": 25350 + }, + { + "epoch": 0.13104875116731418, + "grad_norm": 20511.134765625, + "learning_rate": 9.873958159009904e-05, + "loss": 0.5049, + "step": 25400 + }, + { + "epoch": 0.131306721149927, + "grad_norm": 17669.00390625, + "learning_rate": 9.87314180794061e-05, + "loss": 0.5076, + "step": 25450 + }, + { + "epoch": 0.13156469113253982, + "grad_norm": 20254.75390625, + "learning_rate": 9.872322855682579e-05, + "loss": 0.5102, + "step": 25500 + }, + { + "epoch": 0.13182266111515265, + "grad_norm": 21859.880859375, + "learning_rate": 9.871501302672956e-05, + "loss": 0.5098, + "step": 25550 + }, + { + "epoch": 0.13208063109776547, + "grad_norm": 18794.90625, + "learning_rate": 9.870677149350268e-05, + "loss": 0.5078, + "step": 25600 + }, + { + "epoch": 0.1323386010803783, + "grad_norm": 19909.65625, + "learning_rate": 9.869850396154434e-05, + "loss": 0.5129, + "step": 25650 + }, + { + "epoch": 0.1325965710629911, + "grad_norm": 17887.99609375, + "learning_rate": 9.869021043526756e-05, + "loss": 0.508, + "step": 25700 + }, + { + "epoch": 0.13285454104560393, + "grad_norm": 17189.033203125, + "learning_rate": 9.868189091909929e-05, + "loss": 0.5114, + "step": 25750 + }, + { + "epoch": 0.13311251102821675, + "grad_norm": 21320.78125, + "learning_rate": 9.867354541748033e-05, + "loss": 0.5081, + "step": 25800 + }, + { + "epoch": 0.13337048101082957, + "grad_norm": 19035.33984375, + "learning_rate": 9.866517393486532e-05, + "loss": 0.5065, + "step": 25850 + }, + { + "epoch": 0.1336284509934424, + "grad_norm": 19038.876953125, + "learning_rate": 9.86567764757228e-05, + "loss": 0.5055, + "step": 25900 + }, + { + "epoch": 0.13388642097605521, + "grad_norm": 20425.6875, + "learning_rate": 9.86483530445352e-05, + "loss": 0.5091, + "step": 25950 + }, + { + "epoch": 0.13414439095866804, + "grad_norm": 19947.34765625, + "learning_rate": 9.863990364579876e-05, + "loss": 0.5062, + "step": 26000 + }, + { + "epoch": 0.13440236094128089, + "grad_norm": 18758.7890625, + "learning_rate": 9.863142828402361e-05, + "loss": 0.5099, + "step": 26050 + }, + { + "epoch": 0.1346603309238937, + "grad_norm": 18494.076171875, + "learning_rate": 9.862292696373372e-05, + "loss": 0.5043, + "step": 26100 + }, + { + "epoch": 0.13491830090650653, + "grad_norm": 19646.841796875, + "learning_rate": 9.861439968946696e-05, + "loss": 0.508, + "step": 26150 + }, + { + "epoch": 0.13517627088911935, + "grad_norm": 19356.009765625, + "learning_rate": 9.8605846465775e-05, + "loss": 0.5015, + "step": 26200 + }, + { + "epoch": 0.13543424087173217, + "grad_norm": 19243.1875, + "learning_rate": 9.859726729722341e-05, + "loss": 0.5086, + "step": 26250 + }, + { + "epoch": 0.135692210854345, + "grad_norm": 20116.43359375, + "learning_rate": 9.858866218839156e-05, + "loss": 0.5074, + "step": 26300 + }, + { + "epoch": 0.1359501808369578, + "grad_norm": 18592.1015625, + "learning_rate": 9.858003114387269e-05, + "loss": 0.5054, + "step": 26350 + }, + { + "epoch": 0.13620815081957063, + "grad_norm": 19552.505859375, + "learning_rate": 9.85713741682739e-05, + "loss": 0.5042, + "step": 26400 + }, + { + "epoch": 0.13646612080218345, + "grad_norm": 18818.142578125, + "learning_rate": 9.856269126621611e-05, + "loss": 0.5106, + "step": 26450 + }, + { + "epoch": 0.13672409078479628, + "grad_norm": 21973.685546875, + "learning_rate": 9.855398244233407e-05, + "loss": 0.5116, + "step": 26500 + }, + { + "epoch": 0.1369820607674091, + "grad_norm": 19296.7890625, + "learning_rate": 9.854524770127641e-05, + "loss": 0.5103, + "step": 26550 + }, + { + "epoch": 0.13724003075002192, + "grad_norm": 18975.22265625, + "learning_rate": 9.853648704770554e-05, + "loss": 0.5093, + "step": 26600 + }, + { + "epoch": 0.13749800073263474, + "grad_norm": 20003.19140625, + "learning_rate": 9.852770048629776e-05, + "loss": 0.5094, + "step": 26650 + }, + { + "epoch": 0.13775597071524756, + "grad_norm": 19885.341796875, + "learning_rate": 9.851888802174312e-05, + "loss": 0.502, + "step": 26700 + }, + { + "epoch": 0.1380139406978604, + "grad_norm": 18030.115234375, + "learning_rate": 9.851004965874557e-05, + "loss": 0.5045, + "step": 26750 + }, + { + "epoch": 0.13827191068047323, + "grad_norm": 19143.369140625, + "learning_rate": 9.850118540202286e-05, + "loss": 0.5068, + "step": 26800 + }, + { + "epoch": 0.13852988066308605, + "grad_norm": 18902.5390625, + "learning_rate": 9.849229525630656e-05, + "loss": 0.4984, + "step": 26850 + }, + { + "epoch": 0.13878785064569887, + "grad_norm": 18523.115234375, + "learning_rate": 9.848337922634206e-05, + "loss": 0.5099, + "step": 26900 + }, + { + "epoch": 0.1390458206283117, + "grad_norm": 19873.283203125, + "learning_rate": 9.847443731688852e-05, + "loss": 0.5039, + "step": 26950 + }, + { + "epoch": 0.13930379061092452, + "grad_norm": 20202.23046875, + "learning_rate": 9.846546953271902e-05, + "loss": 0.507, + "step": 27000 + }, + { + "epoch": 0.13956176059353734, + "grad_norm": 17484.572265625, + "learning_rate": 9.845647587862034e-05, + "loss": 0.5113, + "step": 27050 + }, + { + "epoch": 0.13981973057615016, + "grad_norm": 17931.634765625, + "learning_rate": 9.844745635939316e-05, + "loss": 0.5051, + "step": 27100 + }, + { + "epoch": 0.14007770055876298, + "grad_norm": 20536.693359375, + "learning_rate": 9.843841097985191e-05, + "loss": 0.5044, + "step": 27150 + }, + { + "epoch": 0.1403356705413758, + "grad_norm": 18379.619140625, + "learning_rate": 9.842933974482482e-05, + "loss": 0.5071, + "step": 27200 + }, + { + "epoch": 0.14059364052398862, + "grad_norm": 19097.240234375, + "learning_rate": 9.842024265915397e-05, + "loss": 0.5046, + "step": 27250 + }, + { + "epoch": 0.14085161050660144, + "grad_norm": 22569.80859375, + "learning_rate": 9.841111972769517e-05, + "loss": 0.5022, + "step": 27300 + }, + { + "epoch": 0.14110958048921426, + "grad_norm": 17499.166015625, + "learning_rate": 9.84019709553181e-05, + "loss": 0.5014, + "step": 27350 + }, + { + "epoch": 0.14136755047182709, + "grad_norm": 20447.25, + "learning_rate": 9.839279634690619e-05, + "loss": 0.5065, + "step": 27400 + }, + { + "epoch": 0.14162552045443993, + "grad_norm": 20977.70703125, + "learning_rate": 9.838359590735665e-05, + "loss": 0.5042, + "step": 27450 + }, + { + "epoch": 0.14188349043705276, + "grad_norm": 18168.962890625, + "learning_rate": 9.83743696415805e-05, + "loss": 0.5043, + "step": 27500 + }, + { + "epoch": 0.14214146041966558, + "grad_norm": 18671.841796875, + "learning_rate": 9.836511755450256e-05, + "loss": 0.5054, + "step": 27550 + }, + { + "epoch": 0.1423994304022784, + "grad_norm": 17737.90625, + "learning_rate": 9.835583965106141e-05, + "loss": 0.507, + "step": 27600 + }, + { + "epoch": 0.14265740038489122, + "grad_norm": 23218.873046875, + "learning_rate": 9.834653593620939e-05, + "loss": 0.5055, + "step": 27650 + }, + { + "epoch": 0.14291537036750404, + "grad_norm": 20013.341796875, + "learning_rate": 9.833720641491269e-05, + "loss": 0.5008, + "step": 27700 + }, + { + "epoch": 0.14317334035011686, + "grad_norm": 21755.08203125, + "learning_rate": 9.832785109215119e-05, + "loss": 0.5029, + "step": 27750 + }, + { + "epoch": 0.14343131033272968, + "grad_norm": 18450.541015625, + "learning_rate": 9.831846997291859e-05, + "loss": 0.5086, + "step": 27800 + }, + { + "epoch": 0.1436892803153425, + "grad_norm": 17578.990234375, + "learning_rate": 9.830906306222235e-05, + "loss": 0.498, + "step": 27850 + }, + { + "epoch": 0.14394725029795533, + "grad_norm": 18771.2578125, + "learning_rate": 9.82996303650837e-05, + "loss": 0.5006, + "step": 27900 + }, + { + "epoch": 0.14420522028056815, + "grad_norm": 19841.912109375, + "learning_rate": 9.829017188653763e-05, + "loss": 0.5003, + "step": 27950 + }, + { + "epoch": 0.14446319026318097, + "grad_norm": 19089.384765625, + "learning_rate": 9.82806876316329e-05, + "loss": 0.5028, + "step": 28000 + }, + { + "epoch": 0.1447211602457938, + "grad_norm": 17971.998046875, + "learning_rate": 9.827117760543198e-05, + "loss": 0.5103, + "step": 28050 + }, + { + "epoch": 0.1449791302284066, + "grad_norm": 19590.46875, + "learning_rate": 9.826164181301121e-05, + "loss": 0.5075, + "step": 28100 + }, + { + "epoch": 0.14523710021101943, + "grad_norm": 19316.150390625, + "learning_rate": 9.825208025946056e-05, + "loss": 0.4971, + "step": 28150 + }, + { + "epoch": 0.14549507019363228, + "grad_norm": 19814.3125, + "learning_rate": 9.82424929498838e-05, + "loss": 0.501, + "step": 28200 + }, + { + "epoch": 0.1457530401762451, + "grad_norm": 18669.203125, + "learning_rate": 9.823287988939847e-05, + "loss": 0.5027, + "step": 28250 + }, + { + "epoch": 0.14601101015885792, + "grad_norm": 20375.48828125, + "learning_rate": 9.822324108313585e-05, + "loss": 0.4966, + "step": 28300 + }, + { + "epoch": 0.14626898014147074, + "grad_norm": 19665.4296875, + "learning_rate": 9.82135765362409e-05, + "loss": 0.4966, + "step": 28350 + }, + { + "epoch": 0.14652695012408357, + "grad_norm": 19579.771484375, + "learning_rate": 9.820388625387242e-05, + "loss": 0.5028, + "step": 28400 + }, + { + "epoch": 0.1467849201066964, + "grad_norm": 20270.564453125, + "learning_rate": 9.819417024120285e-05, + "loss": 0.4972, + "step": 28450 + }, + { + "epoch": 0.1470428900893092, + "grad_norm": 20025.6328125, + "learning_rate": 9.818442850341845e-05, + "loss": 0.5082, + "step": 28500 + }, + { + "epoch": 0.14730086007192203, + "grad_norm": 19062.525390625, + "learning_rate": 9.817466104571915e-05, + "loss": 0.4983, + "step": 28550 + }, + { + "epoch": 0.14755883005453485, + "grad_norm": 18558.0390625, + "learning_rate": 9.816486787331862e-05, + "loss": 0.5004, + "step": 28600 + }, + { + "epoch": 0.14781680003714767, + "grad_norm": 20880.6875, + "learning_rate": 9.815504899144428e-05, + "loss": 0.5036, + "step": 28650 + }, + { + "epoch": 0.1480747700197605, + "grad_norm": 19120.3359375, + "learning_rate": 9.814520440533726e-05, + "loss": 0.5004, + "step": 28700 + }, + { + "epoch": 0.14833274000237331, + "grad_norm": 17185.451171875, + "learning_rate": 9.813533412025242e-05, + "loss": 0.5047, + "step": 28750 + }, + { + "epoch": 0.14859070998498614, + "grad_norm": 21795.697265625, + "learning_rate": 9.81254381414583e-05, + "loss": 0.5033, + "step": 28800 + }, + { + "epoch": 0.14884867996759896, + "grad_norm": 21923.44140625, + "learning_rate": 9.811551647423718e-05, + "loss": 0.4957, + "step": 28850 + }, + { + "epoch": 0.1491066499502118, + "grad_norm": 18988.30078125, + "learning_rate": 9.810556912388509e-05, + "loss": 0.4979, + "step": 28900 + }, + { + "epoch": 0.14936461993282463, + "grad_norm": 18479.74609375, + "learning_rate": 9.809559609571169e-05, + "loss": 0.5003, + "step": 28950 + }, + { + "epoch": 0.14962258991543745, + "grad_norm": 20426.57421875, + "learning_rate": 9.808559739504043e-05, + "loss": 0.5019, + "step": 29000 + }, + { + "epoch": 0.14988055989805027, + "grad_norm": 20044.365234375, + "learning_rate": 9.80755730272084e-05, + "loss": 0.5012, + "step": 29050 + }, + { + "epoch": 0.1501385298806631, + "grad_norm": 18321.439453125, + "learning_rate": 9.806552299756641e-05, + "loss": 0.4918, + "step": 29100 + }, + { + "epoch": 0.1503964998632759, + "grad_norm": 20315.681640625, + "learning_rate": 9.805544731147899e-05, + "loss": 0.5015, + "step": 29150 + }, + { + "epoch": 0.15065446984588873, + "grad_norm": 20399.990234375, + "learning_rate": 9.804534597432432e-05, + "loss": 0.4967, + "step": 29200 + }, + { + "epoch": 0.15091243982850155, + "grad_norm": 19539.91796875, + "learning_rate": 9.803521899149432e-05, + "loss": 0.5002, + "step": 29250 + }, + { + "epoch": 0.15117040981111438, + "grad_norm": 20317.970703125, + "learning_rate": 9.802506636839457e-05, + "loss": 0.4988, + "step": 29300 + }, + { + "epoch": 0.1514283797937272, + "grad_norm": 18728.32421875, + "learning_rate": 9.801488811044434e-05, + "loss": 0.5007, + "step": 29350 + }, + { + "epoch": 0.15168634977634002, + "grad_norm": 21256.51171875, + "learning_rate": 9.80046842230766e-05, + "loss": 0.5066, + "step": 29400 + }, + { + "epoch": 0.15194431975895284, + "grad_norm": 18871.8828125, + "learning_rate": 9.799445471173799e-05, + "loss": 0.502, + "step": 29450 + }, + { + "epoch": 0.15220228974156566, + "grad_norm": 18434.251953125, + "learning_rate": 9.798419958188878e-05, + "loss": 0.5018, + "step": 29500 + }, + { + "epoch": 0.15246025972417848, + "grad_norm": 18562.412109375, + "learning_rate": 9.7973918839003e-05, + "loss": 0.4978, + "step": 29550 + }, + { + "epoch": 0.15271822970679133, + "grad_norm": 20020.7890625, + "learning_rate": 9.796361248856832e-05, + "loss": 0.4989, + "step": 29600 + }, + { + "epoch": 0.15297619968940415, + "grad_norm": 20026.6015625, + "learning_rate": 9.795328053608606e-05, + "loss": 0.5002, + "step": 29650 + }, + { + "epoch": 0.15323416967201697, + "grad_norm": 20098.703125, + "learning_rate": 9.794292298707119e-05, + "loss": 0.4938, + "step": 29700 + }, + { + "epoch": 0.1534921396546298, + "grad_norm": 18960.154296875, + "learning_rate": 9.793253984705239e-05, + "loss": 0.4956, + "step": 29750 + }, + { + "epoch": 0.15375010963724262, + "grad_norm": 20478.669921875, + "learning_rate": 9.7922131121572e-05, + "loss": 0.4998, + "step": 29800 + }, + { + "epoch": 0.15400807961985544, + "grad_norm": 20406.701171875, + "learning_rate": 9.791169681618596e-05, + "loss": 0.5083, + "step": 29850 + }, + { + "epoch": 0.15426604960246826, + "grad_norm": 17598.75390625, + "learning_rate": 9.790123693646391e-05, + "loss": 0.4968, + "step": 29900 + }, + { + "epoch": 0.15452401958508108, + "grad_norm": 19622.521484375, + "learning_rate": 9.789075148798915e-05, + "loss": 0.4881, + "step": 29950 + }, + { + "epoch": 0.1547819895676939, + "grad_norm": 20092.87109375, + "learning_rate": 9.78802404763586e-05, + "loss": 0.4994, + "step": 30000 + }, + { + "epoch": 0.1547819895676939, + "eval_loss": 0.4904574453830719, + "eval_runtime": 3267.2095, + "eval_samples_per_second": 949.165, + "eval_steps_per_second": 1.854, + "step": 30000 + }, + { + "epoch": 0.15503995955030672, + "grad_norm": 19136.84765625, + "learning_rate": 9.786970390718282e-05, + "loss": 0.4963, + "step": 30050 + }, + { + "epoch": 0.15529792953291954, + "grad_norm": 20464.998046875, + "learning_rate": 9.785914178608603e-05, + "loss": 0.4994, + "step": 30100 + }, + { + "epoch": 0.15555589951553236, + "grad_norm": 23388.55859375, + "learning_rate": 9.784855411870611e-05, + "loss": 0.5036, + "step": 30150 + }, + { + "epoch": 0.15581386949814519, + "grad_norm": 20002.095703125, + "learning_rate": 9.783794091069451e-05, + "loss": 0.5021, + "step": 30200 + }, + { + "epoch": 0.156071839480758, + "grad_norm": 19565.419921875, + "learning_rate": 9.782730216771641e-05, + "loss": 0.4929, + "step": 30250 + }, + { + "epoch": 0.15632980946337086, + "grad_norm": 20284.173828125, + "learning_rate": 9.781663789545052e-05, + "loss": 0.4889, + "step": 30300 + }, + { + "epoch": 0.15658777944598368, + "grad_norm": 18613.439453125, + "learning_rate": 9.780594809958922e-05, + "loss": 0.496, + "step": 30350 + }, + { + "epoch": 0.1568457494285965, + "grad_norm": 19419.1640625, + "learning_rate": 9.779523278583855e-05, + "loss": 0.4977, + "step": 30400 + }, + { + "epoch": 0.15710371941120932, + "grad_norm": 21695.361328125, + "learning_rate": 9.778449195991813e-05, + "loss": 0.4998, + "step": 30450 + }, + { + "epoch": 0.15736168939382214, + "grad_norm": 21914.3828125, + "learning_rate": 9.777372562756117e-05, + "loss": 0.4936, + "step": 30500 + }, + { + "epoch": 0.15761965937643496, + "grad_norm": 22384.525390625, + "learning_rate": 9.776293379451458e-05, + "loss": 0.5034, + "step": 30550 + }, + { + "epoch": 0.15787762935904778, + "grad_norm": 21174.220703125, + "learning_rate": 9.775211646653879e-05, + "loss": 0.4928, + "step": 30600 + }, + { + "epoch": 0.1581355993416606, + "grad_norm": 19809.953125, + "learning_rate": 9.77412736494079e-05, + "loss": 0.5014, + "step": 30650 + }, + { + "epoch": 0.15839356932427343, + "grad_norm": 19657.048828125, + "learning_rate": 9.773040534890958e-05, + "loss": 0.5022, + "step": 30700 + }, + { + "epoch": 0.15865153930688625, + "grad_norm": 20559.490234375, + "learning_rate": 9.771951157084514e-05, + "loss": 0.4923, + "step": 30750 + }, + { + "epoch": 0.15890950928949907, + "grad_norm": 19473.294921875, + "learning_rate": 9.770859232102946e-05, + "loss": 0.4991, + "step": 30800 + }, + { + "epoch": 0.1591674792721119, + "grad_norm": 19243.509765625, + "learning_rate": 9.769764760529102e-05, + "loss": 0.4934, + "step": 30850 + }, + { + "epoch": 0.1594254492547247, + "grad_norm": 20882.853515625, + "learning_rate": 9.768667742947189e-05, + "loss": 0.4989, + "step": 30900 + }, + { + "epoch": 0.15968341923733753, + "grad_norm": 19654.17578125, + "learning_rate": 9.767568179942776e-05, + "loss": 0.501, + "step": 30950 + }, + { + "epoch": 0.15994138921995035, + "grad_norm": 20069.412109375, + "learning_rate": 9.766466072102786e-05, + "loss": 0.5001, + "step": 31000 + }, + { + "epoch": 0.1601993592025632, + "grad_norm": 19730.416015625, + "learning_rate": 9.765361420015506e-05, + "loss": 0.4947, + "step": 31050 + }, + { + "epoch": 0.16045732918517602, + "grad_norm": 19825.43359375, + "learning_rate": 9.764254224270573e-05, + "loss": 0.5012, + "step": 31100 + }, + { + "epoch": 0.16071529916778884, + "grad_norm": 19111.859375, + "learning_rate": 9.763144485458992e-05, + "loss": 0.4946, + "step": 31150 + }, + { + "epoch": 0.16097326915040167, + "grad_norm": 20071.7578125, + "learning_rate": 9.762032204173116e-05, + "loss": 0.4961, + "step": 31200 + }, + { + "epoch": 0.1612312391330145, + "grad_norm": 18780.638671875, + "learning_rate": 9.76091738100666e-05, + "loss": 0.4952, + "step": 31250 + }, + { + "epoch": 0.1614892091156273, + "grad_norm": 20192.69140625, + "learning_rate": 9.759800016554699e-05, + "loss": 0.4919, + "step": 31300 + }, + { + "epoch": 0.16174717909824013, + "grad_norm": 18430.57421875, + "learning_rate": 9.758680111413653e-05, + "loss": 0.4953, + "step": 31350 + }, + { + "epoch": 0.16200514908085295, + "grad_norm": 18921.740234375, + "learning_rate": 9.757557666181314e-05, + "loss": 0.5013, + "step": 31400 + }, + { + "epoch": 0.16226311906346577, + "grad_norm": 18918.857421875, + "learning_rate": 9.756432681456815e-05, + "loss": 0.4976, + "step": 31450 + }, + { + "epoch": 0.1625210890460786, + "grad_norm": 21373.814453125, + "learning_rate": 9.755305157840655e-05, + "loss": 0.4975, + "step": 31500 + }, + { + "epoch": 0.16277905902869141, + "grad_norm": 19509.482421875, + "learning_rate": 9.754175095934684e-05, + "loss": 0.4966, + "step": 31550 + }, + { + "epoch": 0.16303702901130424, + "grad_norm": 18362.125, + "learning_rate": 9.753042496342103e-05, + "loss": 0.505, + "step": 31600 + }, + { + "epoch": 0.16329499899391706, + "grad_norm": 20344.11328125, + "learning_rate": 9.751907359667476e-05, + "loss": 0.4988, + "step": 31650 + }, + { + "epoch": 0.16355296897652988, + "grad_norm": 21398.97265625, + "learning_rate": 9.750769686516715e-05, + "loss": 0.493, + "step": 31700 + }, + { + "epoch": 0.16381093895914273, + "grad_norm": 21106.955078125, + "learning_rate": 9.74962947749709e-05, + "loss": 0.4999, + "step": 31750 + }, + { + "epoch": 0.16406890894175555, + "grad_norm": 19787.216796875, + "learning_rate": 9.74848673321722e-05, + "loss": 0.4932, + "step": 31800 + }, + { + "epoch": 0.16432687892436837, + "grad_norm": 19198.83984375, + "learning_rate": 9.747341454287082e-05, + "loss": 0.4919, + "step": 31850 + }, + { + "epoch": 0.1645848489069812, + "grad_norm": 18460.92578125, + "learning_rate": 9.746193641318002e-05, + "loss": 0.495, + "step": 31900 + }, + { + "epoch": 0.164842818889594, + "grad_norm": 18591.427734375, + "learning_rate": 9.74504329492266e-05, + "loss": 0.4888, + "step": 31950 + }, + { + "epoch": 0.16510078887220683, + "grad_norm": 21651.3515625, + "learning_rate": 9.743890415715091e-05, + "loss": 0.4909, + "step": 32000 + }, + { + "epoch": 0.16535875885481965, + "grad_norm": 18884.486328125, + "learning_rate": 9.742735004310677e-05, + "loss": 0.4981, + "step": 32050 + }, + { + "epoch": 0.16561672883743248, + "grad_norm": 19223.658203125, + "learning_rate": 9.741577061326157e-05, + "loss": 0.4961, + "step": 32100 + }, + { + "epoch": 0.1658746988200453, + "grad_norm": 18266.560546875, + "learning_rate": 9.740416587379615e-05, + "loss": 0.4914, + "step": 32150 + }, + { + "epoch": 0.16613266880265812, + "grad_norm": 19871.509765625, + "learning_rate": 9.739253583090493e-05, + "loss": 0.499, + "step": 32200 + }, + { + "epoch": 0.16639063878527094, + "grad_norm": 19524.298828125, + "learning_rate": 9.738088049079577e-05, + "loss": 0.4944, + "step": 32250 + }, + { + "epoch": 0.16664860876788376, + "grad_norm": 20308.685546875, + "learning_rate": 9.73691998596901e-05, + "loss": 0.4941, + "step": 32300 + }, + { + "epoch": 0.16690657875049658, + "grad_norm": 19125.52734375, + "learning_rate": 9.735749394382278e-05, + "loss": 0.4968, + "step": 32350 + }, + { + "epoch": 0.1671645487331094, + "grad_norm": 18792.716796875, + "learning_rate": 9.734576274944223e-05, + "loss": 0.4959, + "step": 32400 + }, + { + "epoch": 0.16742251871572225, + "grad_norm": 18521.54296875, + "learning_rate": 9.73340062828103e-05, + "loss": 0.4913, + "step": 32450 + }, + { + "epoch": 0.16768048869833507, + "grad_norm": 19540.41796875, + "learning_rate": 9.732222455020241e-05, + "loss": 0.4999, + "step": 32500 + }, + { + "epoch": 0.1679384586809479, + "grad_norm": 18682.84375, + "learning_rate": 9.73104175579074e-05, + "loss": 0.4991, + "step": 32550 + }, + { + "epoch": 0.16819642866356072, + "grad_norm": 20134.8984375, + "learning_rate": 9.72985853122276e-05, + "loss": 0.4839, + "step": 32600 + }, + { + "epoch": 0.16845439864617354, + "grad_norm": 20375.1484375, + "learning_rate": 9.728672781947883e-05, + "loss": 0.4941, + "step": 32650 + }, + { + "epoch": 0.16871236862878636, + "grad_norm": 19720.98046875, + "learning_rate": 9.727484508599042e-05, + "loss": 0.4856, + "step": 32700 + }, + { + "epoch": 0.16897033861139918, + "grad_norm": 19408.7734375, + "learning_rate": 9.726293711810513e-05, + "loss": 0.4942, + "step": 32750 + }, + { + "epoch": 0.169228308594012, + "grad_norm": 20136.892578125, + "learning_rate": 9.725100392217919e-05, + "loss": 0.4942, + "step": 32800 + }, + { + "epoch": 0.16948627857662482, + "grad_norm": 20555.27734375, + "learning_rate": 9.723904550458232e-05, + "loss": 0.4907, + "step": 32850 + }, + { + "epoch": 0.16974424855923764, + "grad_norm": 18876.787109375, + "learning_rate": 9.722706187169769e-05, + "loss": 0.4951, + "step": 32900 + }, + { + "epoch": 0.17000221854185046, + "grad_norm": 19918.4296875, + "learning_rate": 9.721505302992194e-05, + "loss": 0.4871, + "step": 32950 + }, + { + "epoch": 0.17026018852446329, + "grad_norm": 18593.453125, + "learning_rate": 9.720301898566513e-05, + "loss": 0.4889, + "step": 33000 + }, + { + "epoch": 0.1705181585070761, + "grad_norm": 21007.5625, + "learning_rate": 9.719095974535084e-05, + "loss": 0.4936, + "step": 33050 + }, + { + "epoch": 0.17077612848968893, + "grad_norm": 21749.849609375, + "learning_rate": 9.717887531541601e-05, + "loss": 0.4915, + "step": 33100 + }, + { + "epoch": 0.17103409847230178, + "grad_norm": 19097.896484375, + "learning_rate": 9.716676570231114e-05, + "loss": 0.4857, + "step": 33150 + }, + { + "epoch": 0.1712920684549146, + "grad_norm": 18509.107421875, + "learning_rate": 9.715463091250003e-05, + "loss": 0.487, + "step": 33200 + }, + { + "epoch": 0.17155003843752742, + "grad_norm": 21414.916015625, + "learning_rate": 9.714247095246007e-05, + "loss": 0.4929, + "step": 33250 + }, + { + "epoch": 0.17180800842014024, + "grad_norm": 19836.978515625, + "learning_rate": 9.713028582868196e-05, + "loss": 0.4948, + "step": 33300 + }, + { + "epoch": 0.17206597840275306, + "grad_norm": 18013.787109375, + "learning_rate": 9.71180755476699e-05, + "loss": 0.4945, + "step": 33350 + }, + { + "epoch": 0.17232394838536588, + "grad_norm": 18498.1640625, + "learning_rate": 9.71058401159415e-05, + "loss": 0.4961, + "step": 33400 + }, + { + "epoch": 0.1725819183679787, + "grad_norm": 19871.404296875, + "learning_rate": 9.709357954002778e-05, + "loss": 0.4896, + "step": 33450 + }, + { + "epoch": 0.17283988835059153, + "grad_norm": 20794.05859375, + "learning_rate": 9.708129382647324e-05, + "loss": 0.4855, + "step": 33500 + }, + { + "epoch": 0.17309785833320435, + "grad_norm": 19775.6328125, + "learning_rate": 9.706898298183573e-05, + "loss": 0.4899, + "step": 33550 + }, + { + "epoch": 0.17335582831581717, + "grad_norm": 24329.740234375, + "learning_rate": 9.705664701268652e-05, + "loss": 0.4879, + "step": 33600 + }, + { + "epoch": 0.17361379829843, + "grad_norm": 19666.697265625, + "learning_rate": 9.704428592561037e-05, + "loss": 0.493, + "step": 33650 + }, + { + "epoch": 0.1738717682810428, + "grad_norm": 20382.115234375, + "learning_rate": 9.703189972720532e-05, + "loss": 0.4922, + "step": 33700 + }, + { + "epoch": 0.17412973826365563, + "grad_norm": 20240.46875, + "learning_rate": 9.701948842408293e-05, + "loss": 0.4908, + "step": 33750 + }, + { + "epoch": 0.17438770824626845, + "grad_norm": 18531.224609375, + "learning_rate": 9.700705202286811e-05, + "loss": 0.489, + "step": 33800 + }, + { + "epoch": 0.17464567822888127, + "grad_norm": 19121.0625, + "learning_rate": 9.699459053019912e-05, + "loss": 0.4884, + "step": 33850 + }, + { + "epoch": 0.17490364821149412, + "grad_norm": 19959.931640625, + "learning_rate": 9.698210395272773e-05, + "loss": 0.4912, + "step": 33900 + }, + { + "epoch": 0.17516161819410694, + "grad_norm": 18255.732421875, + "learning_rate": 9.696959229711901e-05, + "loss": 0.4888, + "step": 33950 + }, + { + "epoch": 0.17541958817671977, + "grad_norm": 21808.8671875, + "learning_rate": 9.695705557005142e-05, + "loss": 0.4945, + "step": 34000 + }, + { + "epoch": 0.1756775581593326, + "grad_norm": 18687.521484375, + "learning_rate": 9.694449377821685e-05, + "loss": 0.4891, + "step": 34050 + }, + { + "epoch": 0.1759355281419454, + "grad_norm": 18309.859375, + "learning_rate": 9.693190692832053e-05, + "loss": 0.4888, + "step": 34100 + }, + { + "epoch": 0.17619349812455823, + "grad_norm": 19453.705078125, + "learning_rate": 9.691929502708106e-05, + "loss": 0.4852, + "step": 34150 + }, + { + "epoch": 0.17645146810717105, + "grad_norm": 20964.595703125, + "learning_rate": 9.690665808123046e-05, + "loss": 0.4931, + "step": 34200 + }, + { + "epoch": 0.17670943808978387, + "grad_norm": 20170.5703125, + "learning_rate": 9.689399609751405e-05, + "loss": 0.4908, + "step": 34250 + }, + { + "epoch": 0.1769674080723967, + "grad_norm": 18276.19140625, + "learning_rate": 9.688130908269058e-05, + "loss": 0.4906, + "step": 34300 + }, + { + "epoch": 0.1772253780550095, + "grad_norm": 21062.56640625, + "learning_rate": 9.686859704353212e-05, + "loss": 0.4911, + "step": 34350 + }, + { + "epoch": 0.17748334803762233, + "grad_norm": 21678.6796875, + "learning_rate": 9.685585998682414e-05, + "loss": 0.4894, + "step": 34400 + }, + { + "epoch": 0.17774131802023516, + "grad_norm": 17795.384765625, + "learning_rate": 9.684309791936539e-05, + "loss": 0.4893, + "step": 34450 + }, + { + "epoch": 0.17799928800284798, + "grad_norm": 21536.837890625, + "learning_rate": 9.683031084796803e-05, + "loss": 0.4889, + "step": 34500 + }, + { + "epoch": 0.1782572579854608, + "grad_norm": 20554.423828125, + "learning_rate": 9.681749877945756e-05, + "loss": 0.4843, + "step": 34550 + }, + { + "epoch": 0.17851522796807365, + "grad_norm": 22045.376953125, + "learning_rate": 9.680466172067282e-05, + "loss": 0.4895, + "step": 34600 + }, + { + "epoch": 0.17877319795068647, + "grad_norm": 21406.853515625, + "learning_rate": 9.679179967846597e-05, + "loss": 0.4914, + "step": 34650 + }, + { + "epoch": 0.1790311679332993, + "grad_norm": 20971.037109375, + "learning_rate": 9.677891265970252e-05, + "loss": 0.485, + "step": 34700 + }, + { + "epoch": 0.1792891379159121, + "grad_norm": 20256.73828125, + "learning_rate": 9.676600067126129e-05, + "loss": 0.4918, + "step": 34750 + }, + { + "epoch": 0.17954710789852493, + "grad_norm": 19123.048828125, + "learning_rate": 9.67530637200345e-05, + "loss": 0.49, + "step": 34800 + }, + { + "epoch": 0.17980507788113775, + "grad_norm": 20799.748046875, + "learning_rate": 9.674010181292761e-05, + "loss": 0.4889, + "step": 34850 + }, + { + "epoch": 0.18006304786375057, + "grad_norm": 19569.609375, + "learning_rate": 9.672711495685945e-05, + "loss": 0.4882, + "step": 34900 + }, + { + "epoch": 0.1803210178463634, + "grad_norm": 18339.76171875, + "learning_rate": 9.671410315876213e-05, + "loss": 0.4884, + "step": 34950 + }, + { + "epoch": 0.18057898782897622, + "grad_norm": 20066.099609375, + "learning_rate": 9.670106642558111e-05, + "loss": 0.4866, + "step": 35000 + }, + { + "epoch": 0.18057898782897622, + "eval_loss": 0.48020538687705994, + "eval_runtime": 3265.3619, + "eval_samples_per_second": 949.702, + "eval_steps_per_second": 1.855, + "step": 35000 + }, + { + "epoch": 0.18083695781158904, + "grad_norm": 18703.037109375, + "learning_rate": 9.668800476427515e-05, + "loss": 0.4953, + "step": 35050 + }, + { + "epoch": 0.18109492779420186, + "grad_norm": 19886.177734375, + "learning_rate": 9.667491818181631e-05, + "loss": 0.4845, + "step": 35100 + }, + { + "epoch": 0.18135289777681468, + "grad_norm": 19349.08203125, + "learning_rate": 9.666180668518993e-05, + "loss": 0.493, + "step": 35150 + }, + { + "epoch": 0.1816108677594275, + "grad_norm": 19786.404296875, + "learning_rate": 9.664867028139473e-05, + "loss": 0.4815, + "step": 35200 + }, + { + "epoch": 0.18186883774204032, + "grad_norm": 21271.05859375, + "learning_rate": 9.66355089774426e-05, + "loss": 0.4907, + "step": 35250 + }, + { + "epoch": 0.18212680772465317, + "grad_norm": 19096.3125, + "learning_rate": 9.662232278035885e-05, + "loss": 0.4865, + "step": 35300 + }, + { + "epoch": 0.182384777707266, + "grad_norm": 20136.935546875, + "learning_rate": 9.660911169718196e-05, + "loss": 0.4824, + "step": 35350 + }, + { + "epoch": 0.18264274768987881, + "grad_norm": 19532.361328125, + "learning_rate": 9.65958757349638e-05, + "loss": 0.4857, + "step": 35400 + }, + { + "epoch": 0.18290071767249164, + "grad_norm": 18227.626953125, + "learning_rate": 9.658261490076944e-05, + "loss": 0.4871, + "step": 35450 + }, + { + "epoch": 0.18315868765510446, + "grad_norm": 21021.564453125, + "learning_rate": 9.656932920167727e-05, + "loss": 0.485, + "step": 35500 + }, + { + "epoch": 0.18341665763771728, + "grad_norm": 19943.9765625, + "learning_rate": 9.655601864477893e-05, + "loss": 0.4908, + "step": 35550 + }, + { + "epoch": 0.1836746276203301, + "grad_norm": 19356.8203125, + "learning_rate": 9.654268323717934e-05, + "loss": 0.4849, + "step": 35600 + }, + { + "epoch": 0.18393259760294292, + "grad_norm": 19431.9453125, + "learning_rate": 9.652932298599671e-05, + "loss": 0.4927, + "step": 35650 + }, + { + "epoch": 0.18419056758555574, + "grad_norm": 18860.0625, + "learning_rate": 9.651593789836242e-05, + "loss": 0.4879, + "step": 35700 + }, + { + "epoch": 0.18444853756816856, + "grad_norm": 18524.46875, + "learning_rate": 9.650252798142123e-05, + "loss": 0.4877, + "step": 35750 + }, + { + "epoch": 0.18470650755078138, + "grad_norm": 18897.322265625, + "learning_rate": 9.648909324233107e-05, + "loss": 0.4906, + "step": 35800 + }, + { + "epoch": 0.1849644775333942, + "grad_norm": 21080.552734375, + "learning_rate": 9.647563368826313e-05, + "loss": 0.4895, + "step": 35850 + }, + { + "epoch": 0.18522244751600703, + "grad_norm": 20014.828125, + "learning_rate": 9.64621493264019e-05, + "loss": 0.4816, + "step": 35900 + }, + { + "epoch": 0.18548041749861985, + "grad_norm": 19470.3984375, + "learning_rate": 9.644864016394504e-05, + "loss": 0.4812, + "step": 35950 + }, + { + "epoch": 0.1857383874812327, + "grad_norm": 21915.400390625, + "learning_rate": 9.643510620810348e-05, + "loss": 0.4859, + "step": 36000 + }, + { + "epoch": 0.18599635746384552, + "grad_norm": 19367.009765625, + "learning_rate": 9.642154746610139e-05, + "loss": 0.4905, + "step": 36050 + }, + { + "epoch": 0.18625432744645834, + "grad_norm": 18379.70703125, + "learning_rate": 9.640796394517616e-05, + "loss": 0.4878, + "step": 36100 + }, + { + "epoch": 0.18651229742907116, + "grad_norm": 18933.455078125, + "learning_rate": 9.639435565257842e-05, + "loss": 0.4877, + "step": 36150 + }, + { + "epoch": 0.18677026741168398, + "grad_norm": 19026.484375, + "learning_rate": 9.638072259557201e-05, + "loss": 0.4873, + "step": 36200 + }, + { + "epoch": 0.1870282373942968, + "grad_norm": 21111.09375, + "learning_rate": 9.636706478143398e-05, + "loss": 0.4815, + "step": 36250 + }, + { + "epoch": 0.18728620737690962, + "grad_norm": 19362.541015625, + "learning_rate": 9.635338221745462e-05, + "loss": 0.4854, + "step": 36300 + }, + { + "epoch": 0.18754417735952245, + "grad_norm": 19861.58984375, + "learning_rate": 9.63396749109374e-05, + "loss": 0.4832, + "step": 36350 + }, + { + "epoch": 0.18780214734213527, + "grad_norm": 18793.623046875, + "learning_rate": 9.632594286919905e-05, + "loss": 0.4811, + "step": 36400 + }, + { + "epoch": 0.1880601173247481, + "grad_norm": 20452.26953125, + "learning_rate": 9.631218609956943e-05, + "loss": 0.4872, + "step": 36450 + }, + { + "epoch": 0.1883180873073609, + "grad_norm": 19237.203125, + "learning_rate": 9.629840460939165e-05, + "loss": 0.4941, + "step": 36500 + }, + { + "epoch": 0.18857605728997373, + "grad_norm": 19828.84765625, + "learning_rate": 9.628459840602202e-05, + "loss": 0.4869, + "step": 36550 + }, + { + "epoch": 0.18883402727258655, + "grad_norm": 18171.08203125, + "learning_rate": 9.627076749683e-05, + "loss": 0.4915, + "step": 36600 + }, + { + "epoch": 0.18909199725519937, + "grad_norm": 21346.9375, + "learning_rate": 9.625691188919827e-05, + "loss": 0.4913, + "step": 36650 + }, + { + "epoch": 0.1893499672378122, + "grad_norm": 20066.7890625, + "learning_rate": 9.62430315905227e-05, + "loss": 0.4809, + "step": 36700 + }, + { + "epoch": 0.18960793722042504, + "grad_norm": 20736.546875, + "learning_rate": 9.622912660821231e-05, + "loss": 0.4849, + "step": 36750 + }, + { + "epoch": 0.18986590720303786, + "grad_norm": 20891.958984375, + "learning_rate": 9.62151969496893e-05, + "loss": 0.4831, + "step": 36800 + }, + { + "epoch": 0.19012387718565069, + "grad_norm": 21394.1953125, + "learning_rate": 9.620124262238908e-05, + "loss": 0.4855, + "step": 36850 + }, + { + "epoch": 0.1903818471682635, + "grad_norm": 19725.89453125, + "learning_rate": 9.618726363376016e-05, + "loss": 0.48, + "step": 36900 + }, + { + "epoch": 0.19063981715087633, + "grad_norm": 21622.78125, + "learning_rate": 9.617325999126429e-05, + "loss": 0.4832, + "step": 36950 + }, + { + "epoch": 0.19089778713348915, + "grad_norm": 22529.548828125, + "learning_rate": 9.615923170237633e-05, + "loss": 0.4852, + "step": 37000 + }, + { + "epoch": 0.19115575711610197, + "grad_norm": 21136.404296875, + "learning_rate": 9.614517877458428e-05, + "loss": 0.4816, + "step": 37050 + }, + { + "epoch": 0.1914137270987148, + "grad_norm": 19039.330078125, + "learning_rate": 9.61311012153894e-05, + "loss": 0.4835, + "step": 37100 + }, + { + "epoch": 0.1916716970813276, + "grad_norm": 19755.974609375, + "learning_rate": 9.611699903230594e-05, + "loss": 0.4846, + "step": 37150 + }, + { + "epoch": 0.19192966706394043, + "grad_norm": 19061.28515625, + "learning_rate": 9.610287223286139e-05, + "loss": 0.4816, + "step": 37200 + }, + { + "epoch": 0.19218763704655326, + "grad_norm": 21649.275390625, + "learning_rate": 9.608872082459639e-05, + "loss": 0.4837, + "step": 37250 + }, + { + "epoch": 0.19244560702916608, + "grad_norm": 19856.759765625, + "learning_rate": 9.607454481506466e-05, + "loss": 0.4848, + "step": 37300 + }, + { + "epoch": 0.1927035770117789, + "grad_norm": 19442.810546875, + "learning_rate": 9.60603442118331e-05, + "loss": 0.4828, + "step": 37350 + }, + { + "epoch": 0.19296154699439172, + "grad_norm": 20076.44140625, + "learning_rate": 9.604611902248168e-05, + "loss": 0.4896, + "step": 37400 + }, + { + "epoch": 0.19321951697700457, + "grad_norm": 18413.908203125, + "learning_rate": 9.603186925460359e-05, + "loss": 0.4806, + "step": 37450 + }, + { + "epoch": 0.1934774869596174, + "grad_norm": 19618.3984375, + "learning_rate": 9.601759491580503e-05, + "loss": 0.4864, + "step": 37500 + }, + { + "epoch": 0.1937354569422302, + "grad_norm": 20347.177734375, + "learning_rate": 9.600329601370539e-05, + "loss": 0.489, + "step": 37550 + }, + { + "epoch": 0.19399342692484303, + "grad_norm": 19288.380859375, + "learning_rate": 9.598897255593713e-05, + "loss": 0.4829, + "step": 37600 + }, + { + "epoch": 0.19425139690745585, + "grad_norm": 20326.1484375, + "learning_rate": 9.597462455014585e-05, + "loss": 0.4856, + "step": 37650 + }, + { + "epoch": 0.19450936689006867, + "grad_norm": 19598.14453125, + "learning_rate": 9.596025200399024e-05, + "loss": 0.4831, + "step": 37700 + }, + { + "epoch": 0.1947673368726815, + "grad_norm": 20041.28125, + "learning_rate": 9.594585492514205e-05, + "loss": 0.4822, + "step": 37750 + }, + { + "epoch": 0.19502530685529432, + "grad_norm": 20853.201171875, + "learning_rate": 9.593143332128623e-05, + "loss": 0.4874, + "step": 37800 + }, + { + "epoch": 0.19528327683790714, + "grad_norm": 21364.455078125, + "learning_rate": 9.591698720012068e-05, + "loss": 0.482, + "step": 37850 + }, + { + "epoch": 0.19554124682051996, + "grad_norm": 18795.447265625, + "learning_rate": 9.590251656935652e-05, + "loss": 0.489, + "step": 37900 + }, + { + "epoch": 0.19579921680313278, + "grad_norm": 23039.455078125, + "learning_rate": 9.588802143671784e-05, + "loss": 0.4879, + "step": 37950 + }, + { + "epoch": 0.1960571867857456, + "grad_norm": 19842.263671875, + "learning_rate": 9.58735018099419e-05, + "loss": 0.4869, + "step": 38000 + }, + { + "epoch": 0.19631515676835842, + "grad_norm": 21241.00390625, + "learning_rate": 9.585895769677897e-05, + "loss": 0.4746, + "step": 38050 + }, + { + "epoch": 0.19657312675097124, + "grad_norm": 19803.2265625, + "learning_rate": 9.584438910499245e-05, + "loss": 0.4824, + "step": 38100 + }, + { + "epoch": 0.1968310967335841, + "grad_norm": 18873.744140625, + "learning_rate": 9.582979604235873e-05, + "loss": 0.4817, + "step": 38150 + }, + { + "epoch": 0.19708906671619691, + "grad_norm": 19128.8828125, + "learning_rate": 9.581517851666734e-05, + "loss": 0.482, + "step": 38200 + }, + { + "epoch": 0.19734703669880974, + "grad_norm": 20514.16796875, + "learning_rate": 9.580053653572081e-05, + "loss": 0.4781, + "step": 38250 + }, + { + "epoch": 0.19760500668142256, + "grad_norm": 19135.58984375, + "learning_rate": 9.578587010733475e-05, + "loss": 0.4815, + "step": 38300 + }, + { + "epoch": 0.19786297666403538, + "grad_norm": 22849.197265625, + "learning_rate": 9.577117923933782e-05, + "loss": 0.4794, + "step": 38350 + }, + { + "epoch": 0.1981209466466482, + "grad_norm": 21278.736328125, + "learning_rate": 9.575646393957173e-05, + "loss": 0.4832, + "step": 38400 + }, + { + "epoch": 0.19837891662926102, + "grad_norm": 19292.162109375, + "learning_rate": 9.57417242158912e-05, + "loss": 0.4876, + "step": 38450 + }, + { + "epoch": 0.19863688661187384, + "grad_norm": 17778.423828125, + "learning_rate": 9.572696007616402e-05, + "loss": 0.4842, + "step": 38500 + }, + { + "epoch": 0.19889485659448666, + "grad_norm": 18855.140625, + "learning_rate": 9.5712171528271e-05, + "loss": 0.4846, + "step": 38550 + }, + { + "epoch": 0.19915282657709948, + "grad_norm": 21640.8203125, + "learning_rate": 9.5697358580106e-05, + "loss": 0.4829, + "step": 38600 + }, + { + "epoch": 0.1994107965597123, + "grad_norm": 19358.3828125, + "learning_rate": 9.568252123957586e-05, + "loss": 0.4806, + "step": 38650 + }, + { + "epoch": 0.19966876654232513, + "grad_norm": 20781.98828125, + "learning_rate": 9.566765951460046e-05, + "loss": 0.4849, + "step": 38700 + }, + { + "epoch": 0.19992673652493795, + "grad_norm": 20604.7265625, + "learning_rate": 9.565277341311271e-05, + "loss": 0.4856, + "step": 38750 + }, + { + "epoch": 0.20018470650755077, + "grad_norm": 20930.048828125, + "learning_rate": 9.563786294305854e-05, + "loss": 0.4812, + "step": 38800 + }, + { + "epoch": 0.20044267649016362, + "grad_norm": 22721.259765625, + "learning_rate": 9.562292811239686e-05, + "loss": 0.4857, + "step": 38850 + }, + { + "epoch": 0.20070064647277644, + "grad_norm": 19667.57421875, + "learning_rate": 9.560796892909957e-05, + "loss": 0.483, + "step": 38900 + }, + { + "epoch": 0.20095861645538926, + "grad_norm": 18259.19140625, + "learning_rate": 9.559298540115164e-05, + "loss": 0.4851, + "step": 38950 + }, + { + "epoch": 0.20121658643800208, + "grad_norm": 20980.18359375, + "learning_rate": 9.557797753655096e-05, + "loss": 0.4815, + "step": 39000 + }, + { + "epoch": 0.2014745564206149, + "grad_norm": 19840.025390625, + "learning_rate": 9.556294534330841e-05, + "loss": 0.4878, + "step": 39050 + }, + { + "epoch": 0.20173252640322772, + "grad_norm": 20406.69921875, + "learning_rate": 9.554788882944792e-05, + "loss": 0.481, + "step": 39100 + }, + { + "epoch": 0.20199049638584055, + "grad_norm": 19177.447265625, + "learning_rate": 9.553280800300637e-05, + "loss": 0.4857, + "step": 39150 + }, + { + "epoch": 0.20224846636845337, + "grad_norm": 21242.21875, + "learning_rate": 9.551770287203359e-05, + "loss": 0.4889, + "step": 39200 + }, + { + "epoch": 0.2025064363510662, + "grad_norm": 19343.58203125, + "learning_rate": 9.550257344459241e-05, + "loss": 0.482, + "step": 39250 + }, + { + "epoch": 0.202764406333679, + "grad_norm": 21327.587890625, + "learning_rate": 9.548741972875863e-05, + "loss": 0.4802, + "step": 39300 + }, + { + "epoch": 0.20302237631629183, + "grad_norm": 21366.98828125, + "learning_rate": 9.547224173262102e-05, + "loss": 0.4779, + "step": 39350 + }, + { + "epoch": 0.20328034629890465, + "grad_norm": 20876.39453125, + "learning_rate": 9.545703946428128e-05, + "loss": 0.4843, + "step": 39400 + }, + { + "epoch": 0.20353831628151747, + "grad_norm": 21280.873046875, + "learning_rate": 9.544181293185413e-05, + "loss": 0.4805, + "step": 39450 + }, + { + "epoch": 0.2037962862641303, + "grad_norm": 19546.134765625, + "learning_rate": 9.542656214346713e-05, + "loss": 0.4753, + "step": 39500 + }, + { + "epoch": 0.20405425624674312, + "grad_norm": 19179.05859375, + "learning_rate": 9.541128710726091e-05, + "loss": 0.4812, + "step": 39550 + }, + { + "epoch": 0.20431222622935596, + "grad_norm": 23525.50390625, + "learning_rate": 9.539598783138897e-05, + "loss": 0.4843, + "step": 39600 + }, + { + "epoch": 0.20457019621196879, + "grad_norm": 19369.103515625, + "learning_rate": 9.538066432401775e-05, + "loss": 0.4788, + "step": 39650 + }, + { + "epoch": 0.2048281661945816, + "grad_norm": 20777.119140625, + "learning_rate": 9.536531659332667e-05, + "loss": 0.4779, + "step": 39700 + }, + { + "epoch": 0.20508613617719443, + "grad_norm": 18987.701171875, + "learning_rate": 9.534994464750806e-05, + "loss": 0.4807, + "step": 39750 + }, + { + "epoch": 0.20534410615980725, + "grad_norm": 19523.873046875, + "learning_rate": 9.533454849476712e-05, + "loss": 0.4798, + "step": 39800 + }, + { + "epoch": 0.20560207614242007, + "grad_norm": 21302.05859375, + "learning_rate": 9.531912814332206e-05, + "loss": 0.4811, + "step": 39850 + }, + { + "epoch": 0.2058600461250329, + "grad_norm": 21545.626953125, + "learning_rate": 9.530368360140394e-05, + "loss": 0.4814, + "step": 39900 + }, + { + "epoch": 0.2061180161076457, + "grad_norm": 22709.7265625, + "learning_rate": 9.528821487725678e-05, + "loss": 0.4827, + "step": 39950 + }, + { + "epoch": 0.20637598609025853, + "grad_norm": 20853.228515625, + "learning_rate": 9.527272197913746e-05, + "loss": 0.4838, + "step": 40000 + }, + { + "epoch": 0.20637598609025853, + "eval_loss": 0.47092095017433167, + "eval_runtime": 3339.7722, + "eval_samples_per_second": 928.542, + "eval_steps_per_second": 1.814, + "step": 40000 + }, + { + "epoch": 0.20663395607287136, + "grad_norm": 18389.748046875, + "learning_rate": 9.525720491531581e-05, + "loss": 0.4809, + "step": 40050 + }, + { + "epoch": 0.20689192605548418, + "grad_norm": 20328.59765625, + "learning_rate": 9.524166369407453e-05, + "loss": 0.4827, + "step": 40100 + }, + { + "epoch": 0.207149896038097, + "grad_norm": 21094.966796875, + "learning_rate": 9.522609832370924e-05, + "loss": 0.484, + "step": 40150 + }, + { + "epoch": 0.20740786602070982, + "grad_norm": 22630.64453125, + "learning_rate": 9.52105088125284e-05, + "loss": 0.4829, + "step": 40200 + }, + { + "epoch": 0.20766583600332264, + "grad_norm": 19477.7265625, + "learning_rate": 9.51948951688534e-05, + "loss": 0.4793, + "step": 40250 + }, + { + "epoch": 0.2079238059859355, + "grad_norm": 20242.53125, + "learning_rate": 9.517925740101851e-05, + "loss": 0.4797, + "step": 40300 + }, + { + "epoch": 0.2081817759685483, + "grad_norm": 19952.421875, + "learning_rate": 9.516359551737087e-05, + "loss": 0.4785, + "step": 40350 + }, + { + "epoch": 0.20843974595116113, + "grad_norm": 19216.220703125, + "learning_rate": 9.514790952627049e-05, + "loss": 0.4753, + "step": 40400 + }, + { + "epoch": 0.20869771593377395, + "grad_norm": 20297.515625, + "learning_rate": 9.513219943609024e-05, + "loss": 0.4792, + "step": 40450 + }, + { + "epoch": 0.20895568591638677, + "grad_norm": 19528.7890625, + "learning_rate": 9.511646525521585e-05, + "loss": 0.4801, + "step": 40500 + }, + { + "epoch": 0.2092136558989996, + "grad_norm": 18037.7890625, + "learning_rate": 9.510070699204597e-05, + "loss": 0.483, + "step": 40550 + }, + { + "epoch": 0.20947162588161242, + "grad_norm": 20636.4296875, + "learning_rate": 9.508492465499199e-05, + "loss": 0.4761, + "step": 40600 + }, + { + "epoch": 0.20972959586422524, + "grad_norm": 20096.857421875, + "learning_rate": 9.506911825247827e-05, + "loss": 0.4804, + "step": 40650 + }, + { + "epoch": 0.20998756584683806, + "grad_norm": 20855.619140625, + "learning_rate": 9.505328779294192e-05, + "loss": 0.4823, + "step": 40700 + }, + { + "epoch": 0.21024553582945088, + "grad_norm": 19640.521484375, + "learning_rate": 9.503743328483296e-05, + "loss": 0.4818, + "step": 40750 + }, + { + "epoch": 0.2105035058120637, + "grad_norm": 20990.525390625, + "learning_rate": 9.50215547366142e-05, + "loss": 0.4804, + "step": 40800 + }, + { + "epoch": 0.21076147579467652, + "grad_norm": 18773.564453125, + "learning_rate": 9.500565215676132e-05, + "loss": 0.4798, + "step": 40850 + }, + { + "epoch": 0.21101944577728934, + "grad_norm": 18688.7265625, + "learning_rate": 9.498972555376282e-05, + "loss": 0.4773, + "step": 40900 + }, + { + "epoch": 0.21127741575990217, + "grad_norm": 22649.3671875, + "learning_rate": 9.497377493611998e-05, + "loss": 0.478, + "step": 40950 + }, + { + "epoch": 0.21153538574251501, + "grad_norm": 19575.95703125, + "learning_rate": 9.495780031234694e-05, + "loss": 0.4809, + "step": 41000 + }, + { + "epoch": 0.21179335572512784, + "grad_norm": 18587.681640625, + "learning_rate": 9.494180169097067e-05, + "loss": 0.4805, + "step": 41050 + }, + { + "epoch": 0.21205132570774066, + "grad_norm": 19466.5703125, + "learning_rate": 9.492577908053089e-05, + "loss": 0.4772, + "step": 41100 + }, + { + "epoch": 0.21230929569035348, + "grad_norm": 21085.15234375, + "learning_rate": 9.490973248958018e-05, + "loss": 0.4787, + "step": 41150 + }, + { + "epoch": 0.2125672656729663, + "grad_norm": 21866.95703125, + "learning_rate": 9.489366192668388e-05, + "loss": 0.4803, + "step": 41200 + }, + { + "epoch": 0.21282523565557912, + "grad_norm": 20759.609375, + "learning_rate": 9.487756740042015e-05, + "loss": 0.4782, + "step": 41250 + }, + { + "epoch": 0.21308320563819194, + "grad_norm": 20565.51171875, + "learning_rate": 9.486144891937997e-05, + "loss": 0.4765, + "step": 41300 + }, + { + "epoch": 0.21334117562080476, + "grad_norm": 21536.017578125, + "learning_rate": 9.484530649216705e-05, + "loss": 0.4753, + "step": 41350 + }, + { + "epoch": 0.21359914560341758, + "grad_norm": 19452.001953125, + "learning_rate": 9.482914012739788e-05, + "loss": 0.4807, + "step": 41400 + }, + { + "epoch": 0.2138571155860304, + "grad_norm": 21220.927734375, + "learning_rate": 9.481294983370179e-05, + "loss": 0.4803, + "step": 41450 + }, + { + "epoch": 0.21411508556864323, + "grad_norm": 18278.884765625, + "learning_rate": 9.479673561972082e-05, + "loss": 0.4807, + "step": 41500 + }, + { + "epoch": 0.21437305555125605, + "grad_norm": 21568.13671875, + "learning_rate": 9.478049749410983e-05, + "loss": 0.4751, + "step": 41550 + }, + { + "epoch": 0.21463102553386887, + "grad_norm": 21004.734375, + "learning_rate": 9.47642354655364e-05, + "loss": 0.4828, + "step": 41600 + }, + { + "epoch": 0.2148889955164817, + "grad_norm": 20709.193359375, + "learning_rate": 9.474794954268089e-05, + "loss": 0.477, + "step": 41650 + }, + { + "epoch": 0.21514696549909454, + "grad_norm": 21408.3671875, + "learning_rate": 9.47316397342364e-05, + "loss": 0.4783, + "step": 41700 + }, + { + "epoch": 0.21540493548170736, + "grad_norm": 18606.6328125, + "learning_rate": 9.47153060489088e-05, + "loss": 0.4771, + "step": 41750 + }, + { + "epoch": 0.21566290546432018, + "grad_norm": 19498.20703125, + "learning_rate": 9.469894849541667e-05, + "loss": 0.4782, + "step": 41800 + }, + { + "epoch": 0.215920875446933, + "grad_norm": 20441.9765625, + "learning_rate": 9.46825670824914e-05, + "loss": 0.4769, + "step": 41850 + }, + { + "epoch": 0.21617884542954582, + "grad_norm": 20925.109375, + "learning_rate": 9.466616181887704e-05, + "loss": 0.4858, + "step": 41900 + }, + { + "epoch": 0.21643681541215865, + "grad_norm": 21410.38671875, + "learning_rate": 9.464973271333042e-05, + "loss": 0.4791, + "step": 41950 + }, + { + "epoch": 0.21669478539477147, + "grad_norm": 19169.583984375, + "learning_rate": 9.463327977462106e-05, + "loss": 0.4783, + "step": 42000 + }, + { + "epoch": 0.2169527553773843, + "grad_norm": 19487.3359375, + "learning_rate": 9.461680301153124e-05, + "loss": 0.4792, + "step": 42050 + }, + { + "epoch": 0.2172107253599971, + "grad_norm": 21303.861328125, + "learning_rate": 9.460030243285592e-05, + "loss": 0.4811, + "step": 42100 + }, + { + "epoch": 0.21746869534260993, + "grad_norm": 21529.490234375, + "learning_rate": 9.458377804740279e-05, + "loss": 0.4761, + "step": 42150 + }, + { + "epoch": 0.21772666532522275, + "grad_norm": 21356.505859375, + "learning_rate": 9.456722986399227e-05, + "loss": 0.477, + "step": 42200 + }, + { + "epoch": 0.21798463530783557, + "grad_norm": 19551.33203125, + "learning_rate": 9.455065789145742e-05, + "loss": 0.4777, + "step": 42250 + }, + { + "epoch": 0.2182426052904484, + "grad_norm": 21424.58984375, + "learning_rate": 9.453406213864408e-05, + "loss": 0.4759, + "step": 42300 + }, + { + "epoch": 0.21850057527306121, + "grad_norm": 18835.1953125, + "learning_rate": 9.451744261441072e-05, + "loss": 0.4749, + "step": 42350 + }, + { + "epoch": 0.21875854525567404, + "grad_norm": 20333.490234375, + "learning_rate": 9.450079932762852e-05, + "loss": 0.4786, + "step": 42400 + }, + { + "epoch": 0.21901651523828689, + "grad_norm": 18957.232421875, + "learning_rate": 9.448413228718134e-05, + "loss": 0.4778, + "step": 42450 + }, + { + "epoch": 0.2192744852208997, + "grad_norm": 20251.939453125, + "learning_rate": 9.446744150196574e-05, + "loss": 0.4759, + "step": 42500 + }, + { + "epoch": 0.21953245520351253, + "grad_norm": 20740.82421875, + "learning_rate": 9.445072698089091e-05, + "loss": 0.4782, + "step": 42550 + }, + { + "epoch": 0.21979042518612535, + "grad_norm": 19501.91015625, + "learning_rate": 9.443398873287877e-05, + "loss": 0.479, + "step": 42600 + }, + { + "epoch": 0.22004839516873817, + "grad_norm": 20895.58984375, + "learning_rate": 9.441722676686386e-05, + "loss": 0.4754, + "step": 42650 + }, + { + "epoch": 0.220306365151351, + "grad_norm": 19932.66796875, + "learning_rate": 9.440044109179338e-05, + "loss": 0.4778, + "step": 42700 + }, + { + "epoch": 0.2205643351339638, + "grad_norm": 20158.693359375, + "learning_rate": 9.438363171662722e-05, + "loss": 0.4755, + "step": 42750 + }, + { + "epoch": 0.22082230511657663, + "grad_norm": 19128.953125, + "learning_rate": 9.436679865033789e-05, + "loss": 0.4744, + "step": 42800 + }, + { + "epoch": 0.22108027509918945, + "grad_norm": 19743.517578125, + "learning_rate": 9.434994190191054e-05, + "loss": 0.4781, + "step": 42850 + }, + { + "epoch": 0.22133824508180228, + "grad_norm": 17826.703125, + "learning_rate": 9.4333061480343e-05, + "loss": 0.4762, + "step": 42900 + }, + { + "epoch": 0.2215962150644151, + "grad_norm": 20606.48046875, + "learning_rate": 9.43161573946457e-05, + "loss": 0.4741, + "step": 42950 + }, + { + "epoch": 0.22185418504702792, + "grad_norm": 20116.66796875, + "learning_rate": 9.429922965384172e-05, + "loss": 0.4766, + "step": 43000 + }, + { + "epoch": 0.22211215502964074, + "grad_norm": 20560.970703125, + "learning_rate": 9.428227826696674e-05, + "loss": 0.481, + "step": 43050 + }, + { + "epoch": 0.22237012501225356, + "grad_norm": 20832.01953125, + "learning_rate": 9.42653032430691e-05, + "loss": 0.4806, + "step": 43100 + }, + { + "epoch": 0.2226280949948664, + "grad_norm": 18686.953125, + "learning_rate": 9.424830459120974e-05, + "loss": 0.4796, + "step": 43150 + }, + { + "epoch": 0.22288606497747923, + "grad_norm": 21061.240234375, + "learning_rate": 9.423128232046223e-05, + "loss": 0.474, + "step": 43200 + }, + { + "epoch": 0.22314403496009205, + "grad_norm": 21862.25, + "learning_rate": 9.421423643991267e-05, + "loss": 0.4721, + "step": 43250 + }, + { + "epoch": 0.22340200494270487, + "grad_norm": 18299.23828125, + "learning_rate": 9.419716695865988e-05, + "loss": 0.4744, + "step": 43300 + }, + { + "epoch": 0.2236599749253177, + "grad_norm": 20387.876953125, + "learning_rate": 9.418007388581517e-05, + "loss": 0.4748, + "step": 43350 + }, + { + "epoch": 0.22391794490793052, + "grad_norm": 21721.740234375, + "learning_rate": 9.416295723050254e-05, + "loss": 0.4782, + "step": 43400 + }, + { + "epoch": 0.22417591489054334, + "grad_norm": 20274.72265625, + "learning_rate": 9.414581700185851e-05, + "loss": 0.4734, + "step": 43450 + }, + { + "epoch": 0.22443388487315616, + "grad_norm": 22443.296875, + "learning_rate": 9.41286532090322e-05, + "loss": 0.4734, + "step": 43500 + }, + { + "epoch": 0.22469185485576898, + "grad_norm": 19874.8203125, + "learning_rate": 9.411146586118529e-05, + "loss": 0.4755, + "step": 43550 + }, + { + "epoch": 0.2249498248383818, + "grad_norm": 20362.3125, + "learning_rate": 9.409425496749209e-05, + "loss": 0.4776, + "step": 43600 + }, + { + "epoch": 0.22520779482099462, + "grad_norm": 22146.5078125, + "learning_rate": 9.40770205371394e-05, + "loss": 0.4784, + "step": 43650 + }, + { + "epoch": 0.22546576480360744, + "grad_norm": 19917.83203125, + "learning_rate": 9.405976257932667e-05, + "loss": 0.4744, + "step": 43700 + }, + { + "epoch": 0.22572373478622026, + "grad_norm": 19296.904296875, + "learning_rate": 9.404248110326583e-05, + "loss": 0.4766, + "step": 43750 + }, + { + "epoch": 0.22598170476883309, + "grad_norm": 20648.35546875, + "learning_rate": 9.402517611818142e-05, + "loss": 0.4801, + "step": 43800 + }, + { + "epoch": 0.22623967475144593, + "grad_norm": 21750.517578125, + "learning_rate": 9.40078476333105e-05, + "loss": 0.4752, + "step": 43850 + }, + { + "epoch": 0.22649764473405876, + "grad_norm": 21233.337890625, + "learning_rate": 9.399049565790266e-05, + "loss": 0.4758, + "step": 43900 + }, + { + "epoch": 0.22675561471667158, + "grad_norm": 21952.6796875, + "learning_rate": 9.397312020122006e-05, + "loss": 0.4755, + "step": 43950 + }, + { + "epoch": 0.2270135846992844, + "grad_norm": 18598.826171875, + "learning_rate": 9.39557212725374e-05, + "loss": 0.4725, + "step": 44000 + }, + { + "epoch": 0.22727155468189722, + "grad_norm": 20325.51171875, + "learning_rate": 9.393829888114188e-05, + "loss": 0.4789, + "step": 44050 + }, + { + "epoch": 0.22752952466451004, + "grad_norm": 17499.228515625, + "learning_rate": 9.392085303633323e-05, + "loss": 0.4738, + "step": 44100 + }, + { + "epoch": 0.22778749464712286, + "grad_norm": 21283.970703125, + "learning_rate": 9.39033837474237e-05, + "loss": 0.4743, + "step": 44150 + }, + { + "epoch": 0.22804546462973568, + "grad_norm": 19672.765625, + "learning_rate": 9.388589102373807e-05, + "loss": 0.4751, + "step": 44200 + }, + { + "epoch": 0.2283034346123485, + "grad_norm": 19722.314453125, + "learning_rate": 9.386837487461361e-05, + "loss": 0.4767, + "step": 44250 + }, + { + "epoch": 0.22856140459496133, + "grad_norm": 19948.154296875, + "learning_rate": 9.38508353094001e-05, + "loss": 0.4765, + "step": 44300 + }, + { + "epoch": 0.22881937457757415, + "grad_norm": 19880.611328125, + "learning_rate": 9.383327233745984e-05, + "loss": 0.4754, + "step": 44350 + }, + { + "epoch": 0.22907734456018697, + "grad_norm": 20052.91796875, + "learning_rate": 9.381568596816757e-05, + "loss": 0.4801, + "step": 44400 + }, + { + "epoch": 0.2293353145427998, + "grad_norm": 23129.869140625, + "learning_rate": 9.379807621091057e-05, + "loss": 0.4713, + "step": 44450 + }, + { + "epoch": 0.2295932845254126, + "grad_norm": 19922.0703125, + "learning_rate": 9.37804430750886e-05, + "loss": 0.4736, + "step": 44500 + }, + { + "epoch": 0.22985125450802546, + "grad_norm": 19704.24609375, + "learning_rate": 9.376278657011388e-05, + "loss": 0.4682, + "step": 44550 + }, + { + "epoch": 0.23010922449063828, + "grad_norm": 19080.125, + "learning_rate": 9.374510670541109e-05, + "loss": 0.4751, + "step": 44600 + }, + { + "epoch": 0.2303671944732511, + "grad_norm": 20858.388671875, + "learning_rate": 9.372740349041742e-05, + "loss": 0.4734, + "step": 44650 + }, + { + "epoch": 0.23062516445586392, + "grad_norm": 22074.056640625, + "learning_rate": 9.37096769345825e-05, + "loss": 0.4699, + "step": 44700 + }, + { + "epoch": 0.23088313443847674, + "grad_norm": 21852.623046875, + "learning_rate": 9.369192704736842e-05, + "loss": 0.47, + "step": 44750 + }, + { + "epoch": 0.23114110442108957, + "grad_norm": 20904.033203125, + "learning_rate": 9.367415383824974e-05, + "loss": 0.4736, + "step": 44800 + }, + { + "epoch": 0.2313990744037024, + "grad_norm": 18965.021484375, + "learning_rate": 9.365635731671343e-05, + "loss": 0.4687, + "step": 44850 + }, + { + "epoch": 0.2316570443863152, + "grad_norm": 16994.271484375, + "learning_rate": 9.363853749225894e-05, + "loss": 0.4747, + "step": 44900 + }, + { + "epoch": 0.23191501436892803, + "grad_norm": 19191.794921875, + "learning_rate": 9.362069437439814e-05, + "loss": 0.4689, + "step": 44950 + }, + { + "epoch": 0.23217298435154085, + "grad_norm": 19691.982421875, + "learning_rate": 9.360282797265537e-05, + "loss": 0.4683, + "step": 45000 + }, + { + "epoch": 0.23217298435154085, + "eval_loss": 0.4633353352546692, + "eval_runtime": 3256.5731, + "eval_samples_per_second": 952.265, + "eval_steps_per_second": 1.86, + "step": 45000 + }, + { + "epoch": 0.23243095433415367, + "grad_norm": 21778.20703125, + "learning_rate": 9.358493829656732e-05, + "loss": 0.4726, + "step": 45050 + }, + { + "epoch": 0.2326889243167665, + "grad_norm": 20281.802734375, + "learning_rate": 9.35670253556832e-05, + "loss": 0.4752, + "step": 45100 + }, + { + "epoch": 0.23294689429937931, + "grad_norm": 20620.580078125, + "learning_rate": 9.354908915956456e-05, + "loss": 0.474, + "step": 45150 + }, + { + "epoch": 0.23320486428199214, + "grad_norm": 21115.86328125, + "learning_rate": 9.353112971778542e-05, + "loss": 0.4763, + "step": 45200 + }, + { + "epoch": 0.23346283426460496, + "grad_norm": 19746.30859375, + "learning_rate": 9.351314703993215e-05, + "loss": 0.4792, + "step": 45250 + }, + { + "epoch": 0.2337208042472178, + "grad_norm": 21270.26171875, + "learning_rate": 9.349514113560358e-05, + "loss": 0.4726, + "step": 45300 + }, + { + "epoch": 0.23397877422983063, + "grad_norm": 20273.658203125, + "learning_rate": 9.347711201441092e-05, + "loss": 0.4683, + "step": 45350 + }, + { + "epoch": 0.23423674421244345, + "grad_norm": 19746.9609375, + "learning_rate": 9.345905968597773e-05, + "loss": 0.4778, + "step": 45400 + }, + { + "epoch": 0.23449471419505627, + "grad_norm": 22999.52734375, + "learning_rate": 9.344098415994003e-05, + "loss": 0.4799, + "step": 45450 + }, + { + "epoch": 0.2347526841776691, + "grad_norm": 19922.41015625, + "learning_rate": 9.342288544594617e-05, + "loss": 0.4773, + "step": 45500 + }, + { + "epoch": 0.2350106541602819, + "grad_norm": 19793.73828125, + "learning_rate": 9.340476355365688e-05, + "loss": 0.4743, + "step": 45550 + }, + { + "epoch": 0.23526862414289473, + "grad_norm": 19525.74609375, + "learning_rate": 9.33866184927453e-05, + "loss": 0.4729, + "step": 45600 + }, + { + "epoch": 0.23552659412550755, + "grad_norm": 26093.65625, + "learning_rate": 9.336845027289691e-05, + "loss": 0.4767, + "step": 45650 + }, + { + "epoch": 0.23578456410812038, + "grad_norm": 20045.16796875, + "learning_rate": 9.335025890380953e-05, + "loss": 0.4768, + "step": 45700 + }, + { + "epoch": 0.2360425340907332, + "grad_norm": 21272.36328125, + "learning_rate": 9.333204439519338e-05, + "loss": 0.4738, + "step": 45750 + }, + { + "epoch": 0.23630050407334602, + "grad_norm": 19174.44921875, + "learning_rate": 9.3313806756771e-05, + "loss": 0.4752, + "step": 45800 + }, + { + "epoch": 0.23655847405595884, + "grad_norm": 18446.640625, + "learning_rate": 9.32955459982773e-05, + "loss": 0.4747, + "step": 45850 + }, + { + "epoch": 0.23681644403857166, + "grad_norm": 23397.7109375, + "learning_rate": 9.327726212945953e-05, + "loss": 0.4723, + "step": 45900 + }, + { + "epoch": 0.23707441402118448, + "grad_norm": 20350.755859375, + "learning_rate": 9.325895516007725e-05, + "loss": 0.4671, + "step": 45950 + }, + { + "epoch": 0.23733238400379733, + "grad_norm": 21147.5546875, + "learning_rate": 9.324062509990235e-05, + "loss": 0.4689, + "step": 46000 + }, + { + "epoch": 0.23759035398641015, + "grad_norm": 19813.130859375, + "learning_rate": 9.322227195871909e-05, + "loss": 0.4723, + "step": 46050 + }, + { + "epoch": 0.23784832396902297, + "grad_norm": 22310.037109375, + "learning_rate": 9.320389574632399e-05, + "loss": 0.4727, + "step": 46100 + }, + { + "epoch": 0.2381062939516358, + "grad_norm": 19646.509765625, + "learning_rate": 9.318549647252596e-05, + "loss": 0.4723, + "step": 46150 + }, + { + "epoch": 0.23836426393424862, + "grad_norm": 20145.29296875, + "learning_rate": 9.316707414714614e-05, + "loss": 0.4652, + "step": 46200 + }, + { + "epoch": 0.23862223391686144, + "grad_norm": 19513.466796875, + "learning_rate": 9.314862878001803e-05, + "loss": 0.4774, + "step": 46250 + }, + { + "epoch": 0.23888020389947426, + "grad_norm": 20701.25390625, + "learning_rate": 9.313016038098739e-05, + "loss": 0.4721, + "step": 46300 + }, + { + "epoch": 0.23913817388208708, + "grad_norm": 18766.328125, + "learning_rate": 9.31116689599123e-05, + "loss": 0.4691, + "step": 46350 + }, + { + "epoch": 0.2393961438646999, + "grad_norm": 20925.5, + "learning_rate": 9.309315452666314e-05, + "loss": 0.4743, + "step": 46400 + }, + { + "epoch": 0.23965411384731272, + "grad_norm": 19413.0703125, + "learning_rate": 9.307461709112253e-05, + "loss": 0.469, + "step": 46450 + }, + { + "epoch": 0.23991208382992554, + "grad_norm": 18517.669921875, + "learning_rate": 9.305605666318543e-05, + "loss": 0.4769, + "step": 46500 + }, + { + "epoch": 0.24017005381253836, + "grad_norm": 20222.50390625, + "learning_rate": 9.3037473252759e-05, + "loss": 0.4701, + "step": 46550 + }, + { + "epoch": 0.24042802379515119, + "grad_norm": 21650.63671875, + "learning_rate": 9.301886686976272e-05, + "loss": 0.4693, + "step": 46600 + }, + { + "epoch": 0.240685993777764, + "grad_norm": 18923.498046875, + "learning_rate": 9.300023752412832e-05, + "loss": 0.4749, + "step": 46650 + }, + { + "epoch": 0.24094396376037686, + "grad_norm": 21353.748046875, + "learning_rate": 9.298158522579978e-05, + "loss": 0.4735, + "step": 46700 + }, + { + "epoch": 0.24120193374298968, + "grad_norm": 19405.5234375, + "learning_rate": 9.296290998473334e-05, + "loss": 0.4708, + "step": 46750 + }, + { + "epoch": 0.2414599037256025, + "grad_norm": 21692.3203125, + "learning_rate": 9.294421181089747e-05, + "loss": 0.4644, + "step": 46800 + }, + { + "epoch": 0.24171787370821532, + "grad_norm": 18488.671875, + "learning_rate": 9.292549071427291e-05, + "loss": 0.4668, + "step": 46850 + }, + { + "epoch": 0.24197584369082814, + "grad_norm": 21951.712890625, + "learning_rate": 9.29067467048526e-05, + "loss": 0.4749, + "step": 46900 + }, + { + "epoch": 0.24223381367344096, + "grad_norm": 20673.82421875, + "learning_rate": 9.288797979264176e-05, + "loss": 0.4687, + "step": 46950 + }, + { + "epoch": 0.24249178365605378, + "grad_norm": 18687.69140625, + "learning_rate": 9.286918998765776e-05, + "loss": 0.4731, + "step": 47000 + }, + { + "epoch": 0.2427497536386666, + "grad_norm": 18882.009765625, + "learning_rate": 9.285037729993027e-05, + "loss": 0.4699, + "step": 47050 + }, + { + "epoch": 0.24300772362127943, + "grad_norm": 22378.685546875, + "learning_rate": 9.283154173950112e-05, + "loss": 0.4678, + "step": 47100 + }, + { + "epoch": 0.24326569360389225, + "grad_norm": 19457.736328125, + "learning_rate": 9.281268331642439e-05, + "loss": 0.4665, + "step": 47150 + }, + { + "epoch": 0.24352366358650507, + "grad_norm": 19794.4296875, + "learning_rate": 9.279380204076631e-05, + "loss": 0.4683, + "step": 47200 + }, + { + "epoch": 0.2437816335691179, + "grad_norm": 18910.41796875, + "learning_rate": 9.277489792260536e-05, + "loss": 0.4683, + "step": 47250 + }, + { + "epoch": 0.2440396035517307, + "grad_norm": 21774.009765625, + "learning_rate": 9.275597097203216e-05, + "loss": 0.4729, + "step": 47300 + }, + { + "epoch": 0.24429757353434353, + "grad_norm": 21403.1796875, + "learning_rate": 9.273702119914962e-05, + "loss": 0.4681, + "step": 47350 + }, + { + "epoch": 0.24455554351695638, + "grad_norm": 20333.400390625, + "learning_rate": 9.271804861407269e-05, + "loss": 0.4713, + "step": 47400 + }, + { + "epoch": 0.2448135134995692, + "grad_norm": 22196.32421875, + "learning_rate": 9.269905322692862e-05, + "loss": 0.468, + "step": 47450 + }, + { + "epoch": 0.24507148348218202, + "grad_norm": 18356.623046875, + "learning_rate": 9.268003504785673e-05, + "loss": 0.4663, + "step": 47500 + }, + { + "epoch": 0.24532945346479484, + "grad_norm": 20337.546875, + "learning_rate": 9.266099408700859e-05, + "loss": 0.4657, + "step": 47550 + }, + { + "epoch": 0.24558742344740767, + "grad_norm": 20426.03515625, + "learning_rate": 9.264193035454789e-05, + "loss": 0.4677, + "step": 47600 + }, + { + "epoch": 0.2458453934300205, + "grad_norm": 20962.81640625, + "learning_rate": 9.262284386065047e-05, + "loss": 0.4759, + "step": 47650 + }, + { + "epoch": 0.2461033634126333, + "grad_norm": 20498.919921875, + "learning_rate": 9.260373461550435e-05, + "loss": 0.4647, + "step": 47700 + }, + { + "epoch": 0.24636133339524613, + "grad_norm": 21223.171875, + "learning_rate": 9.258460262930967e-05, + "loss": 0.4698, + "step": 47750 + }, + { + "epoch": 0.24661930337785895, + "grad_norm": 21146.671875, + "learning_rate": 9.256544791227871e-05, + "loss": 0.4727, + "step": 47800 + }, + { + "epoch": 0.24687727336047177, + "grad_norm": 19261.603515625, + "learning_rate": 9.254627047463588e-05, + "loss": 0.4734, + "step": 47850 + }, + { + "epoch": 0.2471352433430846, + "grad_norm": 21131.298828125, + "learning_rate": 9.252707032661774e-05, + "loss": 0.4686, + "step": 47900 + }, + { + "epoch": 0.24739321332569741, + "grad_norm": 22491.212890625, + "learning_rate": 9.250784747847294e-05, + "loss": 0.4701, + "step": 47950 + }, + { + "epoch": 0.24765118330831024, + "grad_norm": 20198.486328125, + "learning_rate": 9.248860194046228e-05, + "loss": 0.4657, + "step": 48000 + }, + { + "epoch": 0.24790915329092306, + "grad_norm": 21754.078125, + "learning_rate": 9.246933372285863e-05, + "loss": 0.4674, + "step": 48050 + }, + { + "epoch": 0.24816712327353588, + "grad_norm": 20948.244140625, + "learning_rate": 9.245004283594703e-05, + "loss": 0.4604, + "step": 48100 + }, + { + "epoch": 0.24842509325614873, + "grad_norm": 20916.3671875, + "learning_rate": 9.243072929002454e-05, + "loss": 0.4656, + "step": 48150 + }, + { + "epoch": 0.24868306323876155, + "grad_norm": 19935.021484375, + "learning_rate": 9.24113930954004e-05, + "loss": 0.4735, + "step": 48200 + }, + { + "epoch": 0.24894103322137437, + "grad_norm": 20075.96875, + "learning_rate": 9.239203426239585e-05, + "loss": 0.4679, + "step": 48250 + }, + { + "epoch": 0.2491990032039872, + "grad_norm": 20107.943359375, + "learning_rate": 9.23726528013443e-05, + "loss": 0.4773, + "step": 48300 + }, + { + "epoch": 0.2494569731866, + "grad_norm": 20341.1171875, + "learning_rate": 9.235324872259119e-05, + "loss": 0.4699, + "step": 48350 + }, + { + "epoch": 0.24971494316921283, + "grad_norm": 21787.4296875, + "learning_rate": 9.233382203649401e-05, + "loss": 0.4665, + "step": 48400 + }, + { + "epoch": 0.24997291315182565, + "grad_norm": 17707.583984375, + "learning_rate": 9.231437275342239e-05, + "loss": 0.4678, + "step": 48450 + }, + { + "epoch": 0.2502308831344385, + "grad_norm": 24467.810546875, + "learning_rate": 9.229490088375797e-05, + "loss": 0.466, + "step": 48500 + }, + { + "epoch": 0.2504888531170513, + "grad_norm": 20794.73828125, + "learning_rate": 9.227540643789446e-05, + "loss": 0.4711, + "step": 48550 + }, + { + "epoch": 0.2507468230996641, + "grad_norm": 20147.099609375, + "learning_rate": 9.225588942623758e-05, + "loss": 0.4689, + "step": 48600 + }, + { + "epoch": 0.25100479308227697, + "grad_norm": 20704.037109375, + "learning_rate": 9.223634985920517e-05, + "loss": 0.4687, + "step": 48650 + }, + { + "epoch": 0.25126276306488976, + "grad_norm": 19472.21875, + "learning_rate": 9.221678774722707e-05, + "loss": 0.4636, + "step": 48700 + }, + { + "epoch": 0.2515207330475026, + "grad_norm": 21352.755859375, + "learning_rate": 9.219720310074515e-05, + "loss": 0.4671, + "step": 48750 + }, + { + "epoch": 0.2517787030301154, + "grad_norm": 20956.146484375, + "learning_rate": 9.21775959302133e-05, + "loss": 0.4703, + "step": 48800 + }, + { + "epoch": 0.25203667301272825, + "grad_norm": 26295.541015625, + "learning_rate": 9.215796624609749e-05, + "loss": 0.4742, + "step": 48850 + }, + { + "epoch": 0.25229464299534105, + "grad_norm": 19862.15625, + "learning_rate": 9.213831405887564e-05, + "loss": 0.468, + "step": 48900 + }, + { + "epoch": 0.2525526129779539, + "grad_norm": 21760.404296875, + "learning_rate": 9.211863937903769e-05, + "loss": 0.4728, + "step": 48950 + }, + { + "epoch": 0.2528105829605667, + "grad_norm": 22488.1484375, + "learning_rate": 9.209894221708564e-05, + "loss": 0.4627, + "step": 49000 + }, + { + "epoch": 0.25306855294317954, + "grad_norm": 20244.5, + "learning_rate": 9.20792225835334e-05, + "loss": 0.4706, + "step": 49050 + }, + { + "epoch": 0.25332652292579233, + "grad_norm": 22642.44140625, + "learning_rate": 9.205948048890698e-05, + "loss": 0.4708, + "step": 49100 + }, + { + "epoch": 0.2535844929084052, + "grad_norm": 23121.501953125, + "learning_rate": 9.203971594374432e-05, + "loss": 0.4723, + "step": 49150 + }, + { + "epoch": 0.25384246289101803, + "grad_norm": 19514.916015625, + "learning_rate": 9.201992895859532e-05, + "loss": 0.4692, + "step": 49200 + }, + { + "epoch": 0.2541004328736308, + "grad_norm": 19467.662109375, + "learning_rate": 9.200011954402193e-05, + "loss": 0.4719, + "step": 49250 + }, + { + "epoch": 0.25435840285624367, + "grad_norm": 20737.7578125, + "learning_rate": 9.198028771059799e-05, + "loss": 0.4643, + "step": 49300 + }, + { + "epoch": 0.25461637283885646, + "grad_norm": 20229.341796875, + "learning_rate": 9.196043346890939e-05, + "loss": 0.462, + "step": 49350 + }, + { + "epoch": 0.2548743428214693, + "grad_norm": 23094.35546875, + "learning_rate": 9.194055682955392e-05, + "loss": 0.4701, + "step": 49400 + }, + { + "epoch": 0.2551323128040821, + "grad_norm": 21099.541015625, + "learning_rate": 9.192065780314132e-05, + "loss": 0.466, + "step": 49450 + }, + { + "epoch": 0.25539028278669496, + "grad_norm": 21500.302734375, + "learning_rate": 9.190073640029335e-05, + "loss": 0.4703, + "step": 49500 + }, + { + "epoch": 0.25564825276930775, + "grad_norm": 24272.228515625, + "learning_rate": 9.188079263164366e-05, + "loss": 0.4672, + "step": 49550 + }, + { + "epoch": 0.2559062227519206, + "grad_norm": 21129.013671875, + "learning_rate": 9.186082650783783e-05, + "loss": 0.4715, + "step": 49600 + }, + { + "epoch": 0.2561641927345334, + "grad_norm": 20696.32421875, + "learning_rate": 9.184083803953339e-05, + "loss": 0.4646, + "step": 49650 + }, + { + "epoch": 0.25642216271714624, + "grad_norm": 20142.7890625, + "learning_rate": 9.18208272373998e-05, + "loss": 0.4627, + "step": 49700 + }, + { + "epoch": 0.25668013269975903, + "grad_norm": 18810.43359375, + "learning_rate": 9.180079411211847e-05, + "loss": 0.4659, + "step": 49750 + }, + { + "epoch": 0.2569381026823719, + "grad_norm": 23121.84765625, + "learning_rate": 9.178073867438264e-05, + "loss": 0.4683, + "step": 49800 + }, + { + "epoch": 0.2571960726649847, + "grad_norm": 20432.021484375, + "learning_rate": 9.176066093489755e-05, + "loss": 0.4704, + "step": 49850 + }, + { + "epoch": 0.2574540426475975, + "grad_norm": 22056.09765625, + "learning_rate": 9.17405609043803e-05, + "loss": 0.4753, + "step": 49900 + }, + { + "epoch": 0.2577120126302104, + "grad_norm": 21094.931640625, + "learning_rate": 9.17204385935599e-05, + "loss": 0.4648, + "step": 49950 + }, + { + "epoch": 0.25796998261282317, + "grad_norm": 20127.525390625, + "learning_rate": 9.170029401317725e-05, + "loss": 0.4646, + "step": 50000 + }, + { + "epoch": 0.25796998261282317, + "eval_loss": 0.4567689299583435, + "eval_runtime": 3268.0543, + "eval_samples_per_second": 948.919, + "eval_steps_per_second": 1.853, + "step": 50000 + }, + { + "epoch": 0.258227952595436, + "grad_norm": 20947.306640625, + "learning_rate": 9.168012717398516e-05, + "loss": 0.4688, + "step": 50050 + }, + { + "epoch": 0.2584859225780488, + "grad_norm": 23591.646484375, + "learning_rate": 9.165993808674823e-05, + "loss": 0.4683, + "step": 50100 + }, + { + "epoch": 0.25874389256066166, + "grad_norm": 21227.677734375, + "learning_rate": 9.163972676224306e-05, + "loss": 0.4671, + "step": 50150 + }, + { + "epoch": 0.25900186254327445, + "grad_norm": 20084.953125, + "learning_rate": 9.161949321125807e-05, + "loss": 0.4598, + "step": 50200 + }, + { + "epoch": 0.2592598325258873, + "grad_norm": 21139.5, + "learning_rate": 9.159923744459349e-05, + "loss": 0.4707, + "step": 50250 + }, + { + "epoch": 0.2595178025085001, + "grad_norm": 20410.794921875, + "learning_rate": 9.15789594730615e-05, + "loss": 0.4675, + "step": 50300 + }, + { + "epoch": 0.25977577249111294, + "grad_norm": 20010.328125, + "learning_rate": 9.155865930748608e-05, + "loss": 0.4599, + "step": 50350 + }, + { + "epoch": 0.26003374247372574, + "grad_norm": 23502.890625, + "learning_rate": 9.153833695870304e-05, + "loss": 0.4664, + "step": 50400 + }, + { + "epoch": 0.2602917124563386, + "grad_norm": 20373.498046875, + "learning_rate": 9.151799243756008e-05, + "loss": 0.4655, + "step": 50450 + }, + { + "epoch": 0.2605496824389514, + "grad_norm": 21093.669921875, + "learning_rate": 9.149762575491671e-05, + "loss": 0.4623, + "step": 50500 + }, + { + "epoch": 0.26080765242156423, + "grad_norm": 22206.87890625, + "learning_rate": 9.147723692164427e-05, + "loss": 0.4687, + "step": 50550 + }, + { + "epoch": 0.261065622404177, + "grad_norm": 23264.875, + "learning_rate": 9.145682594862593e-05, + "loss": 0.4705, + "step": 50600 + }, + { + "epoch": 0.26132359238678987, + "grad_norm": 22029.849609375, + "learning_rate": 9.143639284675664e-05, + "loss": 0.4673, + "step": 50650 + }, + { + "epoch": 0.2615815623694027, + "grad_norm": 23016.955078125, + "learning_rate": 9.141593762694323e-05, + "loss": 0.4663, + "step": 50700 + }, + { + "epoch": 0.2618395323520155, + "grad_norm": 21590.80859375, + "learning_rate": 9.139546030010427e-05, + "loss": 0.4684, + "step": 50750 + }, + { + "epoch": 0.26209750233462836, + "grad_norm": 19839.986328125, + "learning_rate": 9.13749608771702e-05, + "loss": 0.4682, + "step": 50800 + }, + { + "epoch": 0.26235547231724116, + "grad_norm": 17922.802734375, + "learning_rate": 9.135443936908318e-05, + "loss": 0.4601, + "step": 50850 + }, + { + "epoch": 0.262613442299854, + "grad_norm": 21141.119140625, + "learning_rate": 9.133389578679723e-05, + "loss": 0.467, + "step": 50900 + }, + { + "epoch": 0.2628714122824668, + "grad_norm": 21858.158203125, + "learning_rate": 9.131333014127806e-05, + "loss": 0.4663, + "step": 50950 + }, + { + "epoch": 0.26312938226507965, + "grad_norm": 21516.46875, + "learning_rate": 9.129274244350326e-05, + "loss": 0.4656, + "step": 51000 + }, + { + "epoch": 0.26338735224769244, + "grad_norm": 21403.263671875, + "learning_rate": 9.127213270446213e-05, + "loss": 0.4717, + "step": 51050 + }, + { + "epoch": 0.2636453222303053, + "grad_norm": 20405.4296875, + "learning_rate": 9.125150093515575e-05, + "loss": 0.4656, + "step": 51100 + }, + { + "epoch": 0.2639032922129181, + "grad_norm": 21057.57421875, + "learning_rate": 9.123084714659698e-05, + "loss": 0.4655, + "step": 51150 + }, + { + "epoch": 0.26416126219553093, + "grad_norm": 19891.15234375, + "learning_rate": 9.121017134981036e-05, + "loss": 0.4706, + "step": 51200 + }, + { + "epoch": 0.2644192321781437, + "grad_norm": 20441.30078125, + "learning_rate": 9.118947355583228e-05, + "loss": 0.4707, + "step": 51250 + }, + { + "epoch": 0.2646772021607566, + "grad_norm": 22182.67578125, + "learning_rate": 9.11687537757108e-05, + "loss": 0.4633, + "step": 51300 + }, + { + "epoch": 0.2649351721433694, + "grad_norm": 18211.728515625, + "learning_rate": 9.114801202050574e-05, + "loss": 0.4677, + "step": 51350 + }, + { + "epoch": 0.2651931421259822, + "grad_norm": 20691.697265625, + "learning_rate": 9.112724830128865e-05, + "loss": 0.4634, + "step": 51400 + }, + { + "epoch": 0.26545111210859507, + "grad_norm": 19717.75390625, + "learning_rate": 9.110646262914279e-05, + "loss": 0.4647, + "step": 51450 + }, + { + "epoch": 0.26570908209120786, + "grad_norm": 19860.55078125, + "learning_rate": 9.108565501516318e-05, + "loss": 0.4665, + "step": 51500 + }, + { + "epoch": 0.2659670520738207, + "grad_norm": 20122.984375, + "learning_rate": 9.106482547045648e-05, + "loss": 0.4663, + "step": 51550 + }, + { + "epoch": 0.2662250220564335, + "grad_norm": 21214.724609375, + "learning_rate": 9.104397400614112e-05, + "loss": 0.4676, + "step": 51600 + }, + { + "epoch": 0.26648299203904635, + "grad_norm": 24545.041015625, + "learning_rate": 9.102310063334722e-05, + "loss": 0.4705, + "step": 51650 + }, + { + "epoch": 0.26674096202165914, + "grad_norm": 22479.380859375, + "learning_rate": 9.100220536321655e-05, + "loss": 0.4616, + "step": 51700 + }, + { + "epoch": 0.266998932004272, + "grad_norm": 20262.27734375, + "learning_rate": 9.098128820690264e-05, + "loss": 0.4569, + "step": 51750 + }, + { + "epoch": 0.2672569019868848, + "grad_norm": 20906.880859375, + "learning_rate": 9.096034917557062e-05, + "loss": 0.468, + "step": 51800 + }, + { + "epoch": 0.26751487196949764, + "grad_norm": 20986.455078125, + "learning_rate": 9.093938828039737e-05, + "loss": 0.4697, + "step": 51850 + }, + { + "epoch": 0.26777284195211043, + "grad_norm": 22425.681640625, + "learning_rate": 9.09184055325714e-05, + "loss": 0.4692, + "step": 51900 + }, + { + "epoch": 0.2680308119347233, + "grad_norm": 21817.744140625, + "learning_rate": 9.089740094329288e-05, + "loss": 0.4726, + "step": 51950 + }, + { + "epoch": 0.26828878191733607, + "grad_norm": 20527.017578125, + "learning_rate": 9.087637452377369e-05, + "loss": 0.459, + "step": 52000 + }, + { + "epoch": 0.2685467518999489, + "grad_norm": 24486.521484375, + "learning_rate": 9.08553262852373e-05, + "loss": 0.4624, + "step": 52050 + }, + { + "epoch": 0.26880472188256177, + "grad_norm": 20964.537109375, + "learning_rate": 9.083425623891885e-05, + "loss": 0.4657, + "step": 52100 + }, + { + "epoch": 0.26906269186517456, + "grad_norm": 20966.478515625, + "learning_rate": 9.081316439606513e-05, + "loss": 0.4723, + "step": 52150 + }, + { + "epoch": 0.2693206618477874, + "grad_norm": 20067.330078125, + "learning_rate": 9.079205076793457e-05, + "loss": 0.4644, + "step": 52200 + }, + { + "epoch": 0.2695786318304002, + "grad_norm": 21526.298828125, + "learning_rate": 9.077091536579719e-05, + "loss": 0.4602, + "step": 52250 + }, + { + "epoch": 0.26983660181301306, + "grad_norm": 20446.767578125, + "learning_rate": 9.074975820093468e-05, + "loss": 0.4671, + "step": 52300 + }, + { + "epoch": 0.27009457179562585, + "grad_norm": 19936.599609375, + "learning_rate": 9.072857928464029e-05, + "loss": 0.4626, + "step": 52350 + }, + { + "epoch": 0.2703525417782387, + "grad_norm": 21716.60546875, + "learning_rate": 9.070737862821896e-05, + "loss": 0.4642, + "step": 52400 + }, + { + "epoch": 0.2706105117608515, + "grad_norm": 17588.40625, + "learning_rate": 9.068615624298717e-05, + "loss": 0.4595, + "step": 52450 + }, + { + "epoch": 0.27086848174346434, + "grad_norm": 21721.138671875, + "learning_rate": 9.066491214027302e-05, + "loss": 0.4639, + "step": 52500 + }, + { + "epoch": 0.27112645172607713, + "grad_norm": 19480.875, + "learning_rate": 9.06436463314162e-05, + "loss": 0.4654, + "step": 52550 + }, + { + "epoch": 0.27138442170869, + "grad_norm": 22658.076171875, + "learning_rate": 9.062235882776797e-05, + "loss": 0.4653, + "step": 52600 + }, + { + "epoch": 0.2716423916913028, + "grad_norm": 22396.4140625, + "learning_rate": 9.060104964069121e-05, + "loss": 0.4634, + "step": 52650 + }, + { + "epoch": 0.2719003616739156, + "grad_norm": 22354.28125, + "learning_rate": 9.057971878156036e-05, + "loss": 0.4626, + "step": 52700 + }, + { + "epoch": 0.2721583316565285, + "grad_norm": 19845.22265625, + "learning_rate": 9.05583662617614e-05, + "loss": 0.4666, + "step": 52750 + }, + { + "epoch": 0.27241630163914127, + "grad_norm": 19933.978515625, + "learning_rate": 9.053699209269188e-05, + "loss": 0.4601, + "step": 52800 + }, + { + "epoch": 0.2726742716217541, + "grad_norm": 21288.86328125, + "learning_rate": 9.051559628576094e-05, + "loss": 0.4622, + "step": 52850 + }, + { + "epoch": 0.2729322416043669, + "grad_norm": 20604.05078125, + "learning_rate": 9.049417885238927e-05, + "loss": 0.4618, + "step": 52900 + }, + { + "epoch": 0.27319021158697976, + "grad_norm": 18641.544921875, + "learning_rate": 9.047273980400903e-05, + "loss": 0.46, + "step": 52950 + }, + { + "epoch": 0.27344818156959255, + "grad_norm": 22482.8125, + "learning_rate": 9.045127915206398e-05, + "loss": 0.4673, + "step": 53000 + }, + { + "epoch": 0.2737061515522054, + "grad_norm": 20967.9375, + "learning_rate": 9.042979690800943e-05, + "loss": 0.4607, + "step": 53050 + }, + { + "epoch": 0.2739641215348182, + "grad_norm": 22371.90234375, + "learning_rate": 9.040829308331216e-05, + "loss": 0.4624, + "step": 53100 + }, + { + "epoch": 0.27422209151743104, + "grad_norm": 19802.947265625, + "learning_rate": 9.03867676894505e-05, + "loss": 0.4542, + "step": 53150 + }, + { + "epoch": 0.27448006150004384, + "grad_norm": 21255.974609375, + "learning_rate": 9.03652207379143e-05, + "loss": 0.4636, + "step": 53200 + }, + { + "epoch": 0.2747380314826567, + "grad_norm": 21687.16796875, + "learning_rate": 9.034365224020489e-05, + "loss": 0.4626, + "step": 53250 + }, + { + "epoch": 0.2749960014652695, + "grad_norm": 21386.275390625, + "learning_rate": 9.032206220783512e-05, + "loss": 0.4659, + "step": 53300 + }, + { + "epoch": 0.27525397144788233, + "grad_norm": 19433.888671875, + "learning_rate": 9.030045065232935e-05, + "loss": 0.4585, + "step": 53350 + }, + { + "epoch": 0.2755119414304951, + "grad_norm": 20615.021484375, + "learning_rate": 9.027881758522339e-05, + "loss": 0.4619, + "step": 53400 + }, + { + "epoch": 0.27576991141310797, + "grad_norm": 20498.369140625, + "learning_rate": 9.025716301806454e-05, + "loss": 0.4658, + "step": 53450 + }, + { + "epoch": 0.2760278813957208, + "grad_norm": 20348.955078125, + "learning_rate": 9.023548696241162e-05, + "loss": 0.4637, + "step": 53500 + }, + { + "epoch": 0.2762858513783336, + "grad_norm": 18524.3203125, + "learning_rate": 9.021378942983487e-05, + "loss": 0.4636, + "step": 53550 + }, + { + "epoch": 0.27654382136094646, + "grad_norm": 20778.064453125, + "learning_rate": 9.019207043191602e-05, + "loss": 0.4604, + "step": 53600 + }, + { + "epoch": 0.27680179134355926, + "grad_norm": 19481.369140625, + "learning_rate": 9.017032998024823e-05, + "loss": 0.4629, + "step": 53650 + }, + { + "epoch": 0.2770597613261721, + "grad_norm": 20873.8515625, + "learning_rate": 9.014856808643617e-05, + "loss": 0.4647, + "step": 53700 + }, + { + "epoch": 0.2773177313087849, + "grad_norm": 21859.05078125, + "learning_rate": 9.012678476209591e-05, + "loss": 0.4621, + "step": 53750 + }, + { + "epoch": 0.27757570129139775, + "grad_norm": 20832.587890625, + "learning_rate": 9.010498001885492e-05, + "loss": 0.463, + "step": 53800 + }, + { + "epoch": 0.27783367127401054, + "grad_norm": 18435.703125, + "learning_rate": 9.00831538683522e-05, + "loss": 0.466, + "step": 53850 + }, + { + "epoch": 0.2780916412566234, + "grad_norm": 21496.61328125, + "learning_rate": 9.006130632223811e-05, + "loss": 0.4611, + "step": 53900 + }, + { + "epoch": 0.2783496112392362, + "grad_norm": 21796.873046875, + "learning_rate": 9.003943739217444e-05, + "loss": 0.4587, + "step": 53950 + }, + { + "epoch": 0.27860758122184903, + "grad_norm": 21053.099609375, + "learning_rate": 9.001754708983443e-05, + "loss": 0.4659, + "step": 54000 + }, + { + "epoch": 0.2788655512044618, + "grad_norm": 20332.98828125, + "learning_rate": 8.999563542690266e-05, + "loss": 0.4586, + "step": 54050 + }, + { + "epoch": 0.2791235211870747, + "grad_norm": 19829.93359375, + "learning_rate": 8.997370241507516e-05, + "loss": 0.4608, + "step": 54100 + }, + { + "epoch": 0.27938149116968747, + "grad_norm": 21215.3515625, + "learning_rate": 8.995174806605937e-05, + "loss": 0.4672, + "step": 54150 + }, + { + "epoch": 0.2796394611523003, + "grad_norm": 19068.890625, + "learning_rate": 8.992977239157408e-05, + "loss": 0.4637, + "step": 54200 + }, + { + "epoch": 0.27989743113491317, + "grad_norm": 20632.857421875, + "learning_rate": 8.99077754033495e-05, + "loss": 0.4615, + "step": 54250 + }, + { + "epoch": 0.28015540111752596, + "grad_norm": 20244.943359375, + "learning_rate": 8.988575711312714e-05, + "loss": 0.4665, + "step": 54300 + }, + { + "epoch": 0.2804133711001388, + "grad_norm": 21873.34375, + "learning_rate": 8.986371753266001e-05, + "loss": 0.4636, + "step": 54350 + }, + { + "epoch": 0.2806713410827516, + "grad_norm": 18075.001953125, + "learning_rate": 8.984165667371236e-05, + "loss": 0.4626, + "step": 54400 + }, + { + "epoch": 0.28092931106536445, + "grad_norm": 19815.0546875, + "learning_rate": 8.981957454805987e-05, + "loss": 0.4535, + "step": 54450 + }, + { + "epoch": 0.28118728104797724, + "grad_norm": 22713.48046875, + "learning_rate": 8.979747116748955e-05, + "loss": 0.4592, + "step": 54500 + }, + { + "epoch": 0.2814452510305901, + "grad_norm": 23360.1953125, + "learning_rate": 8.977534654379976e-05, + "loss": 0.4646, + "step": 54550 + }, + { + "epoch": 0.2817032210132029, + "grad_norm": 21626.36328125, + "learning_rate": 8.975320068880018e-05, + "loss": 0.4644, + "step": 54600 + }, + { + "epoch": 0.28196119099581574, + "grad_norm": 20061.873046875, + "learning_rate": 8.973103361431184e-05, + "loss": 0.4674, + "step": 54650 + }, + { + "epoch": 0.28221916097842853, + "grad_norm": 21295.0625, + "learning_rate": 8.970884533216713e-05, + "loss": 0.4674, + "step": 54700 + }, + { + "epoch": 0.2824771309610414, + "grad_norm": 19434.23828125, + "learning_rate": 8.968663585420967e-05, + "loss": 0.46, + "step": 54750 + }, + { + "epoch": 0.28273510094365417, + "grad_norm": 23654.849609375, + "learning_rate": 8.966440519229449e-05, + "loss": 0.4649, + "step": 54800 + }, + { + "epoch": 0.282993070926267, + "grad_norm": 22763.603515625, + "learning_rate": 8.964215335828787e-05, + "loss": 0.4578, + "step": 54850 + }, + { + "epoch": 0.28325104090887987, + "grad_norm": 23262.849609375, + "learning_rate": 8.961988036406741e-05, + "loss": 0.4674, + "step": 54900 + }, + { + "epoch": 0.28350901089149266, + "grad_norm": 20148.380859375, + "learning_rate": 8.959758622152201e-05, + "loss": 0.4642, + "step": 54950 + }, + { + "epoch": 0.2837669808741055, + "grad_norm": 22515.548828125, + "learning_rate": 8.957527094255186e-05, + "loss": 0.4697, + "step": 55000 + }, + { + "epoch": 0.2837669808741055, + "eval_loss": 0.4508056044578552, + "eval_runtime": 3347.9938, + "eval_samples_per_second": 926.262, + "eval_steps_per_second": 1.809, + "step": 55000 + }, + { + "epoch": 0.2840249508567183, + "grad_norm": 21158.09375, + "learning_rate": 8.95529345390684e-05, + "loss": 0.4617, + "step": 55050 + }, + { + "epoch": 0.28428292083933115, + "grad_norm": 20892.517578125, + "learning_rate": 8.953057702299437e-05, + "loss": 0.4612, + "step": 55100 + }, + { + "epoch": 0.28454089082194395, + "grad_norm": 21489.740234375, + "learning_rate": 8.950819840626381e-05, + "loss": 0.4578, + "step": 55150 + }, + { + "epoch": 0.2847988608045568, + "grad_norm": 20703.072265625, + "learning_rate": 8.948579870082197e-05, + "loss": 0.4632, + "step": 55200 + }, + { + "epoch": 0.2850568307871696, + "grad_norm": 21731.775390625, + "learning_rate": 8.946337791862537e-05, + "loss": 0.4621, + "step": 55250 + }, + { + "epoch": 0.28531480076978244, + "grad_norm": 24507.076171875, + "learning_rate": 8.94409360716418e-05, + "loss": 0.4542, + "step": 55300 + }, + { + "epoch": 0.28557277075239523, + "grad_norm": 20686.79296875, + "learning_rate": 8.94184731718503e-05, + "loss": 0.4575, + "step": 55350 + }, + { + "epoch": 0.2858307407350081, + "grad_norm": 20055.396484375, + "learning_rate": 8.93959892312411e-05, + "loss": 0.4595, + "step": 55400 + }, + { + "epoch": 0.2860887107176209, + "grad_norm": 21203.28515625, + "learning_rate": 8.93734842618157e-05, + "loss": 0.457, + "step": 55450 + }, + { + "epoch": 0.2863466807002337, + "grad_norm": 21738.6328125, + "learning_rate": 8.935095827558684e-05, + "loss": 0.4639, + "step": 55500 + }, + { + "epoch": 0.2866046506828465, + "grad_norm": 21593.056640625, + "learning_rate": 8.932841128457844e-05, + "loss": 0.4566, + "step": 55550 + }, + { + "epoch": 0.28686262066545937, + "grad_norm": 20362.564453125, + "learning_rate": 8.930584330082564e-05, + "loss": 0.4613, + "step": 55600 + }, + { + "epoch": 0.2871205906480722, + "grad_norm": 20415.390625, + "learning_rate": 8.928325433637482e-05, + "loss": 0.4591, + "step": 55650 + }, + { + "epoch": 0.287378560630685, + "grad_norm": 21615.1953125, + "learning_rate": 8.926064440328348e-05, + "loss": 0.4645, + "step": 55700 + }, + { + "epoch": 0.28763653061329786, + "grad_norm": 19537.873046875, + "learning_rate": 8.92380135136204e-05, + "loss": 0.4595, + "step": 55750 + }, + { + "epoch": 0.28789450059591065, + "grad_norm": 21288.21484375, + "learning_rate": 8.921536167946552e-05, + "loss": 0.4565, + "step": 55800 + }, + { + "epoch": 0.2881524705785235, + "grad_norm": 25019.783203125, + "learning_rate": 8.919268891290992e-05, + "loss": 0.4635, + "step": 55850 + }, + { + "epoch": 0.2884104405611363, + "grad_norm": 23099.5625, + "learning_rate": 8.916999522605592e-05, + "loss": 0.4561, + "step": 55900 + }, + { + "epoch": 0.28866841054374914, + "grad_norm": 22477.849609375, + "learning_rate": 8.914728063101694e-05, + "loss": 0.458, + "step": 55950 + }, + { + "epoch": 0.28892638052636194, + "grad_norm": 19823.103515625, + "learning_rate": 8.91245451399176e-05, + "loss": 0.457, + "step": 56000 + }, + { + "epoch": 0.2891843505089748, + "grad_norm": 20293.353515625, + "learning_rate": 8.910178876489368e-05, + "loss": 0.4614, + "step": 56050 + }, + { + "epoch": 0.2894423204915876, + "grad_norm": 19020.892578125, + "learning_rate": 8.907901151809205e-05, + "loss": 0.4597, + "step": 56100 + }, + { + "epoch": 0.28970029047420043, + "grad_norm": 20133.603515625, + "learning_rate": 8.905621341167082e-05, + "loss": 0.4577, + "step": 56150 + }, + { + "epoch": 0.2899582604568132, + "grad_norm": 21008.95703125, + "learning_rate": 8.903339445779915e-05, + "loss": 0.4596, + "step": 56200 + }, + { + "epoch": 0.29021623043942607, + "grad_norm": 21339.892578125, + "learning_rate": 8.901055466865735e-05, + "loss": 0.4631, + "step": 56250 + }, + { + "epoch": 0.29047420042203886, + "grad_norm": 20088.455078125, + "learning_rate": 8.898769405643686e-05, + "loss": 0.4571, + "step": 56300 + }, + { + "epoch": 0.2907321704046517, + "grad_norm": 21779.341796875, + "learning_rate": 8.896481263334023e-05, + "loss": 0.4541, + "step": 56350 + }, + { + "epoch": 0.29099014038726456, + "grad_norm": 24433.103515625, + "learning_rate": 8.894191041158113e-05, + "loss": 0.4627, + "step": 56400 + }, + { + "epoch": 0.29124811036987736, + "grad_norm": 22214.70703125, + "learning_rate": 8.891898740338432e-05, + "loss": 0.4585, + "step": 56450 + }, + { + "epoch": 0.2915060803524902, + "grad_norm": 20558.955078125, + "learning_rate": 8.889604362098567e-05, + "loss": 0.4547, + "step": 56500 + }, + { + "epoch": 0.291764050335103, + "grad_norm": 22438.3828125, + "learning_rate": 8.88730790766321e-05, + "loss": 0.4581, + "step": 56550 + }, + { + "epoch": 0.29202202031771585, + "grad_norm": 22429.658203125, + "learning_rate": 8.885009378258164e-05, + "loss": 0.4556, + "step": 56600 + }, + { + "epoch": 0.29227999030032864, + "grad_norm": 18076.814453125, + "learning_rate": 8.882708775110342e-05, + "loss": 0.4571, + "step": 56650 + }, + { + "epoch": 0.2925379602829415, + "grad_norm": 19816.873046875, + "learning_rate": 8.88040609944776e-05, + "loss": 0.4584, + "step": 56700 + }, + { + "epoch": 0.2927959302655543, + "grad_norm": 20448.5234375, + "learning_rate": 8.878101352499542e-05, + "loss": 0.4575, + "step": 56750 + }, + { + "epoch": 0.29305390024816713, + "grad_norm": 19950.4609375, + "learning_rate": 8.875794535495915e-05, + "loss": 0.4558, + "step": 56800 + }, + { + "epoch": 0.2933118702307799, + "grad_norm": 20185.0625, + "learning_rate": 8.873485649668218e-05, + "loss": 0.4523, + "step": 56850 + }, + { + "epoch": 0.2935698402133928, + "grad_norm": 22338.080078125, + "learning_rate": 8.871174696248888e-05, + "loss": 0.4648, + "step": 56900 + }, + { + "epoch": 0.29382781019600557, + "grad_norm": 22531.541015625, + "learning_rate": 8.868861676471463e-05, + "loss": 0.4628, + "step": 56950 + }, + { + "epoch": 0.2940857801786184, + "grad_norm": 19558.10546875, + "learning_rate": 8.866546591570592e-05, + "loss": 0.4565, + "step": 57000 + }, + { + "epoch": 0.29434375016123127, + "grad_norm": 20166.33203125, + "learning_rate": 8.864229442782023e-05, + "loss": 0.4527, + "step": 57050 + }, + { + "epoch": 0.29460172014384406, + "grad_norm": 20262.185546875, + "learning_rate": 8.861910231342603e-05, + "loss": 0.4575, + "step": 57100 + }, + { + "epoch": 0.2948596901264569, + "grad_norm": 19107.080078125, + "learning_rate": 8.859588958490283e-05, + "loss": 0.4564, + "step": 57150 + }, + { + "epoch": 0.2951176601090697, + "grad_norm": 19690.37109375, + "learning_rate": 8.857265625464113e-05, + "loss": 0.4576, + "step": 57200 + }, + { + "epoch": 0.29537563009168255, + "grad_norm": 21793.189453125, + "learning_rate": 8.854940233504245e-05, + "loss": 0.4616, + "step": 57250 + }, + { + "epoch": 0.29563360007429534, + "grad_norm": 21543.033203125, + "learning_rate": 8.852612783851926e-05, + "loss": 0.4559, + "step": 57300 + }, + { + "epoch": 0.2958915700569082, + "grad_norm": 21455.56640625, + "learning_rate": 8.850283277749504e-05, + "loss": 0.4583, + "step": 57350 + }, + { + "epoch": 0.296149540039521, + "grad_norm": 21236.935546875, + "learning_rate": 8.847951716440426e-05, + "loss": 0.46, + "step": 57400 + }, + { + "epoch": 0.29640751002213384, + "grad_norm": 22411.130859375, + "learning_rate": 8.845618101169232e-05, + "loss": 0.4563, + "step": 57450 + }, + { + "epoch": 0.29666548000474663, + "grad_norm": 19269.26171875, + "learning_rate": 8.843282433181561e-05, + "loss": 0.4634, + "step": 57500 + }, + { + "epoch": 0.2969234499873595, + "grad_norm": 22179.669921875, + "learning_rate": 8.840944713724149e-05, + "loss": 0.4582, + "step": 57550 + }, + { + "epoch": 0.29718141996997227, + "grad_norm": 19867.076171875, + "learning_rate": 8.838604944044825e-05, + "loss": 0.4591, + "step": 57600 + }, + { + "epoch": 0.2974393899525851, + "grad_norm": 19806.09375, + "learning_rate": 8.836263125392511e-05, + "loss": 0.4571, + "step": 57650 + }, + { + "epoch": 0.2976973599351979, + "grad_norm": 21762.22265625, + "learning_rate": 8.833919259017225e-05, + "loss": 0.4526, + "step": 57700 + }, + { + "epoch": 0.29795532991781076, + "grad_norm": 21031.263671875, + "learning_rate": 8.83157334617008e-05, + "loss": 0.4577, + "step": 57750 + }, + { + "epoch": 0.2982132999004236, + "grad_norm": 22886.556640625, + "learning_rate": 8.829225388103276e-05, + "loss": 0.4553, + "step": 57800 + }, + { + "epoch": 0.2984712698830364, + "grad_norm": 19710.173828125, + "learning_rate": 8.826875386070108e-05, + "loss": 0.4556, + "step": 57850 + }, + { + "epoch": 0.29872923986564925, + "grad_norm": 20607.244140625, + "learning_rate": 8.824523341324963e-05, + "loss": 0.458, + "step": 57900 + }, + { + "epoch": 0.29898720984826205, + "grad_norm": 20672.05859375, + "learning_rate": 8.822169255123317e-05, + "loss": 0.4531, + "step": 57950 + }, + { + "epoch": 0.2992451798308749, + "grad_norm": 21375.76953125, + "learning_rate": 8.819813128721732e-05, + "loss": 0.4602, + "step": 58000 + }, + { + "epoch": 0.2995031498134877, + "grad_norm": 20848.328125, + "learning_rate": 8.817454963377865e-05, + "loss": 0.4557, + "step": 58050 + }, + { + "epoch": 0.29976111979610054, + "grad_norm": 20778.619140625, + "learning_rate": 8.81509476035046e-05, + "loss": 0.4588, + "step": 58100 + }, + { + "epoch": 0.30001908977871333, + "grad_norm": 19791.296875, + "learning_rate": 8.812732520899347e-05, + "loss": 0.4609, + "step": 58150 + }, + { + "epoch": 0.3002770597613262, + "grad_norm": 21814.482421875, + "learning_rate": 8.810368246285445e-05, + "loss": 0.4597, + "step": 58200 + }, + { + "epoch": 0.300535029743939, + "grad_norm": 22417.65625, + "learning_rate": 8.808001937770755e-05, + "loss": 0.461, + "step": 58250 + }, + { + "epoch": 0.3007929997265518, + "grad_norm": 21347.53515625, + "learning_rate": 8.80563359661837e-05, + "loss": 0.4523, + "step": 58300 + }, + { + "epoch": 0.3010509697091646, + "grad_norm": 21612.689453125, + "learning_rate": 8.803263224092461e-05, + "loss": 0.4588, + "step": 58350 + }, + { + "epoch": 0.30130893969177747, + "grad_norm": 19139.7109375, + "learning_rate": 8.80089082145829e-05, + "loss": 0.4576, + "step": 58400 + }, + { + "epoch": 0.3015669096743903, + "grad_norm": 21629.78125, + "learning_rate": 8.798516389982197e-05, + "loss": 0.4514, + "step": 58450 + }, + { + "epoch": 0.3018248796570031, + "grad_norm": 20307.630859375, + "learning_rate": 8.79613993093161e-05, + "loss": 0.4606, + "step": 58500 + }, + { + "epoch": 0.30208284963961596, + "grad_norm": 17832.3359375, + "learning_rate": 8.793761445575037e-05, + "loss": 0.4654, + "step": 58550 + }, + { + "epoch": 0.30234081962222875, + "grad_norm": 19975.20703125, + "learning_rate": 8.791380935182065e-05, + "loss": 0.4519, + "step": 58600 + }, + { + "epoch": 0.3025987896048416, + "grad_norm": 23387.681640625, + "learning_rate": 8.788998401023365e-05, + "loss": 0.4576, + "step": 58650 + }, + { + "epoch": 0.3028567595874544, + "grad_norm": 18704.669921875, + "learning_rate": 8.78661384437069e-05, + "loss": 0.4634, + "step": 58700 + }, + { + "epoch": 0.30311472957006724, + "grad_norm": 21739.806640625, + "learning_rate": 8.784227266496868e-05, + "loss": 0.4471, + "step": 58750 + }, + { + "epoch": 0.30337269955268004, + "grad_norm": 22190.74609375, + "learning_rate": 8.781838668675806e-05, + "loss": 0.4508, + "step": 58800 + }, + { + "epoch": 0.3036306695352929, + "grad_norm": 19186.9609375, + "learning_rate": 8.779448052182495e-05, + "loss": 0.4575, + "step": 58850 + }, + { + "epoch": 0.3038886395179057, + "grad_norm": 21925.8984375, + "learning_rate": 8.777055418293e-05, + "loss": 0.4614, + "step": 58900 + }, + { + "epoch": 0.3041466095005185, + "grad_norm": 21280.16796875, + "learning_rate": 8.774660768284459e-05, + "loss": 0.4621, + "step": 58950 + }, + { + "epoch": 0.3044045794831313, + "grad_norm": 19872.3828125, + "learning_rate": 8.772264103435094e-05, + "loss": 0.4617, + "step": 59000 + }, + { + "epoch": 0.30466254946574417, + "grad_norm": 17518.58984375, + "learning_rate": 8.769865425024195e-05, + "loss": 0.4548, + "step": 59050 + }, + { + "epoch": 0.30492051944835696, + "grad_norm": 25605.537109375, + "learning_rate": 8.767464734332131e-05, + "loss": 0.4532, + "step": 59100 + }, + { + "epoch": 0.3051784894309698, + "grad_norm": 20151.53515625, + "learning_rate": 8.765062032640346e-05, + "loss": 0.4558, + "step": 59150 + }, + { + "epoch": 0.30543645941358266, + "grad_norm": 19346.048828125, + "learning_rate": 8.762657321231353e-05, + "loss": 0.4624, + "step": 59200 + }, + { + "epoch": 0.30569442939619546, + "grad_norm": 21447.115234375, + "learning_rate": 8.760250601388741e-05, + "loss": 0.4632, + "step": 59250 + }, + { + "epoch": 0.3059523993788083, + "grad_norm": 19053.896484375, + "learning_rate": 8.757841874397172e-05, + "loss": 0.454, + "step": 59300 + }, + { + "epoch": 0.3062103693614211, + "grad_norm": 20928.8515625, + "learning_rate": 8.755431141542376e-05, + "loss": 0.4509, + "step": 59350 + }, + { + "epoch": 0.30646833934403395, + "grad_norm": 20900.40234375, + "learning_rate": 8.753018404111157e-05, + "loss": 0.4523, + "step": 59400 + }, + { + "epoch": 0.30672630932664674, + "grad_norm": 19776.572265625, + "learning_rate": 8.750603663391385e-05, + "loss": 0.458, + "step": 59450 + }, + { + "epoch": 0.3069842793092596, + "grad_norm": 21503.505859375, + "learning_rate": 8.748186920672005e-05, + "loss": 0.4496, + "step": 59500 + }, + { + "epoch": 0.3072422492918724, + "grad_norm": 20588.5078125, + "learning_rate": 8.745768177243027e-05, + "loss": 0.4578, + "step": 59550 + }, + { + "epoch": 0.30750021927448523, + "grad_norm": 20516.150390625, + "learning_rate": 8.743347434395528e-05, + "loss": 0.46, + "step": 59600 + }, + { + "epoch": 0.307758189257098, + "grad_norm": 20487.498046875, + "learning_rate": 8.740924693421655e-05, + "loss": 0.4574, + "step": 59650 + }, + { + "epoch": 0.3080161592397109, + "grad_norm": 21070.3671875, + "learning_rate": 8.738499955614619e-05, + "loss": 0.4564, + "step": 59700 + }, + { + "epoch": 0.30827412922232367, + "grad_norm": 19067.427734375, + "learning_rate": 8.736073222268697e-05, + "loss": 0.4523, + "step": 59750 + }, + { + "epoch": 0.3085320992049365, + "grad_norm": 22084.68359375, + "learning_rate": 8.733644494679236e-05, + "loss": 0.4558, + "step": 59800 + }, + { + "epoch": 0.3087900691875493, + "grad_norm": 22324.9140625, + "learning_rate": 8.731213774142639e-05, + "loss": 0.4585, + "step": 59850 + }, + { + "epoch": 0.30904803917016216, + "grad_norm": 19219.47265625, + "learning_rate": 8.728781061956383e-05, + "loss": 0.4571, + "step": 59900 + }, + { + "epoch": 0.309306009152775, + "grad_norm": 20598.125, + "learning_rate": 8.726346359418998e-05, + "loss": 0.4581, + "step": 59950 + }, + { + "epoch": 0.3095639791353878, + "grad_norm": 22155.720703125, + "learning_rate": 8.723909667830082e-05, + "loss": 0.4578, + "step": 60000 + }, + { + "epoch": 0.3095639791353878, + "eval_loss": 0.44494956731796265, + "eval_runtime": 3261.5111, + "eval_samples_per_second": 950.823, + "eval_steps_per_second": 1.857, + "step": 60000 + }, + { + "epoch": 0.30982194911800065, + "grad_norm": 22012.822265625, + "learning_rate": 8.721470988490297e-05, + "loss": 0.4533, + "step": 60050 + }, + { + "epoch": 0.31007991910061344, + "grad_norm": 20934.453125, + "learning_rate": 8.719030322701358e-05, + "loss": 0.4538, + "step": 60100 + }, + { + "epoch": 0.3103378890832263, + "grad_norm": 20173.20703125, + "learning_rate": 8.716587671766049e-05, + "loss": 0.4559, + "step": 60150 + }, + { + "epoch": 0.3105958590658391, + "grad_norm": 19343.833984375, + "learning_rate": 8.714143036988208e-05, + "loss": 0.4579, + "step": 60200 + }, + { + "epoch": 0.31085382904845194, + "grad_norm": 20720.435546875, + "learning_rate": 8.711696419672734e-05, + "loss": 0.4529, + "step": 60250 + }, + { + "epoch": 0.31111179903106473, + "grad_norm": 22050.85546875, + "learning_rate": 8.709247821125583e-05, + "loss": 0.4505, + "step": 60300 + }, + { + "epoch": 0.3113697690136776, + "grad_norm": 22470.55078125, + "learning_rate": 8.706797242653773e-05, + "loss": 0.4616, + "step": 60350 + }, + { + "epoch": 0.31162773899629037, + "grad_norm": 21057.978515625, + "learning_rate": 8.70434468556537e-05, + "loss": 0.4568, + "step": 60400 + }, + { + "epoch": 0.3118857089789032, + "grad_norm": 21035.34375, + "learning_rate": 8.701890151169507e-05, + "loss": 0.4551, + "step": 60450 + }, + { + "epoch": 0.312143678961516, + "grad_norm": 20412.056640625, + "learning_rate": 8.699433640776363e-05, + "loss": 0.4521, + "step": 60500 + }, + { + "epoch": 0.31240164894412886, + "grad_norm": 19888.26953125, + "learning_rate": 8.696975155697175e-05, + "loss": 0.4565, + "step": 60550 + }, + { + "epoch": 0.3126596189267417, + "grad_norm": 22491.900390625, + "learning_rate": 8.694514697244238e-05, + "loss": 0.4578, + "step": 60600 + }, + { + "epoch": 0.3129175889093545, + "grad_norm": 20026.357421875, + "learning_rate": 8.692052266730897e-05, + "loss": 0.4554, + "step": 60650 + }, + { + "epoch": 0.31317555889196735, + "grad_norm": 22979.109375, + "learning_rate": 8.689587865471547e-05, + "loss": 0.461, + "step": 60700 + }, + { + "epoch": 0.31343352887458015, + "grad_norm": 21558.291015625, + "learning_rate": 8.68712149478164e-05, + "loss": 0.4546, + "step": 60750 + }, + { + "epoch": 0.313691498857193, + "grad_norm": 22115.384765625, + "learning_rate": 8.684653155977676e-05, + "loss": 0.4518, + "step": 60800 + }, + { + "epoch": 0.3139494688398058, + "grad_norm": 21422.41015625, + "learning_rate": 8.682182850377205e-05, + "loss": 0.4602, + "step": 60850 + }, + { + "epoch": 0.31420743882241864, + "grad_norm": 21101.02734375, + "learning_rate": 8.679710579298832e-05, + "loss": 0.4579, + "step": 60900 + }, + { + "epoch": 0.31446540880503143, + "grad_norm": 18844.361328125, + "learning_rate": 8.677236344062203e-05, + "loss": 0.4569, + "step": 60950 + }, + { + "epoch": 0.3147233787876443, + "grad_norm": 20492.796875, + "learning_rate": 8.67476014598802e-05, + "loss": 0.4542, + "step": 61000 + }, + { + "epoch": 0.3149813487702571, + "grad_norm": 28102.55078125, + "learning_rate": 8.67228198639803e-05, + "loss": 0.4516, + "step": 61050 + }, + { + "epoch": 0.3152393187528699, + "grad_norm": 20697.494140625, + "learning_rate": 8.669801866615024e-05, + "loss": 0.4551, + "step": 61100 + }, + { + "epoch": 0.3154972887354827, + "grad_norm": 20726.90625, + "learning_rate": 8.667319787962842e-05, + "loss": 0.4576, + "step": 61150 + }, + { + "epoch": 0.31575525871809557, + "grad_norm": 20007.04296875, + "learning_rate": 8.664835751766371e-05, + "loss": 0.4544, + "step": 61200 + }, + { + "epoch": 0.31601322870070836, + "grad_norm": 23061.224609375, + "learning_rate": 8.662349759351542e-05, + "loss": 0.458, + "step": 61250 + }, + { + "epoch": 0.3162711986833212, + "grad_norm": 19895.3125, + "learning_rate": 8.65986181204533e-05, + "loss": 0.4555, + "step": 61300 + }, + { + "epoch": 0.31652916866593406, + "grad_norm": 22702.5234375, + "learning_rate": 8.65737191117575e-05, + "loss": 0.4586, + "step": 61350 + }, + { + "epoch": 0.31678713864854685, + "grad_norm": 20045.404296875, + "learning_rate": 8.654880058071866e-05, + "loss": 0.4583, + "step": 61400 + }, + { + "epoch": 0.3170451086311597, + "grad_norm": 21180.455078125, + "learning_rate": 8.652386254063778e-05, + "loss": 0.4594, + "step": 61450 + }, + { + "epoch": 0.3173030786137725, + "grad_norm": 19104.767578125, + "learning_rate": 8.649890500482633e-05, + "loss": 0.4532, + "step": 61500 + }, + { + "epoch": 0.31756104859638534, + "grad_norm": 23137.869140625, + "learning_rate": 8.647392798660613e-05, + "loss": 0.4535, + "step": 61550 + }, + { + "epoch": 0.31781901857899814, + "grad_norm": 21784.001953125, + "learning_rate": 8.644893149930949e-05, + "loss": 0.4518, + "step": 61600 + }, + { + "epoch": 0.318076988561611, + "grad_norm": 20489.796875, + "learning_rate": 8.642391555627897e-05, + "loss": 0.4572, + "step": 61650 + }, + { + "epoch": 0.3183349585442238, + "grad_norm": 21743.728515625, + "learning_rate": 8.639888017086764e-05, + "loss": 0.4601, + "step": 61700 + }, + { + "epoch": 0.3185929285268366, + "grad_norm": 21714.6171875, + "learning_rate": 8.63738253564389e-05, + "loss": 0.4597, + "step": 61750 + }, + { + "epoch": 0.3188508985094494, + "grad_norm": 19896.208984375, + "learning_rate": 8.634875112636653e-05, + "loss": 0.4532, + "step": 61800 + }, + { + "epoch": 0.31910886849206227, + "grad_norm": 22215.173828125, + "learning_rate": 8.632365749403465e-05, + "loss": 0.4532, + "step": 61850 + }, + { + "epoch": 0.31936683847467506, + "grad_norm": 22466.958984375, + "learning_rate": 8.629854447283778e-05, + "loss": 0.4539, + "step": 61900 + }, + { + "epoch": 0.3196248084572879, + "grad_norm": 21345.197265625, + "learning_rate": 8.627341207618073e-05, + "loss": 0.4551, + "step": 61950 + }, + { + "epoch": 0.3198827784399007, + "grad_norm": 20988.8203125, + "learning_rate": 8.624826031747872e-05, + "loss": 0.4593, + "step": 62000 + }, + { + "epoch": 0.32014074842251355, + "grad_norm": 23295.70703125, + "learning_rate": 8.622308921015726e-05, + "loss": 0.4547, + "step": 62050 + }, + { + "epoch": 0.3203987184051264, + "grad_norm": 22620.431640625, + "learning_rate": 8.619789876765221e-05, + "loss": 0.4601, + "step": 62100 + }, + { + "epoch": 0.3206566883877392, + "grad_norm": 21914.44140625, + "learning_rate": 8.61726890034097e-05, + "loss": 0.4474, + "step": 62150 + }, + { + "epoch": 0.32091465837035205, + "grad_norm": 20521.265625, + "learning_rate": 8.614745993088626e-05, + "loss": 0.4565, + "step": 62200 + }, + { + "epoch": 0.32117262835296484, + "grad_norm": 22810.072265625, + "learning_rate": 8.612221156354868e-05, + "loss": 0.453, + "step": 62250 + }, + { + "epoch": 0.3214305983355777, + "grad_norm": 20862.349609375, + "learning_rate": 8.609694391487402e-05, + "loss": 0.4543, + "step": 62300 + }, + { + "epoch": 0.3216885683181905, + "grad_norm": 22115.298828125, + "learning_rate": 8.607165699834967e-05, + "loss": 0.453, + "step": 62350 + }, + { + "epoch": 0.32194653830080333, + "grad_norm": 22504.859375, + "learning_rate": 8.60463508274733e-05, + "loss": 0.4552, + "step": 62400 + }, + { + "epoch": 0.3222045082834161, + "grad_norm": 21758.9453125, + "learning_rate": 8.602102541575286e-05, + "loss": 0.4526, + "step": 62450 + }, + { + "epoch": 0.322462478266029, + "grad_norm": 20388.23828125, + "learning_rate": 8.599568077670654e-05, + "loss": 0.4522, + "step": 62500 + }, + { + "epoch": 0.32272044824864177, + "grad_norm": 22393.857421875, + "learning_rate": 8.597031692386286e-05, + "loss": 0.4457, + "step": 62550 + }, + { + "epoch": 0.3229784182312546, + "grad_norm": 22233.978515625, + "learning_rate": 8.594493387076052e-05, + "loss": 0.449, + "step": 62600 + }, + { + "epoch": 0.3232363882138674, + "grad_norm": 19831.12109375, + "learning_rate": 8.591953163094852e-05, + "loss": 0.4556, + "step": 62650 + }, + { + "epoch": 0.32349435819648026, + "grad_norm": 19109.783203125, + "learning_rate": 8.589411021798608e-05, + "loss": 0.4552, + "step": 62700 + }, + { + "epoch": 0.3237523281790931, + "grad_norm": 23053.642578125, + "learning_rate": 8.586866964544265e-05, + "loss": 0.4552, + "step": 62750 + }, + { + "epoch": 0.3240102981617059, + "grad_norm": 17938.240234375, + "learning_rate": 8.584320992689791e-05, + "loss": 0.4512, + "step": 62800 + }, + { + "epoch": 0.32426826814431875, + "grad_norm": 19569.431640625, + "learning_rate": 8.581773107594179e-05, + "loss": 0.4557, + "step": 62850 + }, + { + "epoch": 0.32452623812693154, + "grad_norm": 19247.82421875, + "learning_rate": 8.579223310617439e-05, + "loss": 0.4599, + "step": 62900 + }, + { + "epoch": 0.3247842081095444, + "grad_norm": 21565.8671875, + "learning_rate": 8.576671603120603e-05, + "loss": 0.4573, + "step": 62950 + }, + { + "epoch": 0.3250421780921572, + "grad_norm": 19029.005859375, + "learning_rate": 8.574117986465723e-05, + "loss": 0.455, + "step": 63000 + }, + { + "epoch": 0.32530014807477003, + "grad_norm": 21574.626953125, + "learning_rate": 8.57156246201587e-05, + "loss": 0.4512, + "step": 63050 + }, + { + "epoch": 0.32555811805738283, + "grad_norm": 21181.8203125, + "learning_rate": 8.569005031135136e-05, + "loss": 0.4513, + "step": 63100 + }, + { + "epoch": 0.3258160880399957, + "grad_norm": 22689.93359375, + "learning_rate": 8.566445695188624e-05, + "loss": 0.4515, + "step": 63150 + }, + { + "epoch": 0.32607405802260847, + "grad_norm": 22001.9921875, + "learning_rate": 8.563884455542461e-05, + "loss": 0.4459, + "step": 63200 + }, + { + "epoch": 0.3263320280052213, + "grad_norm": 20342.96875, + "learning_rate": 8.561321313563786e-05, + "loss": 0.4526, + "step": 63250 + }, + { + "epoch": 0.3265899979878341, + "grad_norm": 20673.75390625, + "learning_rate": 8.558756270620756e-05, + "loss": 0.4581, + "step": 63300 + }, + { + "epoch": 0.32684796797044696, + "grad_norm": 23113.490234375, + "learning_rate": 8.556189328082538e-05, + "loss": 0.4525, + "step": 63350 + }, + { + "epoch": 0.32710593795305976, + "grad_norm": 21878.384765625, + "learning_rate": 8.55362048731932e-05, + "loss": 0.4536, + "step": 63400 + }, + { + "epoch": 0.3273639079356726, + "grad_norm": 22787.79296875, + "learning_rate": 8.551049749702297e-05, + "loss": 0.4586, + "step": 63450 + }, + { + "epoch": 0.32762187791828545, + "grad_norm": 20422.0625, + "learning_rate": 8.548477116603679e-05, + "loss": 0.4496, + "step": 63500 + }, + { + "epoch": 0.32787984790089825, + "grad_norm": 21936.8828125, + "learning_rate": 8.54590258939669e-05, + "loss": 0.4509, + "step": 63550 + }, + { + "epoch": 0.3281378178835111, + "grad_norm": 21049.275390625, + "learning_rate": 8.54332616945556e-05, + "loss": 0.4514, + "step": 63600 + }, + { + "epoch": 0.3283957878661239, + "grad_norm": 22976.1015625, + "learning_rate": 8.540747858155533e-05, + "loss": 0.4611, + "step": 63650 + }, + { + "epoch": 0.32865375784873674, + "grad_norm": 21968.18359375, + "learning_rate": 8.538167656872861e-05, + "loss": 0.4557, + "step": 63700 + }, + { + "epoch": 0.32891172783134953, + "grad_norm": 22231.755859375, + "learning_rate": 8.53558556698481e-05, + "loss": 0.4556, + "step": 63750 + }, + { + "epoch": 0.3291696978139624, + "grad_norm": 21183.978515625, + "learning_rate": 8.533001589869643e-05, + "loss": 0.4479, + "step": 63800 + }, + { + "epoch": 0.3294276677965752, + "grad_norm": 23931.5234375, + "learning_rate": 8.530415726906642e-05, + "loss": 0.4533, + "step": 63850 + }, + { + "epoch": 0.329685637779188, + "grad_norm": 21073.62890625, + "learning_rate": 8.527827979476087e-05, + "loss": 0.4577, + "step": 63900 + }, + { + "epoch": 0.3299436077618008, + "grad_norm": 19957.09375, + "learning_rate": 8.525238348959268e-05, + "loss": 0.4486, + "step": 63950 + }, + { + "epoch": 0.33020157774441367, + "grad_norm": 18999.962890625, + "learning_rate": 8.522646836738482e-05, + "loss": 0.4525, + "step": 64000 + }, + { + "epoch": 0.33045954772702646, + "grad_norm": 24102.1640625, + "learning_rate": 8.520053444197026e-05, + "loss": 0.4545, + "step": 64050 + }, + { + "epoch": 0.3307175177096393, + "grad_norm": 20205.65234375, + "learning_rate": 8.517458172719203e-05, + "loss": 0.4539, + "step": 64100 + }, + { + "epoch": 0.33097548769225216, + "grad_norm": 24099.8203125, + "learning_rate": 8.514861023690321e-05, + "loss": 0.4465, + "step": 64150 + }, + { + "epoch": 0.33123345767486495, + "grad_norm": 19802.203125, + "learning_rate": 8.512261998496685e-05, + "loss": 0.4546, + "step": 64200 + }, + { + "epoch": 0.3314914276574778, + "grad_norm": 23137.609375, + "learning_rate": 8.509661098525603e-05, + "loss": 0.4539, + "step": 64250 + }, + { + "epoch": 0.3317493976400906, + "grad_norm": 23578.609375, + "learning_rate": 8.507058325165391e-05, + "loss": 0.4513, + "step": 64300 + }, + { + "epoch": 0.33200736762270344, + "grad_norm": 19172.0859375, + "learning_rate": 8.504453679805353e-05, + "loss": 0.456, + "step": 64350 + }, + { + "epoch": 0.33226533760531624, + "grad_norm": 19165.775390625, + "learning_rate": 8.5018471638358e-05, + "loss": 0.4578, + "step": 64400 + }, + { + "epoch": 0.3325233075879291, + "grad_norm": 18070.72265625, + "learning_rate": 8.49923877864804e-05, + "loss": 0.4608, + "step": 64450 + }, + { + "epoch": 0.3327812775705419, + "grad_norm": 20918.525390625, + "learning_rate": 8.49662852563438e-05, + "loss": 0.4526, + "step": 64500 + }, + { + "epoch": 0.3330392475531547, + "grad_norm": 21165.05078125, + "learning_rate": 8.494016406188121e-05, + "loss": 0.4503, + "step": 64550 + }, + { + "epoch": 0.3332972175357675, + "grad_norm": 19273.013671875, + "learning_rate": 8.491402421703562e-05, + "loss": 0.4572, + "step": 64600 + }, + { + "epoch": 0.33355518751838037, + "grad_norm": 21221.681640625, + "learning_rate": 8.488786573575998e-05, + "loss": 0.456, + "step": 64650 + }, + { + "epoch": 0.33381315750099316, + "grad_norm": 19485.8125, + "learning_rate": 8.486168863201716e-05, + "loss": 0.4423, + "step": 64700 + }, + { + "epoch": 0.334071127483606, + "grad_norm": 23241.580078125, + "learning_rate": 8.483549291978001e-05, + "loss": 0.4531, + "step": 64750 + }, + { + "epoch": 0.3343290974662188, + "grad_norm": 21281.111328125, + "learning_rate": 8.48092786130313e-05, + "loss": 0.452, + "step": 64800 + }, + { + "epoch": 0.33458706744883165, + "grad_norm": 21610.2578125, + "learning_rate": 8.47830457257637e-05, + "loss": 0.4488, + "step": 64850 + }, + { + "epoch": 0.3348450374314445, + "grad_norm": 19343.466796875, + "learning_rate": 8.475679427197982e-05, + "loss": 0.4514, + "step": 64900 + }, + { + "epoch": 0.3351030074140573, + "grad_norm": 19489.1875, + "learning_rate": 8.473052426569219e-05, + "loss": 0.447, + "step": 64950 + }, + { + "epoch": 0.33536097739667015, + "grad_norm": 24805.84765625, + "learning_rate": 8.470423572092323e-05, + "loss": 0.4594, + "step": 65000 + }, + { + "epoch": 0.33536097739667015, + "eval_loss": 0.440469890832901, + "eval_runtime": 3318.76, + "eval_samples_per_second": 934.421, + "eval_steps_per_second": 1.825, + "step": 65000 + }, + { + "epoch": 0.33561894737928294, + "grad_norm": 22912.732421875, + "learning_rate": 8.467792865170525e-05, + "loss": 0.4435, + "step": 65050 + }, + { + "epoch": 0.3358769173618958, + "grad_norm": 19958.994140625, + "learning_rate": 8.465160307208045e-05, + "loss": 0.4588, + "step": 65100 + }, + { + "epoch": 0.3361348873445086, + "grad_norm": 20914.193359375, + "learning_rate": 8.462525899610092e-05, + "loss": 0.4497, + "step": 65150 + }, + { + "epoch": 0.33639285732712143, + "grad_norm": 20505.814453125, + "learning_rate": 8.459889643782861e-05, + "loss": 0.4569, + "step": 65200 + }, + { + "epoch": 0.3366508273097342, + "grad_norm": 19486.068359375, + "learning_rate": 8.457251541133535e-05, + "loss": 0.4505, + "step": 65250 + }, + { + "epoch": 0.3369087972923471, + "grad_norm": 21967.84765625, + "learning_rate": 8.454611593070284e-05, + "loss": 0.4556, + "step": 65300 + }, + { + "epoch": 0.33716676727495987, + "grad_norm": 21949.767578125, + "learning_rate": 8.451969801002258e-05, + "loss": 0.4491, + "step": 65350 + }, + { + "epoch": 0.3374247372575727, + "grad_norm": 19765.14453125, + "learning_rate": 8.449326166339595e-05, + "loss": 0.4507, + "step": 65400 + }, + { + "epoch": 0.3376827072401855, + "grad_norm": 21396.982421875, + "learning_rate": 8.446680690493417e-05, + "loss": 0.4548, + "step": 65450 + }, + { + "epoch": 0.33794067722279836, + "grad_norm": 22511.8359375, + "learning_rate": 8.444033374875828e-05, + "loss": 0.454, + "step": 65500 + }, + { + "epoch": 0.33819864720541115, + "grad_norm": 21264.076171875, + "learning_rate": 8.441384220899912e-05, + "loss": 0.4486, + "step": 65550 + }, + { + "epoch": 0.338456617188024, + "grad_norm": 20736.046875, + "learning_rate": 8.438733229979741e-05, + "loss": 0.4505, + "step": 65600 + }, + { + "epoch": 0.33871458717063685, + "grad_norm": 20183.8359375, + "learning_rate": 8.436080403530356e-05, + "loss": 0.4485, + "step": 65650 + }, + { + "epoch": 0.33897255715324964, + "grad_norm": 21947.3671875, + "learning_rate": 8.433425742967787e-05, + "loss": 0.4499, + "step": 65700 + }, + { + "epoch": 0.3392305271358625, + "grad_norm": 22621.236328125, + "learning_rate": 8.430769249709042e-05, + "loss": 0.4503, + "step": 65750 + }, + { + "epoch": 0.3394884971184753, + "grad_norm": 21537.947265625, + "learning_rate": 8.428110925172103e-05, + "loss": 0.4634, + "step": 65800 + }, + { + "epoch": 0.33974646710108813, + "grad_norm": 20869.759765625, + "learning_rate": 8.425450770775936e-05, + "loss": 0.4504, + "step": 65850 + }, + { + "epoch": 0.34000443708370093, + "grad_norm": 20865.12109375, + "learning_rate": 8.422788787940477e-05, + "loss": 0.4509, + "step": 65900 + }, + { + "epoch": 0.3402624070663138, + "grad_norm": 23897.974609375, + "learning_rate": 8.42012497808664e-05, + "loss": 0.4512, + "step": 65950 + }, + { + "epoch": 0.34052037704892657, + "grad_norm": 23978.56640625, + "learning_rate": 8.417459342636318e-05, + "loss": 0.4513, + "step": 66000 + }, + { + "epoch": 0.3407783470315394, + "grad_norm": 22806.99609375, + "learning_rate": 8.414791883012374e-05, + "loss": 0.4468, + "step": 66050 + }, + { + "epoch": 0.3410363170141522, + "grad_norm": 20348.841796875, + "learning_rate": 8.412122600638646e-05, + "loss": 0.4484, + "step": 66100 + }, + { + "epoch": 0.34129428699676506, + "grad_norm": 21868.353515625, + "learning_rate": 8.409451496939945e-05, + "loss": 0.4601, + "step": 66150 + }, + { + "epoch": 0.34155225697937786, + "grad_norm": 20312.36328125, + "learning_rate": 8.406778573342055e-05, + "loss": 0.4485, + "step": 66200 + }, + { + "epoch": 0.3418102269619907, + "grad_norm": 25603.419921875, + "learning_rate": 8.404103831271733e-05, + "loss": 0.4487, + "step": 66250 + }, + { + "epoch": 0.34206819694460355, + "grad_norm": 21330.416015625, + "learning_rate": 8.4014272721567e-05, + "loss": 0.449, + "step": 66300 + }, + { + "epoch": 0.34232616692721635, + "grad_norm": 20045.4453125, + "learning_rate": 8.398748897425656e-05, + "loss": 0.447, + "step": 66350 + }, + { + "epoch": 0.3425841369098292, + "grad_norm": 21575.642578125, + "learning_rate": 8.396068708508262e-05, + "loss": 0.4495, + "step": 66400 + }, + { + "epoch": 0.342842106892442, + "grad_norm": 20396.5390625, + "learning_rate": 8.393386706835154e-05, + "loss": 0.4478, + "step": 66450 + }, + { + "epoch": 0.34310007687505484, + "grad_norm": 20366.8046875, + "learning_rate": 8.390702893837929e-05, + "loss": 0.4531, + "step": 66500 + }, + { + "epoch": 0.34335804685766763, + "grad_norm": 23514.521484375, + "learning_rate": 8.388017270949158e-05, + "loss": 0.4496, + "step": 66550 + }, + { + "epoch": 0.3436160168402805, + "grad_norm": 23656.869140625, + "learning_rate": 8.385329839602372e-05, + "loss": 0.448, + "step": 66600 + }, + { + "epoch": 0.3438739868228933, + "grad_norm": 23712.216796875, + "learning_rate": 8.382640601232071e-05, + "loss": 0.4502, + "step": 66650 + }, + { + "epoch": 0.3441319568055061, + "grad_norm": 23220.240234375, + "learning_rate": 8.379949557273717e-05, + "loss": 0.4469, + "step": 66700 + }, + { + "epoch": 0.3443899267881189, + "grad_norm": 21469.244140625, + "learning_rate": 8.37725670916374e-05, + "loss": 0.4506, + "step": 66750 + }, + { + "epoch": 0.34464789677073177, + "grad_norm": 19195.431640625, + "learning_rate": 8.374562058339528e-05, + "loss": 0.4494, + "step": 66800 + }, + { + "epoch": 0.34490586675334456, + "grad_norm": 21464.130859375, + "learning_rate": 8.371865606239433e-05, + "loss": 0.4552, + "step": 66850 + }, + { + "epoch": 0.3451638367359574, + "grad_norm": 23449.76953125, + "learning_rate": 8.36916735430277e-05, + "loss": 0.4513, + "step": 66900 + }, + { + "epoch": 0.3454218067185702, + "grad_norm": 20593.39453125, + "learning_rate": 8.366467303969814e-05, + "loss": 0.447, + "step": 66950 + }, + { + "epoch": 0.34567977670118305, + "grad_norm": 21341.72265625, + "learning_rate": 8.3637654566818e-05, + "loss": 0.4448, + "step": 67000 + }, + { + "epoch": 0.3459377466837959, + "grad_norm": 20746.919921875, + "learning_rate": 8.361061813880919e-05, + "loss": 0.4511, + "step": 67050 + }, + { + "epoch": 0.3461957166664087, + "grad_norm": 19786.162109375, + "learning_rate": 8.358356377010325e-05, + "loss": 0.452, + "step": 67100 + }, + { + "epoch": 0.34645368664902154, + "grad_norm": 20875.25, + "learning_rate": 8.355649147514128e-05, + "loss": 0.4491, + "step": 67150 + }, + { + "epoch": 0.34671165663163434, + "grad_norm": 22833.728515625, + "learning_rate": 8.352940126837394e-05, + "loss": 0.4545, + "step": 67200 + }, + { + "epoch": 0.3469696266142472, + "grad_norm": 21289.896484375, + "learning_rate": 8.350229316426146e-05, + "loss": 0.4451, + "step": 67250 + }, + { + "epoch": 0.34722759659686, + "grad_norm": 23276.080078125, + "learning_rate": 8.347516717727363e-05, + "loss": 0.4468, + "step": 67300 + }, + { + "epoch": 0.3474855665794728, + "grad_norm": 22568.234375, + "learning_rate": 8.344802332188977e-05, + "loss": 0.4455, + "step": 67350 + }, + { + "epoch": 0.3477435365620856, + "grad_norm": 19527.234375, + "learning_rate": 8.342086161259874e-05, + "loss": 0.4511, + "step": 67400 + }, + { + "epoch": 0.34800150654469847, + "grad_norm": 21764.56640625, + "learning_rate": 8.339368206389895e-05, + "loss": 0.4481, + "step": 67450 + }, + { + "epoch": 0.34825947652731126, + "grad_norm": 21142.33984375, + "learning_rate": 8.336648469029829e-05, + "loss": 0.4539, + "step": 67500 + }, + { + "epoch": 0.3485174465099241, + "grad_norm": 21612.60546875, + "learning_rate": 8.333926950631421e-05, + "loss": 0.4497, + "step": 67550 + }, + { + "epoch": 0.3487754164925369, + "grad_norm": 20772.0390625, + "learning_rate": 8.331203652647364e-05, + "loss": 0.458, + "step": 67600 + }, + { + "epoch": 0.34903338647514975, + "grad_norm": 22197.166015625, + "learning_rate": 8.328478576531303e-05, + "loss": 0.4499, + "step": 67650 + }, + { + "epoch": 0.34929135645776255, + "grad_norm": 20853.865234375, + "learning_rate": 8.32575172373783e-05, + "loss": 0.4473, + "step": 67700 + }, + { + "epoch": 0.3495493264403754, + "grad_norm": 19692.892578125, + "learning_rate": 8.323023095722486e-05, + "loss": 0.4516, + "step": 67750 + }, + { + "epoch": 0.34980729642298825, + "grad_norm": 22032.115234375, + "learning_rate": 8.32029269394176e-05, + "loss": 0.4452, + "step": 67800 + }, + { + "epoch": 0.35006526640560104, + "grad_norm": 23928.783203125, + "learning_rate": 8.317560519853089e-05, + "loss": 0.4489, + "step": 67850 + }, + { + "epoch": 0.3503232363882139, + "grad_norm": 20832.560546875, + "learning_rate": 8.314826574914853e-05, + "loss": 0.4493, + "step": 67900 + }, + { + "epoch": 0.3505812063708267, + "grad_norm": 23453.634765625, + "learning_rate": 8.31209086058638e-05, + "loss": 0.4487, + "step": 67950 + }, + { + "epoch": 0.35083917635343953, + "grad_norm": 23585.826171875, + "learning_rate": 8.309353378327938e-05, + "loss": 0.4473, + "step": 68000 + }, + { + "epoch": 0.3510971463360523, + "grad_norm": 21680.953125, + "learning_rate": 8.306614129600745e-05, + "loss": 0.4494, + "step": 68050 + }, + { + "epoch": 0.3513551163186652, + "grad_norm": 19228.56640625, + "learning_rate": 8.303873115866958e-05, + "loss": 0.4483, + "step": 68100 + }, + { + "epoch": 0.35161308630127797, + "grad_norm": 22056.6328125, + "learning_rate": 8.301130338589679e-05, + "loss": 0.4485, + "step": 68150 + }, + { + "epoch": 0.3518710562838908, + "grad_norm": 22030.484375, + "learning_rate": 8.298385799232947e-05, + "loss": 0.4462, + "step": 68200 + }, + { + "epoch": 0.3521290262665036, + "grad_norm": 19658.33984375, + "learning_rate": 8.295639499261745e-05, + "loss": 0.4444, + "step": 68250 + }, + { + "epoch": 0.35238699624911646, + "grad_norm": 19667.8125, + "learning_rate": 8.292891440141997e-05, + "loss": 0.4482, + "step": 68300 + }, + { + "epoch": 0.35264496623172925, + "grad_norm": 20248.193359375, + "learning_rate": 8.290141623340558e-05, + "loss": 0.454, + "step": 68350 + }, + { + "epoch": 0.3529029362143421, + "grad_norm": 21358.89453125, + "learning_rate": 8.287390050325232e-05, + "loss": 0.4485, + "step": 68400 + }, + { + "epoch": 0.35316090619695495, + "grad_norm": 19209.328125, + "learning_rate": 8.284636722564754e-05, + "loss": 0.4505, + "step": 68450 + }, + { + "epoch": 0.35341887617956774, + "grad_norm": 21890.7109375, + "learning_rate": 8.281881641528795e-05, + "loss": 0.4531, + "step": 68500 + }, + { + "epoch": 0.3536768461621806, + "grad_norm": 20904.052734375, + "learning_rate": 8.279124808687967e-05, + "loss": 0.4494, + "step": 68550 + }, + { + "epoch": 0.3539348161447934, + "grad_norm": 22519.888671875, + "learning_rate": 8.276366225513812e-05, + "loss": 0.4422, + "step": 68600 + }, + { + "epoch": 0.35419278612740623, + "grad_norm": 20027.009765625, + "learning_rate": 8.27360589347881e-05, + "loss": 0.4484, + "step": 68650 + }, + { + "epoch": 0.354450756110019, + "grad_norm": 22069.64453125, + "learning_rate": 8.27084381405637e-05, + "loss": 0.443, + "step": 68700 + }, + { + "epoch": 0.3547087260926319, + "grad_norm": 23096.74609375, + "learning_rate": 8.26807998872084e-05, + "loss": 0.4437, + "step": 68750 + }, + { + "epoch": 0.35496669607524467, + "grad_norm": 19204.626953125, + "learning_rate": 8.265314418947494e-05, + "loss": 0.4496, + "step": 68800 + }, + { + "epoch": 0.3552246660578575, + "grad_norm": 26871.888671875, + "learning_rate": 8.262547106212541e-05, + "loss": 0.446, + "step": 68850 + }, + { + "epoch": 0.3554826360404703, + "grad_norm": 21342.556640625, + "learning_rate": 8.259778051993118e-05, + "loss": 0.4525, + "step": 68900 + }, + { + "epoch": 0.35574060602308316, + "grad_norm": 23054.814453125, + "learning_rate": 8.25700725776729e-05, + "loss": 0.4427, + "step": 68950 + }, + { + "epoch": 0.35599857600569595, + "grad_norm": 20473.818359375, + "learning_rate": 8.254234725014061e-05, + "loss": 0.4452, + "step": 69000 + }, + { + "epoch": 0.3562565459883088, + "grad_norm": 22081.576171875, + "learning_rate": 8.251460455213347e-05, + "loss": 0.4533, + "step": 69050 + }, + { + "epoch": 0.3565145159709216, + "grad_norm": 21840.048828125, + "learning_rate": 8.248684449846004e-05, + "loss": 0.4503, + "step": 69100 + }, + { + "epoch": 0.35677248595353445, + "grad_norm": 21595.234375, + "learning_rate": 8.245906710393808e-05, + "loss": 0.4459, + "step": 69150 + }, + { + "epoch": 0.3570304559361473, + "grad_norm": 22540.302734375, + "learning_rate": 8.243127238339463e-05, + "loss": 0.4461, + "step": 69200 + }, + { + "epoch": 0.3572884259187601, + "grad_norm": 20646.5859375, + "learning_rate": 8.2403460351666e-05, + "loss": 0.4522, + "step": 69250 + }, + { + "epoch": 0.35754639590137294, + "grad_norm": 20219.978515625, + "learning_rate": 8.237563102359767e-05, + "loss": 0.4464, + "step": 69300 + }, + { + "epoch": 0.35780436588398573, + "grad_norm": 21399.888671875, + "learning_rate": 8.234778441404441e-05, + "loss": 0.451, + "step": 69350 + }, + { + "epoch": 0.3580623358665986, + "grad_norm": 23263.193359375, + "learning_rate": 8.231992053787024e-05, + "loss": 0.4491, + "step": 69400 + }, + { + "epoch": 0.3583203058492114, + "grad_norm": 20740.455078125, + "learning_rate": 8.229203940994829e-05, + "loss": 0.4456, + "step": 69450 + }, + { + "epoch": 0.3585782758318242, + "grad_norm": 21715.078125, + "learning_rate": 8.226414104516102e-05, + "loss": 0.4467, + "step": 69500 + }, + { + "epoch": 0.358836245814437, + "grad_norm": 19771.517578125, + "learning_rate": 8.223622545840001e-05, + "loss": 0.4505, + "step": 69550 + }, + { + "epoch": 0.35909421579704986, + "grad_norm": 20944.298828125, + "learning_rate": 8.220829266456608e-05, + "loss": 0.4481, + "step": 69600 + }, + { + "epoch": 0.35935218577966266, + "grad_norm": 22313.017578125, + "learning_rate": 8.21803426785692e-05, + "loss": 0.4503, + "step": 69650 + }, + { + "epoch": 0.3596101557622755, + "grad_norm": 22525.5859375, + "learning_rate": 8.215237551532853e-05, + "loss": 0.4488, + "step": 69700 + }, + { + "epoch": 0.3598681257448883, + "grad_norm": 22731.85546875, + "learning_rate": 8.21243911897724e-05, + "loss": 0.4476, + "step": 69750 + }, + { + "epoch": 0.36012609572750115, + "grad_norm": 20872.9375, + "learning_rate": 8.20963897168383e-05, + "loss": 0.4485, + "step": 69800 + }, + { + "epoch": 0.360384065710114, + "grad_norm": 21066.095703125, + "learning_rate": 8.206837111147289e-05, + "loss": 0.4511, + "step": 69850 + }, + { + "epoch": 0.3606420356927268, + "grad_norm": 21823.62890625, + "learning_rate": 8.204033538863197e-05, + "loss": 0.4415, + "step": 69900 + }, + { + "epoch": 0.36090000567533964, + "grad_norm": 19639.724609375, + "learning_rate": 8.201228256328042e-05, + "loss": 0.4456, + "step": 69950 + }, + { + "epoch": 0.36115797565795243, + "grad_norm": 25321.20703125, + "learning_rate": 8.198421265039231e-05, + "loss": 0.4506, + "step": 70000 + }, + { + "epoch": 0.36115797565795243, + "eval_loss": 0.43597322702407837, + "eval_runtime": 3285.9769, + "eval_samples_per_second": 943.744, + "eval_steps_per_second": 1.843, + "step": 70000 + }, + { + "epoch": 0.3614159456405653, + "grad_norm": 19558.943359375, + "learning_rate": 8.195612566495084e-05, + "loss": 0.4502, + "step": 70050 + }, + { + "epoch": 0.3616739156231781, + "grad_norm": 21766.482421875, + "learning_rate": 8.192802162194828e-05, + "loss": 0.4444, + "step": 70100 + }, + { + "epoch": 0.3619318856057909, + "grad_norm": 23117.017578125, + "learning_rate": 8.189990053638603e-05, + "loss": 0.4476, + "step": 70150 + }, + { + "epoch": 0.3621898555884037, + "grad_norm": 19175.60546875, + "learning_rate": 8.18717624232746e-05, + "loss": 0.4479, + "step": 70200 + }, + { + "epoch": 0.36244782557101657, + "grad_norm": 22124.80078125, + "learning_rate": 8.184360729763351e-05, + "loss": 0.449, + "step": 70250 + }, + { + "epoch": 0.36270579555362936, + "grad_norm": 21717.501953125, + "learning_rate": 8.181543517449147e-05, + "loss": 0.4488, + "step": 70300 + }, + { + "epoch": 0.3629637655362422, + "grad_norm": 20235.162109375, + "learning_rate": 8.178724606888621e-05, + "loss": 0.4496, + "step": 70350 + }, + { + "epoch": 0.363221735518855, + "grad_norm": 22513.677734375, + "learning_rate": 8.175903999586455e-05, + "loss": 0.4463, + "step": 70400 + }, + { + "epoch": 0.36347970550146785, + "grad_norm": 21388.1953125, + "learning_rate": 8.173081697048228e-05, + "loss": 0.4446, + "step": 70450 + }, + { + "epoch": 0.36373767548408065, + "grad_norm": 20549.271484375, + "learning_rate": 8.170257700780435e-05, + "loss": 0.4421, + "step": 70500 + }, + { + "epoch": 0.3639956454666935, + "grad_norm": 21219.158203125, + "learning_rate": 8.16743201229047e-05, + "loss": 0.4472, + "step": 70550 + }, + { + "epoch": 0.36425361544930634, + "grad_norm": 20570.34375, + "learning_rate": 8.164604633086632e-05, + "loss": 0.4487, + "step": 70600 + }, + { + "epoch": 0.36451158543191914, + "grad_norm": 17376.671875, + "learning_rate": 8.161775564678118e-05, + "loss": 0.4413, + "step": 70650 + }, + { + "epoch": 0.364769555414532, + "grad_norm": 21676.33984375, + "learning_rate": 8.158944808575032e-05, + "loss": 0.4433, + "step": 70700 + }, + { + "epoch": 0.3650275253971448, + "grad_norm": 21901.001953125, + "learning_rate": 8.156112366288378e-05, + "loss": 0.4465, + "step": 70750 + }, + { + "epoch": 0.36528549537975763, + "grad_norm": 20330.720703125, + "learning_rate": 8.153278239330056e-05, + "loss": 0.4456, + "step": 70800 + }, + { + "epoch": 0.3655434653623704, + "grad_norm": 22179.904296875, + "learning_rate": 8.15044242921287e-05, + "loss": 0.4465, + "step": 70850 + }, + { + "epoch": 0.3658014353449833, + "grad_norm": 21384.66015625, + "learning_rate": 8.14760493745052e-05, + "loss": 0.4476, + "step": 70900 + }, + { + "epoch": 0.36605940532759607, + "grad_norm": 21706.103515625, + "learning_rate": 8.144765765557604e-05, + "loss": 0.4475, + "step": 70950 + }, + { + "epoch": 0.3663173753102089, + "grad_norm": 20332.5, + "learning_rate": 8.141924915049617e-05, + "loss": 0.449, + "step": 71000 + }, + { + "epoch": 0.3665753452928217, + "grad_norm": 22648.640625, + "learning_rate": 8.139082387442951e-05, + "loss": 0.4566, + "step": 71050 + }, + { + "epoch": 0.36683331527543456, + "grad_norm": 21496.291015625, + "learning_rate": 8.136238184254892e-05, + "loss": 0.4493, + "step": 71100 + }, + { + "epoch": 0.36709128525804735, + "grad_norm": 22114.169921875, + "learning_rate": 8.133392307003618e-05, + "loss": 0.4441, + "step": 71150 + }, + { + "epoch": 0.3673492552406602, + "grad_norm": 22476.390625, + "learning_rate": 8.130544757208205e-05, + "loss": 0.4391, + "step": 71200 + }, + { + "epoch": 0.367607225223273, + "grad_norm": 22175.044921875, + "learning_rate": 8.127695536388623e-05, + "loss": 0.4439, + "step": 71250 + }, + { + "epoch": 0.36786519520588584, + "grad_norm": 19715.728515625, + "learning_rate": 8.124844646065724e-05, + "loss": 0.448, + "step": 71300 + }, + { + "epoch": 0.3681231651884987, + "grad_norm": 19609.146484375, + "learning_rate": 8.121992087761266e-05, + "loss": 0.4476, + "step": 71350 + }, + { + "epoch": 0.3683811351711115, + "grad_norm": 21872.12890625, + "learning_rate": 8.119137862997883e-05, + "loss": 0.4536, + "step": 71400 + }, + { + "epoch": 0.36863910515372433, + "grad_norm": 19710.619140625, + "learning_rate": 8.116281973299107e-05, + "loss": 0.4466, + "step": 71450 + }, + { + "epoch": 0.3688970751363371, + "grad_norm": 21783.138671875, + "learning_rate": 8.113424420189357e-05, + "loss": 0.4422, + "step": 71500 + }, + { + "epoch": 0.36915504511895, + "grad_norm": 20527.984375, + "learning_rate": 8.110565205193941e-05, + "loss": 0.4499, + "step": 71550 + }, + { + "epoch": 0.36941301510156277, + "grad_norm": 21693.171875, + "learning_rate": 8.10770432983905e-05, + "loss": 0.4465, + "step": 71600 + }, + { + "epoch": 0.3696709850841756, + "grad_norm": 19817.142578125, + "learning_rate": 8.104841795651765e-05, + "loss": 0.4471, + "step": 71650 + }, + { + "epoch": 0.3699289550667884, + "grad_norm": 20883.767578125, + "learning_rate": 8.101977604160052e-05, + "loss": 0.4507, + "step": 71700 + }, + { + "epoch": 0.37018692504940126, + "grad_norm": 21206.943359375, + "learning_rate": 8.099111756892759e-05, + "loss": 0.4415, + "step": 71750 + }, + { + "epoch": 0.37044489503201405, + "grad_norm": 21431.19140625, + "learning_rate": 8.096244255379621e-05, + "loss": 0.4542, + "step": 71800 + }, + { + "epoch": 0.3707028650146269, + "grad_norm": 23020.34375, + "learning_rate": 8.093375101151255e-05, + "loss": 0.4481, + "step": 71850 + }, + { + "epoch": 0.3709608349972397, + "grad_norm": 20704.1171875, + "learning_rate": 8.09050429573916e-05, + "loss": 0.4427, + "step": 71900 + }, + { + "epoch": 0.37121880497985255, + "grad_norm": 20195.037109375, + "learning_rate": 8.087631840675715e-05, + "loss": 0.4416, + "step": 71950 + }, + { + "epoch": 0.3714767749624654, + "grad_norm": 21187.99609375, + "learning_rate": 8.084757737494184e-05, + "loss": 0.452, + "step": 72000 + }, + { + "epoch": 0.3717347449450782, + "grad_norm": 20694.912109375, + "learning_rate": 8.081881987728703e-05, + "loss": 0.4416, + "step": 72050 + }, + { + "epoch": 0.37199271492769104, + "grad_norm": 23006.939453125, + "learning_rate": 8.079004592914297e-05, + "loss": 0.4426, + "step": 72100 + }, + { + "epoch": 0.37225068491030383, + "grad_norm": 21854.025390625, + "learning_rate": 8.076125554586859e-05, + "loss": 0.4453, + "step": 72150 + }, + { + "epoch": 0.3725086548929167, + "grad_norm": 19155.400390625, + "learning_rate": 8.073244874283166e-05, + "loss": 0.4539, + "step": 72200 + }, + { + "epoch": 0.3727666248755295, + "grad_norm": 22085.5625, + "learning_rate": 8.070362553540869e-05, + "loss": 0.4474, + "step": 72250 + }, + { + "epoch": 0.3730245948581423, + "grad_norm": 21225.626953125, + "learning_rate": 8.067478593898495e-05, + "loss": 0.4431, + "step": 72300 + }, + { + "epoch": 0.3732825648407551, + "grad_norm": 21605.546875, + "learning_rate": 8.064592996895446e-05, + "loss": 0.4534, + "step": 72350 + }, + { + "epoch": 0.37354053482336796, + "grad_norm": 20774.87109375, + "learning_rate": 8.061705764071999e-05, + "loss": 0.4462, + "step": 72400 + }, + { + "epoch": 0.37379850480598076, + "grad_norm": 21871.390625, + "learning_rate": 8.0588168969693e-05, + "loss": 0.4445, + "step": 72450 + }, + { + "epoch": 0.3740564747885936, + "grad_norm": 22102.560546875, + "learning_rate": 8.05592639712937e-05, + "loss": 0.4478, + "step": 72500 + }, + { + "epoch": 0.3743144447712064, + "grad_norm": 21172.283203125, + "learning_rate": 8.053034266095105e-05, + "loss": 0.4469, + "step": 72550 + }, + { + "epoch": 0.37457241475381925, + "grad_norm": 21827.390625, + "learning_rate": 8.050140505410268e-05, + "loss": 0.4485, + "step": 72600 + }, + { + "epoch": 0.37483038473643204, + "grad_norm": 21271.87890625, + "learning_rate": 8.047245116619492e-05, + "loss": 0.45, + "step": 72650 + }, + { + "epoch": 0.3750883547190449, + "grad_norm": 21192.6484375, + "learning_rate": 8.04434810126828e-05, + "loss": 0.442, + "step": 72700 + }, + { + "epoch": 0.37534632470165774, + "grad_norm": 21529.736328125, + "learning_rate": 8.041449460903001e-05, + "loss": 0.4462, + "step": 72750 + }, + { + "epoch": 0.37560429468427053, + "grad_norm": 18609.474609375, + "learning_rate": 8.038549197070893e-05, + "loss": 0.4436, + "step": 72800 + }, + { + "epoch": 0.3758622646668834, + "grad_norm": 21631.82421875, + "learning_rate": 8.035647311320062e-05, + "loss": 0.4507, + "step": 72850 + }, + { + "epoch": 0.3761202346494962, + "grad_norm": 22347.056640625, + "learning_rate": 8.03274380519948e-05, + "loss": 0.4472, + "step": 72900 + }, + { + "epoch": 0.376378204632109, + "grad_norm": 20416.37109375, + "learning_rate": 8.029838680258979e-05, + "loss": 0.4475, + "step": 72950 + }, + { + "epoch": 0.3766361746147218, + "grad_norm": 21952.27734375, + "learning_rate": 8.026931938049259e-05, + "loss": 0.4449, + "step": 73000 + }, + { + "epoch": 0.37689414459733467, + "grad_norm": 23068.12109375, + "learning_rate": 8.024023580121885e-05, + "loss": 0.4477, + "step": 73050 + }, + { + "epoch": 0.37715211457994746, + "grad_norm": 21956.462890625, + "learning_rate": 8.021113608029281e-05, + "loss": 0.4459, + "step": 73100 + }, + { + "epoch": 0.3774100845625603, + "grad_norm": 20933.28125, + "learning_rate": 8.018202023324733e-05, + "loss": 0.4481, + "step": 73150 + }, + { + "epoch": 0.3776680545451731, + "grad_norm": 23138.638671875, + "learning_rate": 8.015288827562389e-05, + "loss": 0.437, + "step": 73200 + }, + { + "epoch": 0.37792602452778595, + "grad_norm": 20973.119140625, + "learning_rate": 8.012374022297255e-05, + "loss": 0.4454, + "step": 73250 + }, + { + "epoch": 0.37818399451039875, + "grad_norm": 21328.29296875, + "learning_rate": 8.0094576090852e-05, + "loss": 0.4426, + "step": 73300 + }, + { + "epoch": 0.3784419644930116, + "grad_norm": 20653.591796875, + "learning_rate": 8.006539589482949e-05, + "loss": 0.4448, + "step": 73350 + }, + { + "epoch": 0.3786999344756244, + "grad_norm": 21520.181640625, + "learning_rate": 8.003619965048083e-05, + "loss": 0.4428, + "step": 73400 + }, + { + "epoch": 0.37895790445823724, + "grad_norm": 20736.89453125, + "learning_rate": 8.000698737339041e-05, + "loss": 0.4483, + "step": 73450 + }, + { + "epoch": 0.3792158744408501, + "grad_norm": 23887.587890625, + "learning_rate": 7.997775907915118e-05, + "loss": 0.4518, + "step": 73500 + }, + { + "epoch": 0.3794738444234629, + "grad_norm": 23771.8671875, + "learning_rate": 7.994851478336465e-05, + "loss": 0.4479, + "step": 73550 + }, + { + "epoch": 0.37973181440607573, + "grad_norm": 21563.27734375, + "learning_rate": 7.991925450164084e-05, + "loss": 0.4433, + "step": 73600 + }, + { + "epoch": 0.3799897843886885, + "grad_norm": 21403.751953125, + "learning_rate": 7.988997824959832e-05, + "loss": 0.4443, + "step": 73650 + }, + { + "epoch": 0.38024775437130137, + "grad_norm": 22136.51171875, + "learning_rate": 7.986068604286421e-05, + "loss": 0.446, + "step": 73700 + }, + { + "epoch": 0.38050572435391417, + "grad_norm": 22143.857421875, + "learning_rate": 7.98313778970741e-05, + "loss": 0.4416, + "step": 73750 + }, + { + "epoch": 0.380763694336527, + "grad_norm": 22035.1171875, + "learning_rate": 7.980205382787211e-05, + "loss": 0.4413, + "step": 73800 + }, + { + "epoch": 0.3810216643191398, + "grad_norm": 21744.25390625, + "learning_rate": 7.97727138509109e-05, + "loss": 0.4463, + "step": 73850 + }, + { + "epoch": 0.38127963430175266, + "grad_norm": 21739.26171875, + "learning_rate": 7.974335798185153e-05, + "loss": 0.4415, + "step": 73900 + }, + { + "epoch": 0.38153760428436545, + "grad_norm": 20974.59765625, + "learning_rate": 7.971398623636361e-05, + "loss": 0.4457, + "step": 73950 + }, + { + "epoch": 0.3817955742669783, + "grad_norm": 19807.79296875, + "learning_rate": 7.968459863012523e-05, + "loss": 0.4423, + "step": 74000 + }, + { + "epoch": 0.3820535442495911, + "grad_norm": 21711.158203125, + "learning_rate": 7.96551951788229e-05, + "loss": 0.4466, + "step": 74050 + }, + { + "epoch": 0.38231151423220394, + "grad_norm": 19187.47265625, + "learning_rate": 7.962577589815163e-05, + "loss": 0.4387, + "step": 74100 + }, + { + "epoch": 0.3825694842148168, + "grad_norm": 19402.611328125, + "learning_rate": 7.959634080381486e-05, + "loss": 0.444, + "step": 74150 + }, + { + "epoch": 0.3828274541974296, + "grad_norm": 21287.9765625, + "learning_rate": 7.956688991152445e-05, + "loss": 0.4386, + "step": 74200 + }, + { + "epoch": 0.38308542418004243, + "grad_norm": 20430.591796875, + "learning_rate": 7.953742323700075e-05, + "loss": 0.4453, + "step": 74250 + }, + { + "epoch": 0.3833433941626552, + "grad_norm": 23246.041015625, + "learning_rate": 7.950794079597248e-05, + "loss": 0.4448, + "step": 74300 + }, + { + "epoch": 0.3836013641452681, + "grad_norm": 23098.74609375, + "learning_rate": 7.94784426041768e-05, + "loss": 0.4449, + "step": 74350 + }, + { + "epoch": 0.38385933412788087, + "grad_norm": 21504.71484375, + "learning_rate": 7.944892867735929e-05, + "loss": 0.4423, + "step": 74400 + }, + { + "epoch": 0.3841173041104937, + "grad_norm": 20115.0859375, + "learning_rate": 7.941939903127386e-05, + "loss": 0.4462, + "step": 74450 + }, + { + "epoch": 0.3843752740931065, + "grad_norm": 20473.681640625, + "learning_rate": 7.938985368168293e-05, + "loss": 0.4541, + "step": 74500 + }, + { + "epoch": 0.38463324407571936, + "grad_norm": 19664.6640625, + "learning_rate": 7.93602926443572e-05, + "loss": 0.4439, + "step": 74550 + }, + { + "epoch": 0.38489121405833215, + "grad_norm": 20806.474609375, + "learning_rate": 7.933071593507579e-05, + "loss": 0.439, + "step": 74600 + }, + { + "epoch": 0.385149184040945, + "grad_norm": 20905.197265625, + "learning_rate": 7.930112356962618e-05, + "loss": 0.444, + "step": 74650 + }, + { + "epoch": 0.3854071540235578, + "grad_norm": 26333.470703125, + "learning_rate": 7.927151556380417e-05, + "loss": 0.4462, + "step": 74700 + }, + { + "epoch": 0.38566512400617065, + "grad_norm": 20478.18359375, + "learning_rate": 7.924189193341396e-05, + "loss": 0.4456, + "step": 74750 + }, + { + "epoch": 0.38592309398878344, + "grad_norm": 20605.662109375, + "learning_rate": 7.921225269426808e-05, + "loss": 0.4412, + "step": 74800 + }, + { + "epoch": 0.3861810639713963, + "grad_norm": 23029.943359375, + "learning_rate": 7.918259786218738e-05, + "loss": 0.4427, + "step": 74850 + }, + { + "epoch": 0.38643903395400914, + "grad_norm": 23275.130859375, + "learning_rate": 7.915292745300103e-05, + "loss": 0.4436, + "step": 74900 + }, + { + "epoch": 0.38669700393662193, + "grad_norm": 22123.671875, + "learning_rate": 7.91232414825465e-05, + "loss": 0.4456, + "step": 74950 + }, + { + "epoch": 0.3869549739192348, + "grad_norm": 22476.365234375, + "learning_rate": 7.909353996666961e-05, + "loss": 0.4424, + "step": 75000 + }, + { + "epoch": 0.3869549739192348, + "eval_loss": 0.43277591466903687, + "eval_runtime": 3260.4686, + "eval_samples_per_second": 951.127, + "eval_steps_per_second": 1.858, + "step": 75000 + }, + { + "epoch": 0.3872129439018476, + "grad_norm": 22150.966796875, + "learning_rate": 7.906382292122448e-05, + "loss": 0.4407, + "step": 75050 + }, + { + "epoch": 0.3874709138844604, + "grad_norm": 20100.5625, + "learning_rate": 7.903409036207343e-05, + "loss": 0.4443, + "step": 75100 + }, + { + "epoch": 0.3877288838670732, + "grad_norm": 22078.353515625, + "learning_rate": 7.900434230508715e-05, + "loss": 0.4468, + "step": 75150 + }, + { + "epoch": 0.38798685384968606, + "grad_norm": 20395.498046875, + "learning_rate": 7.897457876614461e-05, + "loss": 0.4424, + "step": 75200 + }, + { + "epoch": 0.38824482383229886, + "grad_norm": 23190.4140625, + "learning_rate": 7.894479976113298e-05, + "loss": 0.4394, + "step": 75250 + }, + { + "epoch": 0.3885027938149117, + "grad_norm": 21523.7265625, + "learning_rate": 7.891500530594771e-05, + "loss": 0.4441, + "step": 75300 + }, + { + "epoch": 0.3887607637975245, + "grad_norm": 22941.23828125, + "learning_rate": 7.888519541649253e-05, + "loss": 0.443, + "step": 75350 + }, + { + "epoch": 0.38901873378013735, + "grad_norm": 21467.90234375, + "learning_rate": 7.885537010867936e-05, + "loss": 0.4478, + "step": 75400 + }, + { + "epoch": 0.38927670376275014, + "grad_norm": 22635.732421875, + "learning_rate": 7.882552939842837e-05, + "loss": 0.4415, + "step": 75450 + }, + { + "epoch": 0.389534673745363, + "grad_norm": 21242.326171875, + "learning_rate": 7.879567330166797e-05, + "loss": 0.4352, + "step": 75500 + }, + { + "epoch": 0.38979264372797584, + "grad_norm": 20005.158203125, + "learning_rate": 7.876580183433475e-05, + "loss": 0.4393, + "step": 75550 + }, + { + "epoch": 0.39005061371058863, + "grad_norm": 23355.044921875, + "learning_rate": 7.873591501237351e-05, + "loss": 0.4465, + "step": 75600 + }, + { + "epoch": 0.3903085836932015, + "grad_norm": 21217.359375, + "learning_rate": 7.870601285173731e-05, + "loss": 0.4437, + "step": 75650 + }, + { + "epoch": 0.3905665536758143, + "grad_norm": 22424.580078125, + "learning_rate": 7.867609536838729e-05, + "loss": 0.4397, + "step": 75700 + }, + { + "epoch": 0.3908245236584271, + "grad_norm": 20943.65234375, + "learning_rate": 7.864616257829285e-05, + "loss": 0.4427, + "step": 75750 + }, + { + "epoch": 0.3910824936410399, + "grad_norm": 23246.5625, + "learning_rate": 7.861621449743152e-05, + "loss": 0.4479, + "step": 75800 + }, + { + "epoch": 0.39134046362365277, + "grad_norm": 21575.830078125, + "learning_rate": 7.858625114178902e-05, + "loss": 0.4384, + "step": 75850 + }, + { + "epoch": 0.39159843360626556, + "grad_norm": 22053.5546875, + "learning_rate": 7.855627252735918e-05, + "loss": 0.4364, + "step": 75900 + }, + { + "epoch": 0.3918564035888784, + "grad_norm": 21934.55078125, + "learning_rate": 7.852627867014406e-05, + "loss": 0.4466, + "step": 75950 + }, + { + "epoch": 0.3921143735714912, + "grad_norm": 20184.078125, + "learning_rate": 7.849626958615374e-05, + "loss": 0.4422, + "step": 76000 + }, + { + "epoch": 0.39237234355410405, + "grad_norm": 21770.923828125, + "learning_rate": 7.846624529140652e-05, + "loss": 0.4382, + "step": 76050 + }, + { + "epoch": 0.39263031353671685, + "grad_norm": 21592.16796875, + "learning_rate": 7.843620580192877e-05, + "loss": 0.4404, + "step": 76100 + }, + { + "epoch": 0.3928882835193297, + "grad_norm": 19634.1875, + "learning_rate": 7.8406151133755e-05, + "loss": 0.4443, + "step": 76150 + }, + { + "epoch": 0.3931462535019425, + "grad_norm": 24045.01171875, + "learning_rate": 7.837608130292782e-05, + "loss": 0.438, + "step": 76200 + }, + { + "epoch": 0.39340422348455534, + "grad_norm": 21739.921875, + "learning_rate": 7.83459963254979e-05, + "loss": 0.4474, + "step": 76250 + }, + { + "epoch": 0.3936621934671682, + "grad_norm": 20915.56640625, + "learning_rate": 7.831589621752405e-05, + "loss": 0.4463, + "step": 76300 + }, + { + "epoch": 0.393920163449781, + "grad_norm": 18799.80078125, + "learning_rate": 7.828578099507308e-05, + "loss": 0.4401, + "step": 76350 + }, + { + "epoch": 0.39417813343239383, + "grad_norm": 19029.51171875, + "learning_rate": 7.825565067421995e-05, + "loss": 0.4428, + "step": 76400 + }, + { + "epoch": 0.3944361034150066, + "grad_norm": 22817.376953125, + "learning_rate": 7.822550527104762e-05, + "loss": 0.4467, + "step": 76450 + }, + { + "epoch": 0.39469407339761947, + "grad_norm": 19165.529296875, + "learning_rate": 7.819534480164713e-05, + "loss": 0.4365, + "step": 76500 + }, + { + "epoch": 0.39495204338023226, + "grad_norm": 22980.056640625, + "learning_rate": 7.816516928211756e-05, + "loss": 0.4386, + "step": 76550 + }, + { + "epoch": 0.3952100133628451, + "grad_norm": 21261.7109375, + "learning_rate": 7.813497872856603e-05, + "loss": 0.4358, + "step": 76600 + }, + { + "epoch": 0.3954679833454579, + "grad_norm": 21533.779296875, + "learning_rate": 7.810477315710763e-05, + "loss": 0.4444, + "step": 76650 + }, + { + "epoch": 0.39572595332807076, + "grad_norm": 20503.556640625, + "learning_rate": 7.807455258386556e-05, + "loss": 0.4446, + "step": 76700 + }, + { + "epoch": 0.39598392331068355, + "grad_norm": 21180.939453125, + "learning_rate": 7.804431702497093e-05, + "loss": 0.4486, + "step": 76750 + }, + { + "epoch": 0.3962418932932964, + "grad_norm": 24126.484375, + "learning_rate": 7.801406649656294e-05, + "loss": 0.4419, + "step": 76800 + }, + { + "epoch": 0.3964998632759092, + "grad_norm": 19791.345703125, + "learning_rate": 7.79838010147887e-05, + "loss": 0.4499, + "step": 76850 + }, + { + "epoch": 0.39675783325852204, + "grad_norm": 21118.822265625, + "learning_rate": 7.795352059580334e-05, + "loss": 0.4403, + "step": 76900 + }, + { + "epoch": 0.39701580324113483, + "grad_norm": 20787.6015625, + "learning_rate": 7.792322525577e-05, + "loss": 0.4394, + "step": 76950 + }, + { + "epoch": 0.3972737732237477, + "grad_norm": 21575.86328125, + "learning_rate": 7.789291501085972e-05, + "loss": 0.4482, + "step": 77000 + }, + { + "epoch": 0.39753174320636053, + "grad_norm": 21271.287109375, + "learning_rate": 7.78625898772515e-05, + "loss": 0.4413, + "step": 77050 + }, + { + "epoch": 0.3977897131889733, + "grad_norm": 21294.7890625, + "learning_rate": 7.783224987113235e-05, + "loss": 0.4393, + "step": 77100 + }, + { + "epoch": 0.3980476831715862, + "grad_norm": 21880.341796875, + "learning_rate": 7.780189500869716e-05, + "loss": 0.4464, + "step": 77150 + }, + { + "epoch": 0.39830565315419897, + "grad_norm": 22501.482421875, + "learning_rate": 7.777152530614876e-05, + "loss": 0.4384, + "step": 77200 + }, + { + "epoch": 0.3985636231368118, + "grad_norm": 20404.89453125, + "learning_rate": 7.774114077969792e-05, + "loss": 0.4355, + "step": 77250 + }, + { + "epoch": 0.3988215931194246, + "grad_norm": 21435.66015625, + "learning_rate": 7.77107414455633e-05, + "loss": 0.4468, + "step": 77300 + }, + { + "epoch": 0.39907956310203746, + "grad_norm": 20239.091796875, + "learning_rate": 7.768032731997148e-05, + "loss": 0.4453, + "step": 77350 + }, + { + "epoch": 0.39933753308465025, + "grad_norm": 19040.37109375, + "learning_rate": 7.764989841915694e-05, + "loss": 0.4487, + "step": 77400 + }, + { + "epoch": 0.3995955030672631, + "grad_norm": 22501.13671875, + "learning_rate": 7.761945475936203e-05, + "loss": 0.4488, + "step": 77450 + }, + { + "epoch": 0.3998534730498759, + "grad_norm": 20773.27734375, + "learning_rate": 7.7588996356837e-05, + "loss": 0.4384, + "step": 77500 + }, + { + "epoch": 0.40011144303248874, + "grad_norm": 22598.4140625, + "learning_rate": 7.755852322783994e-05, + "loss": 0.4358, + "step": 77550 + }, + { + "epoch": 0.40036941301510154, + "grad_norm": 20656.033203125, + "learning_rate": 7.752803538863683e-05, + "loss": 0.4434, + "step": 77600 + }, + { + "epoch": 0.4006273829977144, + "grad_norm": 20882.3125, + "learning_rate": 7.749753285550146e-05, + "loss": 0.4408, + "step": 77650 + }, + { + "epoch": 0.40088535298032724, + "grad_norm": 19519.408203125, + "learning_rate": 7.746701564471553e-05, + "loss": 0.439, + "step": 77700 + }, + { + "epoch": 0.40114332296294003, + "grad_norm": 21141.80859375, + "learning_rate": 7.74364837725685e-05, + "loss": 0.4422, + "step": 77750 + }, + { + "epoch": 0.4014012929455529, + "grad_norm": 21487.45703125, + "learning_rate": 7.74059372553577e-05, + "loss": 0.429, + "step": 77800 + }, + { + "epoch": 0.4016592629281657, + "grad_norm": 19889.447265625, + "learning_rate": 7.737537610938829e-05, + "loss": 0.4474, + "step": 77850 + }, + { + "epoch": 0.4019172329107785, + "grad_norm": 21914.947265625, + "learning_rate": 7.73448003509732e-05, + "loss": 0.4403, + "step": 77900 + }, + { + "epoch": 0.4021752028933913, + "grad_norm": 24025.521484375, + "learning_rate": 7.731420999643319e-05, + "loss": 0.4432, + "step": 77950 + }, + { + "epoch": 0.40243317287600416, + "grad_norm": 19703.50390625, + "learning_rate": 7.728360506209679e-05, + "loss": 0.443, + "step": 78000 + }, + { + "epoch": 0.40269114285861696, + "grad_norm": 21566.37890625, + "learning_rate": 7.725298556430034e-05, + "loss": 0.448, + "step": 78050 + }, + { + "epoch": 0.4029491128412298, + "grad_norm": 21902.564453125, + "learning_rate": 7.72223515193879e-05, + "loss": 0.438, + "step": 78100 + }, + { + "epoch": 0.4032070828238426, + "grad_norm": 20892.7578125, + "learning_rate": 7.719170294371136e-05, + "loss": 0.4382, + "step": 78150 + }, + { + "epoch": 0.40346505280645545, + "grad_norm": 21648.673828125, + "learning_rate": 7.716103985363033e-05, + "loss": 0.4378, + "step": 78200 + }, + { + "epoch": 0.40372302278906824, + "grad_norm": 23124.40625, + "learning_rate": 7.713036226551215e-05, + "loss": 0.442, + "step": 78250 + }, + { + "epoch": 0.4039809927716811, + "grad_norm": 25006.751953125, + "learning_rate": 7.709967019573195e-05, + "loss": 0.4397, + "step": 78300 + }, + { + "epoch": 0.4042389627542939, + "grad_norm": 20722.802734375, + "learning_rate": 7.706896366067256e-05, + "loss": 0.4388, + "step": 78350 + }, + { + "epoch": 0.40449693273690673, + "grad_norm": 20202.013671875, + "learning_rate": 7.703824267672452e-05, + "loss": 0.4404, + "step": 78400 + }, + { + "epoch": 0.4047549027195196, + "grad_norm": 21261.9375, + "learning_rate": 7.700750726028609e-05, + "loss": 0.4369, + "step": 78450 + }, + { + "epoch": 0.4050128727021324, + "grad_norm": 25343.57421875, + "learning_rate": 7.69767574277633e-05, + "loss": 0.4444, + "step": 78500 + }, + { + "epoch": 0.4052708426847452, + "grad_norm": 20222.767578125, + "learning_rate": 7.694599319556972e-05, + "loss": 0.4425, + "step": 78550 + }, + { + "epoch": 0.405528812667358, + "grad_norm": 22934.466796875, + "learning_rate": 7.691521458012678e-05, + "loss": 0.4411, + "step": 78600 + }, + { + "epoch": 0.40578678264997087, + "grad_norm": 22235.30078125, + "learning_rate": 7.688442159786346e-05, + "loss": 0.4445, + "step": 78650 + }, + { + "epoch": 0.40604475263258366, + "grad_norm": 21313.986328125, + "learning_rate": 7.68536142652165e-05, + "loss": 0.4341, + "step": 78700 + }, + { + "epoch": 0.4063027226151965, + "grad_norm": 20130.53515625, + "learning_rate": 7.68227925986302e-05, + "loss": 0.4395, + "step": 78750 + }, + { + "epoch": 0.4065606925978093, + "grad_norm": 19342.740234375, + "learning_rate": 7.679195661455664e-05, + "loss": 0.4424, + "step": 78800 + }, + { + "epoch": 0.40681866258042215, + "grad_norm": 21876.705078125, + "learning_rate": 7.676110632945543e-05, + "loss": 0.4415, + "step": 78850 + }, + { + "epoch": 0.40707663256303495, + "grad_norm": 23199.501953125, + "learning_rate": 7.673024175979384e-05, + "loss": 0.4423, + "step": 78900 + }, + { + "epoch": 0.4073346025456478, + "grad_norm": 22781.091796875, + "learning_rate": 7.669936292204683e-05, + "loss": 0.4398, + "step": 78950 + }, + { + "epoch": 0.4075925725282606, + "grad_norm": 24025.9375, + "learning_rate": 7.666846983269688e-05, + "loss": 0.4326, + "step": 79000 + }, + { + "epoch": 0.40785054251087344, + "grad_norm": 20797.056640625, + "learning_rate": 7.663756250823413e-05, + "loss": 0.4388, + "step": 79050 + }, + { + "epoch": 0.40810851249348623, + "grad_norm": 25106.67578125, + "learning_rate": 7.660664096515632e-05, + "loss": 0.4385, + "step": 79100 + }, + { + "epoch": 0.4083664824760991, + "grad_norm": 22217.36328125, + "learning_rate": 7.657570521996877e-05, + "loss": 0.4455, + "step": 79150 + }, + { + "epoch": 0.40862445245871193, + "grad_norm": 21679.291015625, + "learning_rate": 7.654475528918439e-05, + "loss": 0.4409, + "step": 79200 + }, + { + "epoch": 0.4088824224413247, + "grad_norm": 20133.583984375, + "learning_rate": 7.651379118932364e-05, + "loss": 0.4391, + "step": 79250 + }, + { + "epoch": 0.40914039242393757, + "grad_norm": 23019.171875, + "learning_rate": 7.648281293691457e-05, + "loss": 0.446, + "step": 79300 + }, + { + "epoch": 0.40939836240655036, + "grad_norm": 24098.38671875, + "learning_rate": 7.645182054849276e-05, + "loss": 0.4417, + "step": 79350 + }, + { + "epoch": 0.4096563323891632, + "grad_norm": 23057.240234375, + "learning_rate": 7.642081404060136e-05, + "loss": 0.4424, + "step": 79400 + }, + { + "epoch": 0.409914302371776, + "grad_norm": 20033.328125, + "learning_rate": 7.638979342979103e-05, + "loss": 0.4386, + "step": 79450 + }, + { + "epoch": 0.41017227235438886, + "grad_norm": 20978.68359375, + "learning_rate": 7.635875873261995e-05, + "loss": 0.4363, + "step": 79500 + }, + { + "epoch": 0.41043024233700165, + "grad_norm": 21347.068359375, + "learning_rate": 7.63277099656539e-05, + "loss": 0.4431, + "step": 79550 + }, + { + "epoch": 0.4106882123196145, + "grad_norm": 22031.8125, + "learning_rate": 7.629664714546604e-05, + "loss": 0.4313, + "step": 79600 + }, + { + "epoch": 0.4109461823022273, + "grad_norm": 23963.99609375, + "learning_rate": 7.626557028863717e-05, + "loss": 0.4363, + "step": 79650 + }, + { + "epoch": 0.41120415228484014, + "grad_norm": 20183.259765625, + "learning_rate": 7.623447941175548e-05, + "loss": 0.4419, + "step": 79700 + }, + { + "epoch": 0.41146212226745293, + "grad_norm": 23588.68359375, + "learning_rate": 7.620337453141667e-05, + "loss": 0.4388, + "step": 79750 + }, + { + "epoch": 0.4117200922500658, + "grad_norm": 22210.7265625, + "learning_rate": 7.617225566422395e-05, + "loss": 0.442, + "step": 79800 + }, + { + "epoch": 0.41197806223267863, + "grad_norm": 18647.93359375, + "learning_rate": 7.614112282678794e-05, + "loss": 0.4349, + "step": 79850 + }, + { + "epoch": 0.4122360322152914, + "grad_norm": 20993.388671875, + "learning_rate": 7.610997603572675e-05, + "loss": 0.4386, + "step": 79900 + }, + { + "epoch": 0.4124940021979043, + "grad_norm": 23693.26171875, + "learning_rate": 7.607881530766596e-05, + "loss": 0.4385, + "step": 79950 + }, + { + "epoch": 0.41275197218051707, + "grad_norm": 22608.26953125, + "learning_rate": 7.604764065923852e-05, + "loss": 0.4415, + "step": 80000 + }, + { + "epoch": 0.41275197218051707, + "eval_loss": 0.4290848970413208, + "eval_runtime": 3332.9887, + "eval_samples_per_second": 930.432, + "eval_steps_per_second": 1.817, + "step": 80000 + }, + { + "epoch": 0.4130099421631299, + "grad_norm": 23348.44921875, + "learning_rate": 7.60164521070849e-05, + "loss": 0.4392, + "step": 80050 + }, + { + "epoch": 0.4132679121457427, + "grad_norm": 19942.9921875, + "learning_rate": 7.598524966785293e-05, + "loss": 0.4362, + "step": 80100 + }, + { + "epoch": 0.41352588212835556, + "grad_norm": 22776.587890625, + "learning_rate": 7.595403335819786e-05, + "loss": 0.4402, + "step": 80150 + }, + { + "epoch": 0.41378385211096835, + "grad_norm": 22519.923828125, + "learning_rate": 7.592280319478233e-05, + "loss": 0.4412, + "step": 80200 + }, + { + "epoch": 0.4140418220935812, + "grad_norm": 22480.52734375, + "learning_rate": 7.589155919427645e-05, + "loss": 0.4393, + "step": 80250 + }, + { + "epoch": 0.414299792076194, + "grad_norm": 20900.625, + "learning_rate": 7.586030137335762e-05, + "loss": 0.4344, + "step": 80300 + }, + { + "epoch": 0.41455776205880684, + "grad_norm": 21272.306640625, + "learning_rate": 7.582902974871069e-05, + "loss": 0.4385, + "step": 80350 + }, + { + "epoch": 0.41481573204141964, + "grad_norm": 21448.478515625, + "learning_rate": 7.57977443370278e-05, + "loss": 0.4395, + "step": 80400 + }, + { + "epoch": 0.4150737020240325, + "grad_norm": 21854.537109375, + "learning_rate": 7.576644515500855e-05, + "loss": 0.4411, + "step": 80450 + }, + { + "epoch": 0.4153316720066453, + "grad_norm": 21458.689453125, + "learning_rate": 7.573513221935979e-05, + "loss": 0.4429, + "step": 80500 + }, + { + "epoch": 0.41558964198925813, + "grad_norm": 21895.71875, + "learning_rate": 7.57038055467958e-05, + "loss": 0.4391, + "step": 80550 + }, + { + "epoch": 0.415847611971871, + "grad_norm": 23495.921875, + "learning_rate": 7.567246515403812e-05, + "loss": 0.4398, + "step": 80600 + }, + { + "epoch": 0.41610558195448377, + "grad_norm": 26117.8671875, + "learning_rate": 7.564111105781568e-05, + "loss": 0.4407, + "step": 80650 + }, + { + "epoch": 0.4163635519370966, + "grad_norm": 21881.818359375, + "learning_rate": 7.560974327486466e-05, + "loss": 0.4336, + "step": 80700 + }, + { + "epoch": 0.4166215219197094, + "grad_norm": 21309.1015625, + "learning_rate": 7.557836182192859e-05, + "loss": 0.4371, + "step": 80750 + }, + { + "epoch": 0.41687949190232226, + "grad_norm": 21723.498046875, + "learning_rate": 7.554696671575826e-05, + "loss": 0.4384, + "step": 80800 + }, + { + "epoch": 0.41713746188493506, + "grad_norm": 19767.9609375, + "learning_rate": 7.55155579731118e-05, + "loss": 0.4375, + "step": 80850 + }, + { + "epoch": 0.4173954318675479, + "grad_norm": 18992.958984375, + "learning_rate": 7.548413561075456e-05, + "loss": 0.4419, + "step": 80900 + }, + { + "epoch": 0.4176534018501607, + "grad_norm": 21593.255859375, + "learning_rate": 7.545269964545921e-05, + "loss": 0.4372, + "step": 80950 + }, + { + "epoch": 0.41791137183277355, + "grad_norm": 19369.3125, + "learning_rate": 7.542125009400565e-05, + "loss": 0.4402, + "step": 81000 + }, + { + "epoch": 0.41816934181538634, + "grad_norm": 20552.06640625, + "learning_rate": 7.538978697318105e-05, + "loss": 0.4418, + "step": 81050 + }, + { + "epoch": 0.4184273117979992, + "grad_norm": 21554.94140625, + "learning_rate": 7.53583102997798e-05, + "loss": 0.4406, + "step": 81100 + }, + { + "epoch": 0.418685281780612, + "grad_norm": 21098.296875, + "learning_rate": 7.532682009060356e-05, + "loss": 0.443, + "step": 81150 + }, + { + "epoch": 0.41894325176322483, + "grad_norm": 24148.71484375, + "learning_rate": 7.529531636246116e-05, + "loss": 0.4345, + "step": 81200 + }, + { + "epoch": 0.4192012217458376, + "grad_norm": 20404.298828125, + "learning_rate": 7.526379913216872e-05, + "loss": 0.4335, + "step": 81250 + }, + { + "epoch": 0.4194591917284505, + "grad_norm": 22061.607421875, + "learning_rate": 7.52322684165495e-05, + "loss": 0.4385, + "step": 81300 + }, + { + "epoch": 0.4197171617110633, + "grad_norm": 18455.380859375, + "learning_rate": 7.520072423243398e-05, + "loss": 0.4337, + "step": 81350 + }, + { + "epoch": 0.4199751316936761, + "grad_norm": 23344.2734375, + "learning_rate": 7.516916659665987e-05, + "loss": 0.4401, + "step": 81400 + }, + { + "epoch": 0.42023310167628897, + "grad_norm": 20872.77734375, + "learning_rate": 7.5137595526072e-05, + "loss": 0.4394, + "step": 81450 + }, + { + "epoch": 0.42049107165890176, + "grad_norm": 21003.841796875, + "learning_rate": 7.51060110375224e-05, + "loss": 0.4402, + "step": 81500 + }, + { + "epoch": 0.4207490416415146, + "grad_norm": 22772.330078125, + "learning_rate": 7.507441314787025e-05, + "loss": 0.4438, + "step": 81550 + }, + { + "epoch": 0.4210070116241274, + "grad_norm": 19593.216796875, + "learning_rate": 7.504280187398189e-05, + "loss": 0.4375, + "step": 81600 + }, + { + "epoch": 0.42126498160674025, + "grad_norm": 20914.66796875, + "learning_rate": 7.501117723273084e-05, + "loss": 0.4397, + "step": 81650 + }, + { + "epoch": 0.42152295158935305, + "grad_norm": 20479.12109375, + "learning_rate": 7.497953924099768e-05, + "loss": 0.4365, + "step": 81700 + }, + { + "epoch": 0.4217809215719659, + "grad_norm": 20309.25, + "learning_rate": 7.494788791567017e-05, + "loss": 0.4461, + "step": 81750 + }, + { + "epoch": 0.4220388915545787, + "grad_norm": 21467.72265625, + "learning_rate": 7.491622327364318e-05, + "loss": 0.4354, + "step": 81800 + }, + { + "epoch": 0.42229686153719154, + "grad_norm": 20826.80859375, + "learning_rate": 7.488454533181871e-05, + "loss": 0.4398, + "step": 81850 + }, + { + "epoch": 0.42255483151980433, + "grad_norm": 20537.826171875, + "learning_rate": 7.485285410710577e-05, + "loss": 0.4443, + "step": 81900 + }, + { + "epoch": 0.4228128015024172, + "grad_norm": 19521.810546875, + "learning_rate": 7.482114961642057e-05, + "loss": 0.4379, + "step": 81950 + }, + { + "epoch": 0.42307077148503003, + "grad_norm": 19407.5234375, + "learning_rate": 7.478943187668633e-05, + "loss": 0.4429, + "step": 82000 + }, + { + "epoch": 0.4233287414676428, + "grad_norm": 23058.337890625, + "learning_rate": 7.475770090483338e-05, + "loss": 0.4362, + "step": 82050 + }, + { + "epoch": 0.42358671145025567, + "grad_norm": 27362.29296875, + "learning_rate": 7.472595671779907e-05, + "loss": 0.4413, + "step": 82100 + }, + { + "epoch": 0.42384468143286846, + "grad_norm": 20389.08203125, + "learning_rate": 7.469419933252789e-05, + "loss": 0.4386, + "step": 82150 + }, + { + "epoch": 0.4241026514154813, + "grad_norm": 21554.896484375, + "learning_rate": 7.466242876597125e-05, + "loss": 0.4387, + "step": 82200 + }, + { + "epoch": 0.4243606213980941, + "grad_norm": 23449.822265625, + "learning_rate": 7.463064503508772e-05, + "loss": 0.4402, + "step": 82250 + }, + { + "epoch": 0.42461859138070696, + "grad_norm": 23945.1328125, + "learning_rate": 7.459884815684279e-05, + "loss": 0.4393, + "step": 82300 + }, + { + "epoch": 0.42487656136331975, + "grad_norm": 21705.064453125, + "learning_rate": 7.456703814820904e-05, + "loss": 0.4374, + "step": 82350 + }, + { + "epoch": 0.4251345313459326, + "grad_norm": 20050.66796875, + "learning_rate": 7.453521502616607e-05, + "loss": 0.4433, + "step": 82400 + }, + { + "epoch": 0.4253925013285454, + "grad_norm": 24757.845703125, + "learning_rate": 7.45033788077004e-05, + "loss": 0.4362, + "step": 82450 + }, + { + "epoch": 0.42565047131115824, + "grad_norm": 21754.42578125, + "learning_rate": 7.44715295098056e-05, + "loss": 0.4386, + "step": 82500 + }, + { + "epoch": 0.42590844129377103, + "grad_norm": 22891.12890625, + "learning_rate": 7.443966714948222e-05, + "loss": 0.4438, + "step": 82550 + }, + { + "epoch": 0.4261664112763839, + "grad_norm": 22174.580078125, + "learning_rate": 7.440779174373776e-05, + "loss": 0.4388, + "step": 82600 + }, + { + "epoch": 0.4264243812589967, + "grad_norm": 20407.677734375, + "learning_rate": 7.43759033095867e-05, + "loss": 0.4412, + "step": 82650 + }, + { + "epoch": 0.4266823512416095, + "grad_norm": 21960.552734375, + "learning_rate": 7.434400186405045e-05, + "loss": 0.4394, + "step": 82700 + }, + { + "epoch": 0.4269403212242224, + "grad_norm": 20736.583984375, + "learning_rate": 7.431208742415741e-05, + "loss": 0.4382, + "step": 82750 + }, + { + "epoch": 0.42719829120683517, + "grad_norm": 21133.63671875, + "learning_rate": 7.428016000694286e-05, + "loss": 0.4379, + "step": 82800 + }, + { + "epoch": 0.427456261189448, + "grad_norm": 23741.525390625, + "learning_rate": 7.424821962944908e-05, + "loss": 0.4398, + "step": 82850 + }, + { + "epoch": 0.4277142311720608, + "grad_norm": 21936.802734375, + "learning_rate": 7.42162663087252e-05, + "loss": 0.4383, + "step": 82900 + }, + { + "epoch": 0.42797220115467366, + "grad_norm": 24459.85546875, + "learning_rate": 7.418430006182727e-05, + "loss": 0.4393, + "step": 82950 + }, + { + "epoch": 0.42823017113728645, + "grad_norm": 21729.9921875, + "learning_rate": 7.415232090581828e-05, + "loss": 0.4421, + "step": 83000 + }, + { + "epoch": 0.4284881411198993, + "grad_norm": 21081.5703125, + "learning_rate": 7.412032885776807e-05, + "loss": 0.4414, + "step": 83050 + }, + { + "epoch": 0.4287461111025121, + "grad_norm": 20296.740234375, + "learning_rate": 7.408832393475338e-05, + "loss": 0.4316, + "step": 83100 + }, + { + "epoch": 0.42900408108512494, + "grad_norm": 20874.30078125, + "learning_rate": 7.405630615385781e-05, + "loss": 0.433, + "step": 83150 + }, + { + "epoch": 0.42926205106773774, + "grad_norm": 20673.11328125, + "learning_rate": 7.402427553217183e-05, + "loss": 0.4386, + "step": 83200 + }, + { + "epoch": 0.4295200210503506, + "grad_norm": 22462.07421875, + "learning_rate": 7.39922320867928e-05, + "loss": 0.4464, + "step": 83250 + }, + { + "epoch": 0.4297779910329634, + "grad_norm": 20411.771484375, + "learning_rate": 7.396017583482487e-05, + "loss": 0.444, + "step": 83300 + }, + { + "epoch": 0.43003596101557623, + "grad_norm": 21137.6953125, + "learning_rate": 7.392810679337902e-05, + "loss": 0.4416, + "step": 83350 + }, + { + "epoch": 0.4302939309981891, + "grad_norm": 23059.064453125, + "learning_rate": 7.38960249795731e-05, + "loss": 0.4401, + "step": 83400 + }, + { + "epoch": 0.43055190098080187, + "grad_norm": 20305.22265625, + "learning_rate": 7.386393041053176e-05, + "loss": 0.4399, + "step": 83450 + }, + { + "epoch": 0.4308098709634147, + "grad_norm": 22247.779296875, + "learning_rate": 7.38318231033865e-05, + "loss": 0.4362, + "step": 83500 + }, + { + "epoch": 0.4310678409460275, + "grad_norm": 22231.337890625, + "learning_rate": 7.379970307527552e-05, + "loss": 0.4417, + "step": 83550 + }, + { + "epoch": 0.43132581092864036, + "grad_norm": 21788.875, + "learning_rate": 7.376757034334388e-05, + "loss": 0.4374, + "step": 83600 + }, + { + "epoch": 0.43158378091125316, + "grad_norm": 22237.51953125, + "learning_rate": 7.373542492474343e-05, + "loss": 0.4372, + "step": 83650 + }, + { + "epoch": 0.431841750893866, + "grad_norm": 21732.943359375, + "learning_rate": 7.370326683663278e-05, + "loss": 0.4395, + "step": 83700 + }, + { + "epoch": 0.4320997208764788, + "grad_norm": 19517.212890625, + "learning_rate": 7.367109609617729e-05, + "loss": 0.4371, + "step": 83750 + }, + { + "epoch": 0.43235769085909165, + "grad_norm": 23681.388671875, + "learning_rate": 7.363891272054903e-05, + "loss": 0.4383, + "step": 83800 + }, + { + "epoch": 0.43261566084170444, + "grad_norm": 23889.822265625, + "learning_rate": 7.360671672692691e-05, + "loss": 0.441, + "step": 83850 + }, + { + "epoch": 0.4328736308243173, + "grad_norm": 21159.45703125, + "learning_rate": 7.357450813249654e-05, + "loss": 0.4328, + "step": 83900 + }, + { + "epoch": 0.4331316008069301, + "grad_norm": 20617.83984375, + "learning_rate": 7.354228695445023e-05, + "loss": 0.4395, + "step": 83950 + }, + { + "epoch": 0.43338957078954293, + "grad_norm": 19741.568359375, + "learning_rate": 7.351005320998699e-05, + "loss": 0.4356, + "step": 84000 + }, + { + "epoch": 0.4336475407721557, + "grad_norm": 21407.771484375, + "learning_rate": 7.347780691631259e-05, + "loss": 0.4322, + "step": 84050 + }, + { + "epoch": 0.4339055107547686, + "grad_norm": 22396.5625, + "learning_rate": 7.344554809063947e-05, + "loss": 0.4379, + "step": 84100 + }, + { + "epoch": 0.4341634807373814, + "grad_norm": 23536.361328125, + "learning_rate": 7.34132767501868e-05, + "loss": 0.4372, + "step": 84150 + }, + { + "epoch": 0.4344214507199942, + "grad_norm": 23622.90234375, + "learning_rate": 7.338099291218036e-05, + "loss": 0.4361, + "step": 84200 + }, + { + "epoch": 0.43467942070260707, + "grad_norm": 24463.931640625, + "learning_rate": 7.334869659385264e-05, + "loss": 0.4478, + "step": 84250 + }, + { + "epoch": 0.43493739068521986, + "grad_norm": 21666.328125, + "learning_rate": 7.331638781244283e-05, + "loss": 0.4387, + "step": 84300 + }, + { + "epoch": 0.4351953606678327, + "grad_norm": 21145.6875, + "learning_rate": 7.328406658519669e-05, + "loss": 0.4362, + "step": 84350 + }, + { + "epoch": 0.4354533306504455, + "grad_norm": 21766.228515625, + "learning_rate": 7.325173292936667e-05, + "loss": 0.4433, + "step": 84400 + }, + { + "epoch": 0.43571130063305835, + "grad_norm": 23118.056640625, + "learning_rate": 7.321938686221185e-05, + "loss": 0.4317, + "step": 84450 + }, + { + "epoch": 0.43596927061567115, + "grad_norm": 20925.833984375, + "learning_rate": 7.318702840099793e-05, + "loss": 0.4348, + "step": 84500 + }, + { + "epoch": 0.436227240598284, + "grad_norm": 21725.630859375, + "learning_rate": 7.315465756299727e-05, + "loss": 0.4363, + "step": 84550 + }, + { + "epoch": 0.4364852105808968, + "grad_norm": 20223.537109375, + "learning_rate": 7.312227436548875e-05, + "loss": 0.4363, + "step": 84600 + }, + { + "epoch": 0.43674318056350964, + "grad_norm": 22766.71484375, + "learning_rate": 7.308987882575793e-05, + "loss": 0.442, + "step": 84650 + }, + { + "epoch": 0.43700115054612243, + "grad_norm": 20453.341796875, + "learning_rate": 7.305747096109688e-05, + "loss": 0.4362, + "step": 84700 + }, + { + "epoch": 0.4372591205287353, + "grad_norm": 20761.466796875, + "learning_rate": 7.302505078880431e-05, + "loss": 0.435, + "step": 84750 + }, + { + "epoch": 0.4375170905113481, + "grad_norm": 20815.27734375, + "learning_rate": 7.299261832618551e-05, + "loss": 0.4398, + "step": 84800 + }, + { + "epoch": 0.4377750604939609, + "grad_norm": 22528.06640625, + "learning_rate": 7.296017359055224e-05, + "loss": 0.44, + "step": 84850 + }, + { + "epoch": 0.43803303047657377, + "grad_norm": 21391.71484375, + "learning_rate": 7.292771659922293e-05, + "loss": 0.4376, + "step": 84900 + }, + { + "epoch": 0.43829100045918656, + "grad_norm": 21485.966796875, + "learning_rate": 7.289524736952245e-05, + "loss": 0.4424, + "step": 84950 + }, + { + "epoch": 0.4385489704417994, + "grad_norm": 21160.314453125, + "learning_rate": 7.286276591878228e-05, + "loss": 0.4473, + "step": 85000 + }, + { + "epoch": 0.4385489704417994, + "eval_loss": 0.4252757728099823, + "eval_runtime": 3252.991, + "eval_samples_per_second": 953.313, + "eval_steps_per_second": 1.862, + "step": 85000 + }, + { + "epoch": 0.4388069404244122, + "grad_norm": 29667.109375, + "learning_rate": 7.283027226434036e-05, + "loss": 0.4414, + "step": 85050 + }, + { + "epoch": 0.43906491040702506, + "grad_norm": 24990.86328125, + "learning_rate": 7.27977664235412e-05, + "loss": 0.4321, + "step": 85100 + }, + { + "epoch": 0.43932288038963785, + "grad_norm": 21708.86328125, + "learning_rate": 7.276524841373576e-05, + "loss": 0.4331, + "step": 85150 + }, + { + "epoch": 0.4395808503722507, + "grad_norm": 22323.1015625, + "learning_rate": 7.273271825228157e-05, + "loss": 0.4372, + "step": 85200 + }, + { + "epoch": 0.4398388203548635, + "grad_norm": 21696.2734375, + "learning_rate": 7.270017595654255e-05, + "loss": 0.4271, + "step": 85250 + }, + { + "epoch": 0.44009679033747634, + "grad_norm": 23364.560546875, + "learning_rate": 7.266762154388917e-05, + "loss": 0.4327, + "step": 85300 + }, + { + "epoch": 0.44035476032008913, + "grad_norm": 21834.607421875, + "learning_rate": 7.263505503169834e-05, + "loss": 0.4337, + "step": 85350 + }, + { + "epoch": 0.440612730302702, + "grad_norm": 18636.244140625, + "learning_rate": 7.260247643735343e-05, + "loss": 0.4393, + "step": 85400 + }, + { + "epoch": 0.4408707002853148, + "grad_norm": 20385.875, + "learning_rate": 7.256988577824427e-05, + "loss": 0.4398, + "step": 85450 + }, + { + "epoch": 0.4411286702679276, + "grad_norm": 21459.576171875, + "learning_rate": 7.253728307176713e-05, + "loss": 0.435, + "step": 85500 + }, + { + "epoch": 0.4413866402505405, + "grad_norm": 22838.716796875, + "learning_rate": 7.25046683353247e-05, + "loss": 0.4368, + "step": 85550 + }, + { + "epoch": 0.44164461023315327, + "grad_norm": 23016.4140625, + "learning_rate": 7.247204158632608e-05, + "loss": 0.4353, + "step": 85600 + }, + { + "epoch": 0.4419025802157661, + "grad_norm": 22318.193359375, + "learning_rate": 7.243940284218682e-05, + "loss": 0.4374, + "step": 85650 + }, + { + "epoch": 0.4421605501983789, + "grad_norm": 20475.376953125, + "learning_rate": 7.240675212032884e-05, + "loss": 0.4339, + "step": 85700 + }, + { + "epoch": 0.44241852018099176, + "grad_norm": 22276.287109375, + "learning_rate": 7.237408943818042e-05, + "loss": 0.4275, + "step": 85750 + }, + { + "epoch": 0.44267649016360455, + "grad_norm": 22131.654296875, + "learning_rate": 7.234141481317634e-05, + "loss": 0.4373, + "step": 85800 + }, + { + "epoch": 0.4429344601462174, + "grad_norm": 24779.14453125, + "learning_rate": 7.230872826275765e-05, + "loss": 0.4347, + "step": 85850 + }, + { + "epoch": 0.4431924301288302, + "grad_norm": 22474.443359375, + "learning_rate": 7.227602980437179e-05, + "loss": 0.4341, + "step": 85900 + }, + { + "epoch": 0.44345040011144304, + "grad_norm": 21620.056640625, + "learning_rate": 7.224331945547258e-05, + "loss": 0.4399, + "step": 85950 + }, + { + "epoch": 0.44370837009405584, + "grad_norm": 21546.8046875, + "learning_rate": 7.221059723352014e-05, + "loss": 0.4437, + "step": 86000 + }, + { + "epoch": 0.4439663400766687, + "grad_norm": 22283.0078125, + "learning_rate": 7.2177863155981e-05, + "loss": 0.4403, + "step": 86050 + }, + { + "epoch": 0.4442243100592815, + "grad_norm": 21332.576171875, + "learning_rate": 7.214511724032795e-05, + "loss": 0.4369, + "step": 86100 + }, + { + "epoch": 0.44448228004189433, + "grad_norm": 23106.01953125, + "learning_rate": 7.211235950404013e-05, + "loss": 0.4369, + "step": 86150 + }, + { + "epoch": 0.4447402500245071, + "grad_norm": 21826.2734375, + "learning_rate": 7.207958996460298e-05, + "loss": 0.4407, + "step": 86200 + }, + { + "epoch": 0.44499822000711997, + "grad_norm": 22308.90625, + "learning_rate": 7.204680863950825e-05, + "loss": 0.4349, + "step": 86250 + }, + { + "epoch": 0.4452561899897328, + "grad_norm": 24916.359375, + "learning_rate": 7.2014015546254e-05, + "loss": 0.436, + "step": 86300 + }, + { + "epoch": 0.4455141599723456, + "grad_norm": 22585.77734375, + "learning_rate": 7.198121070234453e-05, + "loss": 0.4311, + "step": 86350 + }, + { + "epoch": 0.44577212995495846, + "grad_norm": 22984.658203125, + "learning_rate": 7.194839412529042e-05, + "loss": 0.4324, + "step": 86400 + }, + { + "epoch": 0.44603009993757126, + "grad_norm": 22495.552734375, + "learning_rate": 7.191556583260853e-05, + "loss": 0.4306, + "step": 86450 + }, + { + "epoch": 0.4462880699201841, + "grad_norm": 21413.2578125, + "learning_rate": 7.188272584182196e-05, + "loss": 0.4404, + "step": 86500 + }, + { + "epoch": 0.4465460399027969, + "grad_norm": 23719.43359375, + "learning_rate": 7.184987417046007e-05, + "loss": 0.4321, + "step": 86550 + }, + { + "epoch": 0.44680400988540975, + "grad_norm": 22586.095703125, + "learning_rate": 7.181701083605846e-05, + "loss": 0.4349, + "step": 86600 + }, + { + "epoch": 0.44706197986802254, + "grad_norm": 20580.166015625, + "learning_rate": 7.178413585615891e-05, + "loss": 0.4323, + "step": 86650 + }, + { + "epoch": 0.4473199498506354, + "grad_norm": 21345.71875, + "learning_rate": 7.175124924830948e-05, + "loss": 0.4326, + "step": 86700 + }, + { + "epoch": 0.4475779198332482, + "grad_norm": 20615.333984375, + "learning_rate": 7.171835103006438e-05, + "loss": 0.4425, + "step": 86750 + }, + { + "epoch": 0.44783588981586103, + "grad_norm": 25518.546875, + "learning_rate": 7.168544121898407e-05, + "loss": 0.4307, + "step": 86800 + }, + { + "epoch": 0.4480938597984738, + "grad_norm": 23149.703125, + "learning_rate": 7.165251983263512e-05, + "loss": 0.4336, + "step": 86850 + }, + { + "epoch": 0.4483518297810867, + "grad_norm": 22026.19140625, + "learning_rate": 7.16195868885904e-05, + "loss": 0.4401, + "step": 86900 + }, + { + "epoch": 0.44860979976369947, + "grad_norm": 21140.90234375, + "learning_rate": 7.158664240442881e-05, + "loss": 0.436, + "step": 86950 + }, + { + "epoch": 0.4488677697463123, + "grad_norm": 25489.1796875, + "learning_rate": 7.155368639773552e-05, + "loss": 0.4379, + "step": 87000 + }, + { + "epoch": 0.44912573972892517, + "grad_norm": 21035.275390625, + "learning_rate": 7.152071888610176e-05, + "loss": 0.433, + "step": 87050 + }, + { + "epoch": 0.44938370971153796, + "grad_norm": 25905.03515625, + "learning_rate": 7.148773988712503e-05, + "loss": 0.4423, + "step": 87100 + }, + { + "epoch": 0.4496416796941508, + "grad_norm": 21237.857421875, + "learning_rate": 7.14547494184088e-05, + "loss": 0.4346, + "step": 87150 + }, + { + "epoch": 0.4498996496767636, + "grad_norm": 19255.748046875, + "learning_rate": 7.14217474975628e-05, + "loss": 0.4333, + "step": 87200 + }, + { + "epoch": 0.45015761965937645, + "grad_norm": 22115.05078125, + "learning_rate": 7.138873414220277e-05, + "loss": 0.4371, + "step": 87250 + }, + { + "epoch": 0.45041558964198924, + "grad_norm": 23271.462890625, + "learning_rate": 7.135570936995064e-05, + "loss": 0.4362, + "step": 87300 + }, + { + "epoch": 0.4506735596246021, + "grad_norm": 24245.02734375, + "learning_rate": 7.132267319843438e-05, + "loss": 0.4371, + "step": 87350 + }, + { + "epoch": 0.4509315296072149, + "grad_norm": 22234.224609375, + "learning_rate": 7.128962564528805e-05, + "loss": 0.4306, + "step": 87400 + }, + { + "epoch": 0.45118949958982774, + "grad_norm": 22704.115234375, + "learning_rate": 7.12565667281518e-05, + "loss": 0.4408, + "step": 87450 + }, + { + "epoch": 0.45144746957244053, + "grad_norm": 21906.650390625, + "learning_rate": 7.122349646467183e-05, + "loss": 0.4322, + "step": 87500 + }, + { + "epoch": 0.4517054395550534, + "grad_norm": 21960.501953125, + "learning_rate": 7.119041487250045e-05, + "loss": 0.4322, + "step": 87550 + }, + { + "epoch": 0.45196340953766617, + "grad_norm": 20264.14453125, + "learning_rate": 7.11573219692959e-05, + "loss": 0.4403, + "step": 87600 + }, + { + "epoch": 0.452221379520279, + "grad_norm": 20237.078125, + "learning_rate": 7.112421777272259e-05, + "loss": 0.4421, + "step": 87650 + }, + { + "epoch": 0.45247934950289187, + "grad_norm": 22111.3203125, + "learning_rate": 7.109110230045087e-05, + "loss": 0.4386, + "step": 87700 + }, + { + "epoch": 0.45273731948550466, + "grad_norm": 20690.015625, + "learning_rate": 7.105797557015715e-05, + "loss": 0.4315, + "step": 87750 + }, + { + "epoch": 0.4529952894681175, + "grad_norm": 23273.888671875, + "learning_rate": 7.102483759952384e-05, + "loss": 0.4397, + "step": 87800 + }, + { + "epoch": 0.4532532594507303, + "grad_norm": 20268.541015625, + "learning_rate": 7.099168840623935e-05, + "loss": 0.4381, + "step": 87850 + }, + { + "epoch": 0.45351122943334315, + "grad_norm": 21591.724609375, + "learning_rate": 7.095852800799806e-05, + "loss": 0.4368, + "step": 87900 + }, + { + "epoch": 0.45376919941595595, + "grad_norm": 20683.994140625, + "learning_rate": 7.092535642250035e-05, + "loss": 0.4315, + "step": 87950 + }, + { + "epoch": 0.4540271693985688, + "grad_norm": 22910.26953125, + "learning_rate": 7.089217366745258e-05, + "loss": 0.4415, + "step": 88000 + }, + { + "epoch": 0.4542851393811816, + "grad_norm": 22321.40234375, + "learning_rate": 7.085897976056706e-05, + "loss": 0.4386, + "step": 88050 + }, + { + "epoch": 0.45454310936379444, + "grad_norm": 20730.521484375, + "learning_rate": 7.082577471956206e-05, + "loss": 0.4335, + "step": 88100 + }, + { + "epoch": 0.45480107934640723, + "grad_norm": 23302.033203125, + "learning_rate": 7.079255856216177e-05, + "loss": 0.4366, + "step": 88150 + }, + { + "epoch": 0.4550590493290201, + "grad_norm": 21125.5625, + "learning_rate": 7.075933130609636e-05, + "loss": 0.4388, + "step": 88200 + }, + { + "epoch": 0.4553170193116329, + "grad_norm": 24245.548828125, + "learning_rate": 7.072609296910187e-05, + "loss": 0.4369, + "step": 88250 + }, + { + "epoch": 0.4555749892942457, + "grad_norm": 19609.1484375, + "learning_rate": 7.06928435689203e-05, + "loss": 0.4287, + "step": 88300 + }, + { + "epoch": 0.4558329592768585, + "grad_norm": 21653.08984375, + "learning_rate": 7.065958312329953e-05, + "loss": 0.4357, + "step": 88350 + }, + { + "epoch": 0.45609092925947137, + "grad_norm": 23725.236328125, + "learning_rate": 7.062631164999331e-05, + "loss": 0.4382, + "step": 88400 + }, + { + "epoch": 0.4563488992420842, + "grad_norm": 21436.92578125, + "learning_rate": 7.059302916676137e-05, + "loss": 0.4373, + "step": 88450 + }, + { + "epoch": 0.456606869224697, + "grad_norm": 20179.189453125, + "learning_rate": 7.05597356913692e-05, + "loss": 0.4304, + "step": 88500 + }, + { + "epoch": 0.45686483920730986, + "grad_norm": 22804.22265625, + "learning_rate": 7.052643124158824e-05, + "loss": 0.4343, + "step": 88550 + }, + { + "epoch": 0.45712280918992265, + "grad_norm": 21530.931640625, + "learning_rate": 7.049311583519574e-05, + "loss": 0.4364, + "step": 88600 + }, + { + "epoch": 0.4573807791725355, + "grad_norm": 21411.646484375, + "learning_rate": 7.045978948997486e-05, + "loss": 0.436, + "step": 88650 + }, + { + "epoch": 0.4576387491551483, + "grad_norm": 20853.962890625, + "learning_rate": 7.042645222371451e-05, + "loss": 0.436, + "step": 88700 + }, + { + "epoch": 0.45789671913776114, + "grad_norm": 20940.28125, + "learning_rate": 7.039310405420952e-05, + "loss": 0.4349, + "step": 88750 + }, + { + "epoch": 0.45815468912037394, + "grad_norm": 22368.05078125, + "learning_rate": 7.035974499926045e-05, + "loss": 0.4355, + "step": 88800 + }, + { + "epoch": 0.4584126591029868, + "grad_norm": 21155.3984375, + "learning_rate": 7.032637507667377e-05, + "loss": 0.4292, + "step": 88850 + }, + { + "epoch": 0.4586706290855996, + "grad_norm": 21627.353515625, + "learning_rate": 7.029299430426164e-05, + "loss": 0.4404, + "step": 88900 + }, + { + "epoch": 0.45892859906821243, + "grad_norm": 22008.23046875, + "learning_rate": 7.025960269984212e-05, + "loss": 0.431, + "step": 88950 + }, + { + "epoch": 0.4591865690508252, + "grad_norm": 21588.109375, + "learning_rate": 7.022620028123898e-05, + "loss": 0.4319, + "step": 89000 + }, + { + "epoch": 0.45944453903343807, + "grad_norm": 21680.646484375, + "learning_rate": 7.019278706628179e-05, + "loss": 0.4403, + "step": 89050 + }, + { + "epoch": 0.4597025090160509, + "grad_norm": 25427.423828125, + "learning_rate": 7.015936307280587e-05, + "loss": 0.435, + "step": 89100 + }, + { + "epoch": 0.4599604789986637, + "grad_norm": 22674.693359375, + "learning_rate": 7.01259283186523e-05, + "loss": 0.4377, + "step": 89150 + }, + { + "epoch": 0.46021844898127656, + "grad_norm": 24841.029296875, + "learning_rate": 7.009248282166793e-05, + "loss": 0.4387, + "step": 89200 + }, + { + "epoch": 0.46047641896388936, + "grad_norm": 21259.369140625, + "learning_rate": 7.005902659970528e-05, + "loss": 0.4355, + "step": 89250 + }, + { + "epoch": 0.4607343889465022, + "grad_norm": 19364.466796875, + "learning_rate": 7.002555967062265e-05, + "loss": 0.4353, + "step": 89300 + }, + { + "epoch": 0.460992358929115, + "grad_norm": 25116.47265625, + "learning_rate": 6.999208205228405e-05, + "loss": 0.4328, + "step": 89350 + }, + { + "epoch": 0.46125032891172785, + "grad_norm": 24426.4296875, + "learning_rate": 6.995859376255918e-05, + "loss": 0.4331, + "step": 89400 + }, + { + "epoch": 0.46150829889434064, + "grad_norm": 20802.759765625, + "learning_rate": 6.99250948193234e-05, + "loss": 0.4294, + "step": 89450 + }, + { + "epoch": 0.4617662688769535, + "grad_norm": 23164.2109375, + "learning_rate": 6.989158524045787e-05, + "loss": 0.4338, + "step": 89500 + }, + { + "epoch": 0.4620242388595663, + "grad_norm": 20543.28515625, + "learning_rate": 6.98580650438493e-05, + "loss": 0.4243, + "step": 89550 + }, + { + "epoch": 0.46228220884217913, + "grad_norm": 22468.732421875, + "learning_rate": 6.982453424739016e-05, + "loss": 0.4306, + "step": 89600 + }, + { + "epoch": 0.4625401788247919, + "grad_norm": 22903.12890625, + "learning_rate": 6.979099286897849e-05, + "loss": 0.4316, + "step": 89650 + }, + { + "epoch": 0.4627981488074048, + "grad_norm": 23074.068359375, + "learning_rate": 6.975744092651808e-05, + "loss": 0.4371, + "step": 89700 + }, + { + "epoch": 0.46305611879001757, + "grad_norm": 22003.00390625, + "learning_rate": 6.972387843791827e-05, + "loss": 0.4329, + "step": 89750 + }, + { + "epoch": 0.4633140887726304, + "grad_norm": 21524.93359375, + "learning_rate": 6.969030542109407e-05, + "loss": 0.4348, + "step": 89800 + }, + { + "epoch": 0.46357205875524327, + "grad_norm": 20501.130859375, + "learning_rate": 6.965672189396614e-05, + "loss": 0.4286, + "step": 89850 + }, + { + "epoch": 0.46383002873785606, + "grad_norm": 21559.396484375, + "learning_rate": 6.962312787446068e-05, + "loss": 0.434, + "step": 89900 + }, + { + "epoch": 0.4640879987204689, + "grad_norm": 21185.537109375, + "learning_rate": 6.958952338050955e-05, + "loss": 0.4326, + "step": 89950 + }, + { + "epoch": 0.4643459687030817, + "grad_norm": 23004.626953125, + "learning_rate": 6.955590843005016e-05, + "loss": 0.4272, + "step": 90000 + }, + { + "epoch": 0.4643459687030817, + "eval_loss": 0.4223860800266266, + "eval_runtime": 3251.8949, + "eval_samples_per_second": 953.635, + "eval_steps_per_second": 1.863, + "step": 90000 + }, + { + "epoch": 0.46460393868569455, + "grad_norm": 20333.259765625, + "learning_rate": 6.952228304102553e-05, + "loss": 0.4338, + "step": 90050 + }, + { + "epoch": 0.46486190866830734, + "grad_norm": 25967.029296875, + "learning_rate": 6.948864723138423e-05, + "loss": 0.4352, + "step": 90100 + }, + { + "epoch": 0.4651198786509202, + "grad_norm": 22849.9375, + "learning_rate": 6.945500101908043e-05, + "loss": 0.4358, + "step": 90150 + }, + { + "epoch": 0.465377848633533, + "grad_norm": 20628.9453125, + "learning_rate": 6.94213444220738e-05, + "loss": 0.4343, + "step": 90200 + }, + { + "epoch": 0.46563581861614584, + "grad_norm": 22179.84375, + "learning_rate": 6.938767745832959e-05, + "loss": 0.4314, + "step": 90250 + }, + { + "epoch": 0.46589378859875863, + "grad_norm": 24433.46484375, + "learning_rate": 6.935400014581858e-05, + "loss": 0.436, + "step": 90300 + }, + { + "epoch": 0.4661517585813715, + "grad_norm": 21914.666015625, + "learning_rate": 6.932031250251705e-05, + "loss": 0.431, + "step": 90350 + }, + { + "epoch": 0.46640972856398427, + "grad_norm": 19517.78125, + "learning_rate": 6.928661454640683e-05, + "loss": 0.4282, + "step": 90400 + }, + { + "epoch": 0.4666676985465971, + "grad_norm": 25924.5234375, + "learning_rate": 6.925290629547522e-05, + "loss": 0.4344, + "step": 90450 + }, + { + "epoch": 0.4669256685292099, + "grad_norm": 20866.927734375, + "learning_rate": 6.921918776771505e-05, + "loss": 0.4336, + "step": 90500 + }, + { + "epoch": 0.46718363851182276, + "grad_norm": 22734.5625, + "learning_rate": 6.91854589811246e-05, + "loss": 0.4375, + "step": 90550 + }, + { + "epoch": 0.4674416084944356, + "grad_norm": 21173.5703125, + "learning_rate": 6.915171995370766e-05, + "loss": 0.428, + "step": 90600 + }, + { + "epoch": 0.4676995784770484, + "grad_norm": 23864.681640625, + "learning_rate": 6.911797070347346e-05, + "loss": 0.4344, + "step": 90650 + }, + { + "epoch": 0.46795754845966125, + "grad_norm": 26236.091796875, + "learning_rate": 6.908421124843669e-05, + "loss": 0.4345, + "step": 90700 + }, + { + "epoch": 0.46821551844227405, + "grad_norm": 20788.6015625, + "learning_rate": 6.905044160661748e-05, + "loss": 0.4332, + "step": 90750 + }, + { + "epoch": 0.4684734884248869, + "grad_norm": 21382.2578125, + "learning_rate": 6.901666179604148e-05, + "loss": 0.4356, + "step": 90800 + }, + { + "epoch": 0.4687314584074997, + "grad_norm": 20230.220703125, + "learning_rate": 6.898287183473961e-05, + "loss": 0.4262, + "step": 90850 + }, + { + "epoch": 0.46898942839011254, + "grad_norm": 31838.697265625, + "learning_rate": 6.894907174074836e-05, + "loss": 0.4316, + "step": 90900 + }, + { + "epoch": 0.46924739837272533, + "grad_norm": 21029.5234375, + "learning_rate": 6.891526153210953e-05, + "loss": 0.4346, + "step": 90950 + }, + { + "epoch": 0.4695053683553382, + "grad_norm": 23617.826171875, + "learning_rate": 6.888144122687035e-05, + "loss": 0.4262, + "step": 91000 + }, + { + "epoch": 0.469763338337951, + "grad_norm": 23151.751953125, + "learning_rate": 6.884761084308349e-05, + "loss": 0.4296, + "step": 91050 + }, + { + "epoch": 0.4700213083205638, + "grad_norm": 19649.466796875, + "learning_rate": 6.881377039880692e-05, + "loss": 0.4325, + "step": 91100 + }, + { + "epoch": 0.4702792783031766, + "grad_norm": 20488.10546875, + "learning_rate": 6.8779919912104e-05, + "loss": 0.4352, + "step": 91150 + }, + { + "epoch": 0.47053724828578947, + "grad_norm": 21639.306640625, + "learning_rate": 6.874605940104349e-05, + "loss": 0.4319, + "step": 91200 + }, + { + "epoch": 0.4707952182684023, + "grad_norm": 21799.994140625, + "learning_rate": 6.871218888369947e-05, + "loss": 0.4315, + "step": 91250 + }, + { + "epoch": 0.4710531882510151, + "grad_norm": 22425.94140625, + "learning_rate": 6.867830837815137e-05, + "loss": 0.4381, + "step": 91300 + }, + { + "epoch": 0.47131115823362796, + "grad_norm": 22582.57421875, + "learning_rate": 6.864441790248396e-05, + "loss": 0.4297, + "step": 91350 + }, + { + "epoch": 0.47156912821624075, + "grad_norm": 21082.38671875, + "learning_rate": 6.861051747478726e-05, + "loss": 0.4292, + "step": 91400 + }, + { + "epoch": 0.4718270981988536, + "grad_norm": 23156.5546875, + "learning_rate": 6.857660711315672e-05, + "loss": 0.4276, + "step": 91450 + }, + { + "epoch": 0.4720850681814664, + "grad_norm": 21754.6796875, + "learning_rate": 6.854268683569302e-05, + "loss": 0.4369, + "step": 91500 + }, + { + "epoch": 0.47234303816407924, + "grad_norm": 22397.896484375, + "learning_rate": 6.850875666050216e-05, + "loss": 0.4312, + "step": 91550 + }, + { + "epoch": 0.47260100814669204, + "grad_norm": 21344.166015625, + "learning_rate": 6.847481660569537e-05, + "loss": 0.4291, + "step": 91600 + }, + { + "epoch": 0.4728589781293049, + "grad_norm": 23818.71484375, + "learning_rate": 6.844086668938923e-05, + "loss": 0.4352, + "step": 91650 + }, + { + "epoch": 0.4731169481119177, + "grad_norm": 21734.537109375, + "learning_rate": 6.840690692970554e-05, + "loss": 0.4326, + "step": 91700 + }, + { + "epoch": 0.47337491809453053, + "grad_norm": 22027.734375, + "learning_rate": 6.837293734477136e-05, + "loss": 0.4369, + "step": 91750 + }, + { + "epoch": 0.4736328880771433, + "grad_norm": 23111.103515625, + "learning_rate": 6.8338957952719e-05, + "loss": 0.4396, + "step": 91800 + }, + { + "epoch": 0.47389085805975617, + "grad_norm": 22521.767578125, + "learning_rate": 6.830496877168599e-05, + "loss": 0.4376, + "step": 91850 + }, + { + "epoch": 0.47414882804236896, + "grad_norm": 19730.158203125, + "learning_rate": 6.827096981981511e-05, + "loss": 0.4321, + "step": 91900 + }, + { + "epoch": 0.4744067980249818, + "grad_norm": 21871.134765625, + "learning_rate": 6.823696111525433e-05, + "loss": 0.4373, + "step": 91950 + }, + { + "epoch": 0.47466476800759466, + "grad_norm": 22332.384765625, + "learning_rate": 6.820294267615686e-05, + "loss": 0.4323, + "step": 92000 + }, + { + "epoch": 0.47492273799020746, + "grad_norm": 22426.59765625, + "learning_rate": 6.816891452068104e-05, + "loss": 0.4272, + "step": 92050 + }, + { + "epoch": 0.4751807079728203, + "grad_norm": 23286.05859375, + "learning_rate": 6.81348766669905e-05, + "loss": 0.4442, + "step": 92100 + }, + { + "epoch": 0.4754386779554331, + "grad_norm": 21696.1171875, + "learning_rate": 6.810082913325395e-05, + "loss": 0.4288, + "step": 92150 + }, + { + "epoch": 0.47569664793804595, + "grad_norm": 20548.908203125, + "learning_rate": 6.80667719376453e-05, + "loss": 0.4358, + "step": 92200 + }, + { + "epoch": 0.47595461792065874, + "grad_norm": 22605.1640625, + "learning_rate": 6.803270509834363e-05, + "loss": 0.4327, + "step": 92250 + }, + { + "epoch": 0.4762125879032716, + "grad_norm": 23604.30078125, + "learning_rate": 6.799862863353318e-05, + "loss": 0.441, + "step": 92300 + }, + { + "epoch": 0.4764705578858844, + "grad_norm": 22117.1796875, + "learning_rate": 6.796454256140328e-05, + "loss": 0.4289, + "step": 92350 + }, + { + "epoch": 0.47672852786849723, + "grad_norm": 22476.54296875, + "learning_rate": 6.793044690014842e-05, + "loss": 0.4319, + "step": 92400 + }, + { + "epoch": 0.47698649785111, + "grad_norm": 20855.140625, + "learning_rate": 6.789634166796821e-05, + "loss": 0.4326, + "step": 92450 + }, + { + "epoch": 0.4772444678337229, + "grad_norm": 23704.125, + "learning_rate": 6.786222688306734e-05, + "loss": 0.4374, + "step": 92500 + }, + { + "epoch": 0.47750243781633567, + "grad_norm": 20677.91015625, + "learning_rate": 6.782810256365568e-05, + "loss": 0.4261, + "step": 92550 + }, + { + "epoch": 0.4777604077989485, + "grad_norm": 21245.837890625, + "learning_rate": 6.779396872794807e-05, + "loss": 0.4309, + "step": 92600 + }, + { + "epoch": 0.4780183777815613, + "grad_norm": 25415.859375, + "learning_rate": 6.775982539416453e-05, + "loss": 0.437, + "step": 92650 + }, + { + "epoch": 0.47827634776417416, + "grad_norm": 20582.556640625, + "learning_rate": 6.772567258053007e-05, + "loss": 0.4349, + "step": 92700 + }, + { + "epoch": 0.478534317746787, + "grad_norm": 20002.013671875, + "learning_rate": 6.769151030527483e-05, + "loss": 0.4263, + "step": 92750 + }, + { + "epoch": 0.4787922877293998, + "grad_norm": 23287.6875, + "learning_rate": 6.765733858663397e-05, + "loss": 0.4332, + "step": 92800 + }, + { + "epoch": 0.47905025771201265, + "grad_norm": 22023.66796875, + "learning_rate": 6.76231574428477e-05, + "loss": 0.4339, + "step": 92850 + }, + { + "epoch": 0.47930822769462544, + "grad_norm": 21299.185546875, + "learning_rate": 6.758896689216122e-05, + "loss": 0.4293, + "step": 92900 + }, + { + "epoch": 0.4795661976772383, + "grad_norm": 21979.560546875, + "learning_rate": 6.755476695282479e-05, + "loss": 0.4314, + "step": 92950 + }, + { + "epoch": 0.4798241676598511, + "grad_norm": 21399.029296875, + "learning_rate": 6.752055764309372e-05, + "loss": 0.4374, + "step": 93000 + }, + { + "epoch": 0.48008213764246394, + "grad_norm": 23827.685546875, + "learning_rate": 6.748633898122823e-05, + "loss": 0.4348, + "step": 93050 + }, + { + "epoch": 0.48034010762507673, + "grad_norm": 21079.61328125, + "learning_rate": 6.74521109854936e-05, + "loss": 0.4312, + "step": 93100 + }, + { + "epoch": 0.4805980776076896, + "grad_norm": 20395.04296875, + "learning_rate": 6.741787367416006e-05, + "loss": 0.4246, + "step": 93150 + }, + { + "epoch": 0.48085604759030237, + "grad_norm": 21922.576171875, + "learning_rate": 6.738362706550284e-05, + "loss": 0.4355, + "step": 93200 + }, + { + "epoch": 0.4811140175729152, + "grad_norm": 21317.001953125, + "learning_rate": 6.734937117780211e-05, + "loss": 0.4302, + "step": 93250 + }, + { + "epoch": 0.481371987555528, + "grad_norm": 21387.46484375, + "learning_rate": 6.731510602934298e-05, + "loss": 0.434, + "step": 93300 + }, + { + "epoch": 0.48162995753814086, + "grad_norm": 24289.28515625, + "learning_rate": 6.728083163841554e-05, + "loss": 0.4338, + "step": 93350 + }, + { + "epoch": 0.4818879275207537, + "grad_norm": 23514.162109375, + "learning_rate": 6.72465480233148e-05, + "loss": 0.4357, + "step": 93400 + }, + { + "epoch": 0.4821458975033665, + "grad_norm": 21481.0859375, + "learning_rate": 6.721225520234068e-05, + "loss": 0.4307, + "step": 93450 + }, + { + "epoch": 0.48240386748597935, + "grad_norm": 25044.396484375, + "learning_rate": 6.717795319379805e-05, + "loss": 0.4335, + "step": 93500 + }, + { + "epoch": 0.48266183746859215, + "grad_norm": 21193.333984375, + "learning_rate": 6.714364201599662e-05, + "loss": 0.4243, + "step": 93550 + }, + { + "epoch": 0.482919807451205, + "grad_norm": 19113.275390625, + "learning_rate": 6.710932168725105e-05, + "loss": 0.4331, + "step": 93600 + }, + { + "epoch": 0.4831777774338178, + "grad_norm": 21924.162109375, + "learning_rate": 6.707499222588087e-05, + "loss": 0.4309, + "step": 93650 + }, + { + "epoch": 0.48343574741643064, + "grad_norm": 21123.498046875, + "learning_rate": 6.704065365021048e-05, + "loss": 0.4392, + "step": 93700 + }, + { + "epoch": 0.48369371739904343, + "grad_norm": 22201.29296875, + "learning_rate": 6.700630597856914e-05, + "loss": 0.4281, + "step": 93750 + }, + { + "epoch": 0.4839516873816563, + "grad_norm": 24237.494140625, + "learning_rate": 6.697194922929096e-05, + "loss": 0.4367, + "step": 93800 + }, + { + "epoch": 0.4842096573642691, + "grad_norm": 21306.8125, + "learning_rate": 6.693758342071495e-05, + "loss": 0.4374, + "step": 93850 + }, + { + "epoch": 0.4844676273468819, + "grad_norm": 22120.75, + "learning_rate": 6.690320857118488e-05, + "loss": 0.4309, + "step": 93900 + }, + { + "epoch": 0.4847255973294947, + "grad_norm": 20799.59765625, + "learning_rate": 6.686882469904939e-05, + "loss": 0.4262, + "step": 93950 + }, + { + "epoch": 0.48498356731210757, + "grad_norm": 22964.642578125, + "learning_rate": 6.683443182266192e-05, + "loss": 0.4338, + "step": 94000 + }, + { + "epoch": 0.48524153729472036, + "grad_norm": 22017.076171875, + "learning_rate": 6.68000299603807e-05, + "loss": 0.4317, + "step": 94050 + }, + { + "epoch": 0.4854995072773332, + "grad_norm": 21423.890625, + "learning_rate": 6.676561913056884e-05, + "loss": 0.4329, + "step": 94100 + }, + { + "epoch": 0.48575747725994606, + "grad_norm": 22123.390625, + "learning_rate": 6.67311993515941e-05, + "loss": 0.4309, + "step": 94150 + }, + { + "epoch": 0.48601544724255885, + "grad_norm": 23107.208984375, + "learning_rate": 6.669677064182915e-05, + "loss": 0.4316, + "step": 94200 + }, + { + "epoch": 0.4862734172251717, + "grad_norm": 21250.33203125, + "learning_rate": 6.666233301965132e-05, + "loss": 0.4289, + "step": 94250 + }, + { + "epoch": 0.4865313872077845, + "grad_norm": 21629.720703125, + "learning_rate": 6.66278865034428e-05, + "loss": 0.4301, + "step": 94300 + }, + { + "epoch": 0.48678935719039734, + "grad_norm": 23665.4609375, + "learning_rate": 6.659343111159043e-05, + "loss": 0.4267, + "step": 94350 + }, + { + "epoch": 0.48704732717301014, + "grad_norm": 23254.232421875, + "learning_rate": 6.655896686248583e-05, + "loss": 0.4266, + "step": 94400 + }, + { + "epoch": 0.487305297155623, + "grad_norm": 22491.404296875, + "learning_rate": 6.652449377452539e-05, + "loss": 0.4278, + "step": 94450 + }, + { + "epoch": 0.4875632671382358, + "grad_norm": 21071.74609375, + "learning_rate": 6.649001186611015e-05, + "loss": 0.4308, + "step": 94500 + }, + { + "epoch": 0.4878212371208486, + "grad_norm": 20860.861328125, + "learning_rate": 6.64555211556459e-05, + "loss": 0.4308, + "step": 94550 + }, + { + "epoch": 0.4880792071034614, + "grad_norm": 21733.033203125, + "learning_rate": 6.642102166154308e-05, + "loss": 0.4376, + "step": 94600 + }, + { + "epoch": 0.48833717708607427, + "grad_norm": 22799.3984375, + "learning_rate": 6.638651340221687e-05, + "loss": 0.4289, + "step": 94650 + }, + { + "epoch": 0.48859514706868706, + "grad_norm": 21678.296875, + "learning_rate": 6.635199639608709e-05, + "loss": 0.4301, + "step": 94700 + }, + { + "epoch": 0.4888531170512999, + "grad_norm": 20510.052734375, + "learning_rate": 6.631747066157831e-05, + "loss": 0.4276, + "step": 94750 + }, + { + "epoch": 0.48911108703391276, + "grad_norm": 21075.474609375, + "learning_rate": 6.628293621711964e-05, + "loss": 0.435, + "step": 94800 + }, + { + "epoch": 0.48936905701652555, + "grad_norm": 22063.083984375, + "learning_rate": 6.624839308114492e-05, + "loss": 0.434, + "step": 94850 + }, + { + "epoch": 0.4896270269991384, + "grad_norm": 20185.99609375, + "learning_rate": 6.621384127209261e-05, + "loss": 0.4246, + "step": 94900 + }, + { + "epoch": 0.4898849969817512, + "grad_norm": 22002.326171875, + "learning_rate": 6.61792808084058e-05, + "loss": 0.4272, + "step": 94950 + }, + { + "epoch": 0.49014296696436405, + "grad_norm": 22271.25, + "learning_rate": 6.614471170853218e-05, + "loss": 0.4323, + "step": 95000 + }, + { + "epoch": 0.49014296696436405, + "eval_loss": 0.4187907576560974, + "eval_runtime": 3274.3922, + "eval_samples_per_second": 947.083, + "eval_steps_per_second": 1.85, + "step": 95000 + }, + { + "epoch": 0.49040093694697684, + "grad_norm": 20668.224609375, + "learning_rate": 6.611013399092406e-05, + "loss": 0.4285, + "step": 95050 + }, + { + "epoch": 0.4906589069295897, + "grad_norm": 20890.05078125, + "learning_rate": 6.607554767403838e-05, + "loss": 0.4333, + "step": 95100 + }, + { + "epoch": 0.4909168769122025, + "grad_norm": 22767.6875, + "learning_rate": 6.604095277633664e-05, + "loss": 0.4284, + "step": 95150 + }, + { + "epoch": 0.49117484689481533, + "grad_norm": 22603.083984375, + "learning_rate": 6.600634931628493e-05, + "loss": 0.4332, + "step": 95200 + }, + { + "epoch": 0.4914328168774281, + "grad_norm": 25005.8984375, + "learning_rate": 6.597173731235388e-05, + "loss": 0.4284, + "step": 95250 + }, + { + "epoch": 0.491690786860041, + "grad_norm": 23687.4765625, + "learning_rate": 6.593711678301874e-05, + "loss": 0.4316, + "step": 95300 + }, + { + "epoch": 0.49194875684265377, + "grad_norm": 19670.087890625, + "learning_rate": 6.590248774675926e-05, + "loss": 0.4326, + "step": 95350 + }, + { + "epoch": 0.4922067268252666, + "grad_norm": 23065.818359375, + "learning_rate": 6.586785022205977e-05, + "loss": 0.4316, + "step": 95400 + }, + { + "epoch": 0.4924646968078794, + "grad_norm": 21279.01953125, + "learning_rate": 6.583320422740909e-05, + "loss": 0.4278, + "step": 95450 + }, + { + "epoch": 0.49272266679049226, + "grad_norm": 19707.6328125, + "learning_rate": 6.579854978130057e-05, + "loss": 0.4272, + "step": 95500 + }, + { + "epoch": 0.4929806367731051, + "grad_norm": 22938.3515625, + "learning_rate": 6.57638869022321e-05, + "loss": 0.4316, + "step": 95550 + }, + { + "epoch": 0.4932386067557179, + "grad_norm": 24812.65625, + "learning_rate": 6.572921560870607e-05, + "loss": 0.4315, + "step": 95600 + }, + { + "epoch": 0.49349657673833075, + "grad_norm": 21462.873046875, + "learning_rate": 6.569453591922931e-05, + "loss": 0.4299, + "step": 95650 + }, + { + "epoch": 0.49375454672094354, + "grad_norm": 22590.384765625, + "learning_rate": 6.565984785231318e-05, + "loss": 0.4294, + "step": 95700 + }, + { + "epoch": 0.4940125167035564, + "grad_norm": 23677.619140625, + "learning_rate": 6.56251514264735e-05, + "loss": 0.4379, + "step": 95750 + }, + { + "epoch": 0.4942704866861692, + "grad_norm": 22078.87109375, + "learning_rate": 6.559044666023057e-05, + "loss": 0.4276, + "step": 95800 + }, + { + "epoch": 0.49452845666878203, + "grad_norm": 22440.369140625, + "learning_rate": 6.55557335721091e-05, + "loss": 0.4279, + "step": 95850 + }, + { + "epoch": 0.49478642665139483, + "grad_norm": 24544.12109375, + "learning_rate": 6.552101218063826e-05, + "loss": 0.4305, + "step": 95900 + }, + { + "epoch": 0.4950443966340077, + "grad_norm": 21647.107421875, + "learning_rate": 6.548628250435167e-05, + "loss": 0.4328, + "step": 95950 + }, + { + "epoch": 0.49530236661662047, + "grad_norm": 21392.28125, + "learning_rate": 6.545154456178735e-05, + "loss": 0.4299, + "step": 96000 + }, + { + "epoch": 0.4955603365992333, + "grad_norm": 19458.55078125, + "learning_rate": 6.541679837148775e-05, + "loss": 0.4375, + "step": 96050 + }, + { + "epoch": 0.4958183065818461, + "grad_norm": 21774.14453125, + "learning_rate": 6.53820439519997e-05, + "loss": 0.4348, + "step": 96100 + }, + { + "epoch": 0.49607627656445896, + "grad_norm": 22902.63671875, + "learning_rate": 6.534728132187444e-05, + "loss": 0.4297, + "step": 96150 + }, + { + "epoch": 0.49633424654707176, + "grad_norm": 20869.306640625, + "learning_rate": 6.531251049966762e-05, + "loss": 0.4313, + "step": 96200 + }, + { + "epoch": 0.4965922165296846, + "grad_norm": 23554.537109375, + "learning_rate": 6.527773150393919e-05, + "loss": 0.4313, + "step": 96250 + }, + { + "epoch": 0.49685018651229745, + "grad_norm": 23000.92578125, + "learning_rate": 6.524294435325351e-05, + "loss": 0.4266, + "step": 96300 + }, + { + "epoch": 0.49710815649491025, + "grad_norm": 21331.72265625, + "learning_rate": 6.52081490661793e-05, + "loss": 0.4261, + "step": 96350 + }, + { + "epoch": 0.4973661264775231, + "grad_norm": 22540.75, + "learning_rate": 6.517334566128961e-05, + "loss": 0.4282, + "step": 96400 + }, + { + "epoch": 0.4976240964601359, + "grad_norm": 21733.560546875, + "learning_rate": 6.51385341571618e-05, + "loss": 0.43, + "step": 96450 + }, + { + "epoch": 0.49788206644274874, + "grad_norm": 23288.21875, + "learning_rate": 6.510371457237765e-05, + "loss": 0.4306, + "step": 96500 + }, + { + "epoch": 0.49814003642536153, + "grad_norm": 24475.9453125, + "learning_rate": 6.506888692552309e-05, + "loss": 0.4299, + "step": 96550 + }, + { + "epoch": 0.4983980064079744, + "grad_norm": 20756.5078125, + "learning_rate": 6.503405123518847e-05, + "loss": 0.4292, + "step": 96600 + }, + { + "epoch": 0.4986559763905872, + "grad_norm": 21059.365234375, + "learning_rate": 6.499920751996845e-05, + "loss": 0.4261, + "step": 96650 + }, + { + "epoch": 0.4989139463732, + "grad_norm": 22173.65625, + "learning_rate": 6.496435579846188e-05, + "loss": 0.4309, + "step": 96700 + }, + { + "epoch": 0.4991719163558128, + "grad_norm": 23941.49609375, + "learning_rate": 6.492949608927196e-05, + "loss": 0.4355, + "step": 96750 + }, + { + "epoch": 0.49942988633842567, + "grad_norm": 22027.400390625, + "learning_rate": 6.489462841100611e-05, + "loss": 0.433, + "step": 96800 + }, + { + "epoch": 0.49968785632103846, + "grad_norm": 21414.77734375, + "learning_rate": 6.485975278227605e-05, + "loss": 0.4291, + "step": 96850 + }, + { + "epoch": 0.4999458263036513, + "grad_norm": 23023.60546875, + "learning_rate": 6.482486922169767e-05, + "loss": 0.4309, + "step": 96900 + }, + { + "epoch": 0.5002037962862641, + "grad_norm": 23856.318359375, + "learning_rate": 6.478997774789119e-05, + "loss": 0.4314, + "step": 96950 + }, + { + "epoch": 0.500461766268877, + "grad_norm": 21834.822265625, + "learning_rate": 6.475507837948096e-05, + "loss": 0.4319, + "step": 97000 + }, + { + "epoch": 0.5007197362514898, + "grad_norm": 22487.779296875, + "learning_rate": 6.472017113509561e-05, + "loss": 0.4281, + "step": 97050 + }, + { + "epoch": 0.5009777062341026, + "grad_norm": 23955.73046875, + "learning_rate": 6.468525603336796e-05, + "loss": 0.4324, + "step": 97100 + }, + { + "epoch": 0.5012356762167154, + "grad_norm": 23631.203125, + "learning_rate": 6.4650333092935e-05, + "loss": 0.4333, + "step": 97150 + }, + { + "epoch": 0.5014936461993282, + "grad_norm": 21347.26953125, + "learning_rate": 6.461540233243792e-05, + "loss": 0.421, + "step": 97200 + }, + { + "epoch": 0.5017516161819411, + "grad_norm": 23590.9140625, + "learning_rate": 6.458046377052209e-05, + "loss": 0.4347, + "step": 97250 + }, + { + "epoch": 0.5020095861645539, + "grad_norm": 23192.708984375, + "learning_rate": 6.454551742583703e-05, + "loss": 0.4363, + "step": 97300 + }, + { + "epoch": 0.5022675561471667, + "grad_norm": 23588.974609375, + "learning_rate": 6.451056331703643e-05, + "loss": 0.4268, + "step": 97350 + }, + { + "epoch": 0.5025255261297795, + "grad_norm": 19536.3046875, + "learning_rate": 6.44756014627781e-05, + "loss": 0.4268, + "step": 97400 + }, + { + "epoch": 0.5027834961123924, + "grad_norm": 20248.345703125, + "learning_rate": 6.444063188172401e-05, + "loss": 0.4286, + "step": 97450 + }, + { + "epoch": 0.5030414660950052, + "grad_norm": 21598.1171875, + "learning_rate": 6.440565459254027e-05, + "loss": 0.4302, + "step": 97500 + }, + { + "epoch": 0.503299436077618, + "grad_norm": 25492.541015625, + "learning_rate": 6.437066961389704e-05, + "loss": 0.4223, + "step": 97550 + }, + { + "epoch": 0.5035574060602308, + "grad_norm": 22227.8125, + "learning_rate": 6.433567696446865e-05, + "loss": 0.4194, + "step": 97600 + }, + { + "epoch": 0.5038153760428437, + "grad_norm": 23799.134765625, + "learning_rate": 6.430067666293348e-05, + "loss": 0.4239, + "step": 97650 + }, + { + "epoch": 0.5040733460254565, + "grad_norm": 25147.080078125, + "learning_rate": 6.426566872797403e-05, + "loss": 0.4369, + "step": 97700 + }, + { + "epoch": 0.5043313160080694, + "grad_norm": 22497.68359375, + "learning_rate": 6.423065317827686e-05, + "loss": 0.4332, + "step": 97750 + }, + { + "epoch": 0.5045892859906821, + "grad_norm": 23273.966796875, + "learning_rate": 6.419563003253258e-05, + "loss": 0.4331, + "step": 97800 + }, + { + "epoch": 0.5048472559732949, + "grad_norm": 21943.7734375, + "learning_rate": 6.416059930943585e-05, + "loss": 0.4331, + "step": 97850 + }, + { + "epoch": 0.5051052259559078, + "grad_norm": 23134.685546875, + "learning_rate": 6.412556102768544e-05, + "loss": 0.4283, + "step": 97900 + }, + { + "epoch": 0.5053631959385206, + "grad_norm": 21504.177734375, + "learning_rate": 6.409051520598405e-05, + "loss": 0.4319, + "step": 97950 + }, + { + "epoch": 0.5056211659211334, + "grad_norm": 25481.029296875, + "learning_rate": 6.405546186303852e-05, + "loss": 0.4268, + "step": 98000 + }, + { + "epoch": 0.5058791359037462, + "grad_norm": 21170.70703125, + "learning_rate": 6.402040101755961e-05, + "loss": 0.4253, + "step": 98050 + }, + { + "epoch": 0.5061371058863591, + "grad_norm": 20005.333984375, + "learning_rate": 6.398533268826212e-05, + "loss": 0.4267, + "step": 98100 + }, + { + "epoch": 0.5063950758689719, + "grad_norm": 20913.32421875, + "learning_rate": 6.395025689386485e-05, + "loss": 0.4245, + "step": 98150 + }, + { + "epoch": 0.5066530458515847, + "grad_norm": 24310.720703125, + "learning_rate": 6.391517365309059e-05, + "loss": 0.4246, + "step": 98200 + }, + { + "epoch": 0.5069110158341975, + "grad_norm": 21981.455078125, + "learning_rate": 6.388008298466607e-05, + "loss": 0.4286, + "step": 98250 + }, + { + "epoch": 0.5071689858168104, + "grad_norm": 23764.30078125, + "learning_rate": 6.384498490732202e-05, + "loss": 0.4282, + "step": 98300 + }, + { + "epoch": 0.5074269557994232, + "grad_norm": 20518.447265625, + "learning_rate": 6.380987943979314e-05, + "loss": 0.4333, + "step": 98350 + }, + { + "epoch": 0.5076849257820361, + "grad_norm": 23327.80859375, + "learning_rate": 6.377476660081803e-05, + "loss": 0.4255, + "step": 98400 + }, + { + "epoch": 0.5079428957646488, + "grad_norm": 19600.84375, + "learning_rate": 6.373964640913924e-05, + "loss": 0.4277, + "step": 98450 + }, + { + "epoch": 0.5082008657472616, + "grad_norm": 23252.146484375, + "learning_rate": 6.370451888350322e-05, + "loss": 0.4311, + "step": 98500 + }, + { + "epoch": 0.5084588357298745, + "grad_norm": 21930.736328125, + "learning_rate": 6.366938404266041e-05, + "loss": 0.4329, + "step": 98550 + }, + { + "epoch": 0.5087168057124873, + "grad_norm": 21249.69140625, + "learning_rate": 6.36342419053651e-05, + "loss": 0.4257, + "step": 98600 + }, + { + "epoch": 0.5089747756951001, + "grad_norm": 21809.4609375, + "learning_rate": 6.359909249037548e-05, + "loss": 0.431, + "step": 98650 + }, + { + "epoch": 0.5092327456777129, + "grad_norm": 23142.6796875, + "learning_rate": 6.356393581645359e-05, + "loss": 0.4329, + "step": 98700 + }, + { + "epoch": 0.5094907156603258, + "grad_norm": 21783.541015625, + "learning_rate": 6.352877190236542e-05, + "loss": 0.4362, + "step": 98750 + }, + { + "epoch": 0.5097486856429386, + "grad_norm": 22534.080078125, + "learning_rate": 6.349360076688079e-05, + "loss": 0.4302, + "step": 98800 + }, + { + "epoch": 0.5100066556255514, + "grad_norm": 22630.03515625, + "learning_rate": 6.345842242877336e-05, + "loss": 0.4314, + "step": 98850 + }, + { + "epoch": 0.5102646256081642, + "grad_norm": 23446.0390625, + "learning_rate": 6.342323690682064e-05, + "loss": 0.428, + "step": 98900 + }, + { + "epoch": 0.5105225955907771, + "grad_norm": 25644.2734375, + "learning_rate": 6.338804421980398e-05, + "loss": 0.4219, + "step": 98950 + }, + { + "epoch": 0.5107805655733899, + "grad_norm": 23159.580078125, + "learning_rate": 6.335284438650856e-05, + "loss": 0.434, + "step": 99000 + }, + { + "epoch": 0.5110385355560026, + "grad_norm": 23536.556640625, + "learning_rate": 6.331763742572337e-05, + "loss": 0.4293, + "step": 99050 + }, + { + "epoch": 0.5112965055386155, + "grad_norm": 23240.662109375, + "learning_rate": 6.328242335624121e-05, + "loss": 0.434, + "step": 99100 + }, + { + "epoch": 0.5115544755212283, + "grad_norm": 22368.94921875, + "learning_rate": 6.324720219685866e-05, + "loss": 0.4295, + "step": 99150 + }, + { + "epoch": 0.5118124455038412, + "grad_norm": 23257.068359375, + "learning_rate": 6.321197396637608e-05, + "loss": 0.4198, + "step": 99200 + }, + { + "epoch": 0.512070415486454, + "grad_norm": 21806.6953125, + "learning_rate": 6.317673868359765e-05, + "loss": 0.4241, + "step": 99250 + }, + { + "epoch": 0.5123283854690668, + "grad_norm": 24117.416015625, + "learning_rate": 6.314149636733125e-05, + "loss": 0.4261, + "step": 99300 + }, + { + "epoch": 0.5125863554516796, + "grad_norm": 25262.626953125, + "learning_rate": 6.310624703638858e-05, + "loss": 0.4234, + "step": 99350 + }, + { + "epoch": 0.5128443254342925, + "grad_norm": 22739.923828125, + "learning_rate": 6.3070990709585e-05, + "loss": 0.4299, + "step": 99400 + }, + { + "epoch": 0.5131022954169053, + "grad_norm": 20651.646484375, + "learning_rate": 6.303572740573971e-05, + "loss": 0.4307, + "step": 99450 + }, + { + "epoch": 0.5133602653995181, + "grad_norm": 22125.037109375, + "learning_rate": 6.300045714367555e-05, + "loss": 0.4216, + "step": 99500 + }, + { + "epoch": 0.5136182353821309, + "grad_norm": 22210.080078125, + "learning_rate": 6.29651799422191e-05, + "loss": 0.429, + "step": 99550 + }, + { + "epoch": 0.5138762053647438, + "grad_norm": 23850.673828125, + "learning_rate": 6.292989582020063e-05, + "loss": 0.4337, + "step": 99600 + }, + { + "epoch": 0.5141341753473566, + "grad_norm": 21346.251953125, + "learning_rate": 6.289460479645417e-05, + "loss": 0.4352, + "step": 99650 + }, + { + "epoch": 0.5143921453299694, + "grad_norm": 22687.080078125, + "learning_rate": 6.285930688981735e-05, + "loss": 0.433, + "step": 99700 + }, + { + "epoch": 0.5146501153125822, + "grad_norm": 20447.666015625, + "learning_rate": 6.282400211913154e-05, + "loss": 0.4288, + "step": 99750 + }, + { + "epoch": 0.514908085295195, + "grad_norm": 21768.51953125, + "learning_rate": 6.278869050324168e-05, + "loss": 0.4363, + "step": 99800 + }, + { + "epoch": 0.5151660552778079, + "grad_norm": 21896.47265625, + "learning_rate": 6.27533720609965e-05, + "loss": 0.4307, + "step": 99850 + }, + { + "epoch": 0.5154240252604207, + "grad_norm": 22967.384765625, + "learning_rate": 6.271804681124827e-05, + "loss": 0.4295, + "step": 99900 + }, + { + "epoch": 0.5156819952430335, + "grad_norm": 20233.869140625, + "learning_rate": 6.268271477285292e-05, + "loss": 0.4329, + "step": 99950 + }, + { + "epoch": 0.5159399652256463, + "grad_norm": 20550.060546875, + "learning_rate": 6.264737596466998e-05, + "loss": 0.4267, + "step": 100000 + }, + { + "epoch": 0.5159399652256463, + "eval_loss": 0.4161209166049957, + "eval_runtime": 2887.0736, + "eval_samples_per_second": 1074.14, + "eval_steps_per_second": 2.098, + "step": 100000 + }, + { + "epoch": 0.5161979352082592, + "grad_norm": 22327.767578125, + "learning_rate": 6.261203040556267e-05, + "loss": 0.4272, + "step": 100050 + }, + { + "epoch": 0.516455905190872, + "grad_norm": 22512.1640625, + "learning_rate": 6.257667811439776e-05, + "loss": 0.4267, + "step": 100100 + }, + { + "epoch": 0.5167138751734848, + "grad_norm": 22710.8828125, + "learning_rate": 6.254131911004561e-05, + "loss": 0.42, + "step": 100150 + }, + { + "epoch": 0.5169718451560976, + "grad_norm": 21731.365234375, + "learning_rate": 6.250595341138014e-05, + "loss": 0.4259, + "step": 100200 + }, + { + "epoch": 0.5172298151387105, + "grad_norm": 21478.970703125, + "learning_rate": 6.247058103727892e-05, + "loss": 0.4217, + "step": 100250 + }, + { + "epoch": 0.5174877851213233, + "grad_norm": 22431.939453125, + "learning_rate": 6.243520200662303e-05, + "loss": 0.4272, + "step": 100300 + }, + { + "epoch": 0.5177457551039361, + "grad_norm": 22137.5078125, + "learning_rate": 6.239981633829709e-05, + "loss": 0.4301, + "step": 100350 + }, + { + "epoch": 0.5180037250865489, + "grad_norm": 22802.220703125, + "learning_rate": 6.23644240511893e-05, + "loss": 0.4346, + "step": 100400 + }, + { + "epoch": 0.5182616950691618, + "grad_norm": 20567.640625, + "learning_rate": 6.232902516419137e-05, + "loss": 0.4271, + "step": 100450 + }, + { + "epoch": 0.5185196650517746, + "grad_norm": 20855.70703125, + "learning_rate": 6.229361969619855e-05, + "loss": 0.4237, + "step": 100500 + }, + { + "epoch": 0.5187776350343875, + "grad_norm": 22052.44921875, + "learning_rate": 6.225820766610958e-05, + "loss": 0.4324, + "step": 100550 + }, + { + "epoch": 0.5190356050170002, + "grad_norm": 21984.818359375, + "learning_rate": 6.222278909282674e-05, + "loss": 0.4315, + "step": 100600 + }, + { + "epoch": 0.519293574999613, + "grad_norm": 22044.8359375, + "learning_rate": 6.218736399525575e-05, + "loss": 0.4324, + "step": 100650 + }, + { + "epoch": 0.5195515449822259, + "grad_norm": 22661.78515625, + "learning_rate": 6.215193239230586e-05, + "loss": 0.4273, + "step": 100700 + }, + { + "epoch": 0.5198095149648387, + "grad_norm": 22091.01171875, + "learning_rate": 6.211649430288976e-05, + "loss": 0.4252, + "step": 100750 + }, + { + "epoch": 0.5200674849474515, + "grad_norm": 22164.376953125, + "learning_rate": 6.208104974592364e-05, + "loss": 0.4272, + "step": 100800 + }, + { + "epoch": 0.5203254549300643, + "grad_norm": 23387.287109375, + "learning_rate": 6.20455987403271e-05, + "loss": 0.4281, + "step": 100850 + }, + { + "epoch": 0.5205834249126772, + "grad_norm": 22505.326171875, + "learning_rate": 6.201014130502317e-05, + "loss": 0.4285, + "step": 100900 + }, + { + "epoch": 0.52084139489529, + "grad_norm": 21150.341796875, + "learning_rate": 6.19746774589384e-05, + "loss": 0.4274, + "step": 100950 + }, + { + "epoch": 0.5210993648779028, + "grad_norm": 23076.650390625, + "learning_rate": 6.193920722100268e-05, + "loss": 0.4289, + "step": 101000 + }, + { + "epoch": 0.5213573348605156, + "grad_norm": 20890.41796875, + "learning_rate": 6.190373061014932e-05, + "loss": 0.4305, + "step": 101050 + }, + { + "epoch": 0.5216153048431285, + "grad_norm": 22231.6328125, + "learning_rate": 6.186824764531507e-05, + "loss": 0.4304, + "step": 101100 + }, + { + "epoch": 0.5218732748257413, + "grad_norm": 22094.197265625, + "learning_rate": 6.183275834544005e-05, + "loss": 0.4279, + "step": 101150 + }, + { + "epoch": 0.522131244808354, + "grad_norm": 23188.353515625, + "learning_rate": 6.179726272946774e-05, + "loss": 0.4272, + "step": 101200 + }, + { + "epoch": 0.5223892147909669, + "grad_norm": 22908.5, + "learning_rate": 6.176176081634504e-05, + "loss": 0.4229, + "step": 101250 + }, + { + "epoch": 0.5226471847735797, + "grad_norm": 21536.37109375, + "learning_rate": 6.172625262502215e-05, + "loss": 0.4267, + "step": 101300 + }, + { + "epoch": 0.5229051547561926, + "grad_norm": 22923.38671875, + "learning_rate": 6.169073817445268e-05, + "loss": 0.4256, + "step": 101350 + }, + { + "epoch": 0.5231631247388054, + "grad_norm": 22802.669921875, + "learning_rate": 6.165521748359356e-05, + "loss": 0.4241, + "step": 101400 + }, + { + "epoch": 0.5234210947214182, + "grad_norm": 22852.59765625, + "learning_rate": 6.161969057140504e-05, + "loss": 0.4275, + "step": 101450 + }, + { + "epoch": 0.523679064704031, + "grad_norm": 27410.056640625, + "learning_rate": 6.158415745685068e-05, + "loss": 0.4316, + "step": 101500 + }, + { + "epoch": 0.5239370346866439, + "grad_norm": 21783.482421875, + "learning_rate": 6.15486181588974e-05, + "loss": 0.4235, + "step": 101550 + }, + { + "epoch": 0.5241950046692567, + "grad_norm": 21013.259765625, + "learning_rate": 6.151307269651536e-05, + "loss": 0.426, + "step": 101600 + }, + { + "epoch": 0.5244529746518695, + "grad_norm": 23852.673828125, + "learning_rate": 6.147752108867807e-05, + "loss": 0.4226, + "step": 101650 + }, + { + "epoch": 0.5247109446344823, + "grad_norm": 24846.427734375, + "learning_rate": 6.144196335436225e-05, + "loss": 0.4277, + "step": 101700 + }, + { + "epoch": 0.5249689146170952, + "grad_norm": 21197.177734375, + "learning_rate": 6.140639951254796e-05, + "loss": 0.4247, + "step": 101750 + }, + { + "epoch": 0.525226884599708, + "grad_norm": 24620.37890625, + "learning_rate": 6.137082958221848e-05, + "loss": 0.429, + "step": 101800 + }, + { + "epoch": 0.5254848545823207, + "grad_norm": 22811.875, + "learning_rate": 6.133525358236036e-05, + "loss": 0.4274, + "step": 101850 + }, + { + "epoch": 0.5257428245649336, + "grad_norm": 20224.125, + "learning_rate": 6.129967153196336e-05, + "loss": 0.4338, + "step": 101900 + }, + { + "epoch": 0.5260007945475464, + "grad_norm": 21489.734375, + "learning_rate": 6.126408345002052e-05, + "loss": 0.4333, + "step": 101950 + }, + { + "epoch": 0.5262587645301593, + "grad_norm": 21771.20703125, + "learning_rate": 6.122848935552804e-05, + "loss": 0.4258, + "step": 102000 + }, + { + "epoch": 0.5265167345127721, + "grad_norm": 23362.43359375, + "learning_rate": 6.119288926748537e-05, + "loss": 0.4234, + "step": 102050 + }, + { + "epoch": 0.5267747044953849, + "grad_norm": 20869.46484375, + "learning_rate": 6.115728320489516e-05, + "loss": 0.4233, + "step": 102100 + }, + { + "epoch": 0.5270326744779977, + "grad_norm": 21146.568359375, + "learning_rate": 6.11216711867632e-05, + "loss": 0.4243, + "step": 102150 + }, + { + "epoch": 0.5272906444606106, + "grad_norm": 24031.97265625, + "learning_rate": 6.108605323209853e-05, + "loss": 0.4334, + "step": 102200 + }, + { + "epoch": 0.5275486144432234, + "grad_norm": 23461.306640625, + "learning_rate": 6.10504293599133e-05, + "loss": 0.4289, + "step": 102250 + }, + { + "epoch": 0.5278065844258362, + "grad_norm": 21013.169921875, + "learning_rate": 6.101479958922287e-05, + "loss": 0.4334, + "step": 102300 + }, + { + "epoch": 0.528064554408449, + "grad_norm": 23328.306640625, + "learning_rate": 6.0979163939045716e-05, + "loss": 0.4285, + "step": 102350 + }, + { + "epoch": 0.5283225243910619, + "grad_norm": 21542.20703125, + "learning_rate": 6.094352242840343e-05, + "loss": 0.4321, + "step": 102400 + }, + { + "epoch": 0.5285804943736747, + "grad_norm": 20556.357421875, + "learning_rate": 6.09078750763208e-05, + "loss": 0.4255, + "step": 102450 + }, + { + "epoch": 0.5288384643562875, + "grad_norm": 24925.21875, + "learning_rate": 6.0872221901825666e-05, + "loss": 0.4225, + "step": 102500 + }, + { + "epoch": 0.5290964343389003, + "grad_norm": 22750.419921875, + "learning_rate": 6.0836562923949016e-05, + "loss": 0.4287, + "step": 102550 + }, + { + "epoch": 0.5293544043215132, + "grad_norm": 21514.8984375, + "learning_rate": 6.080089816172489e-05, + "loss": 0.4254, + "step": 102600 + }, + { + "epoch": 0.529612374304126, + "grad_norm": 23347.03125, + "learning_rate": 6.07652276341905e-05, + "loss": 0.4346, + "step": 102650 + }, + { + "epoch": 0.5298703442867388, + "grad_norm": 23180.916015625, + "learning_rate": 6.072955136038604e-05, + "loss": 0.4244, + "step": 102700 + }, + { + "epoch": 0.5301283142693516, + "grad_norm": 20701.431640625, + "learning_rate": 6.069386935935484e-05, + "loss": 0.43, + "step": 102750 + }, + { + "epoch": 0.5303862842519644, + "grad_norm": 23350.99609375, + "learning_rate": 6.0658181650143245e-05, + "loss": 0.4217, + "step": 102800 + }, + { + "epoch": 0.5306442542345773, + "grad_norm": 21068.111328125, + "learning_rate": 6.062248825180066e-05, + "loss": 0.4278, + "step": 102850 + }, + { + "epoch": 0.5309022242171901, + "grad_norm": 23415.25, + "learning_rate": 6.0586789183379554e-05, + "loss": 0.4331, + "step": 102900 + }, + { + "epoch": 0.5311601941998029, + "grad_norm": 22186.048828125, + "learning_rate": 6.055108446393538e-05, + "loss": 0.4327, + "step": 102950 + }, + { + "epoch": 0.5314181641824157, + "grad_norm": 20644.166015625, + "learning_rate": 6.051537411252662e-05, + "loss": 0.4264, + "step": 103000 + }, + { + "epoch": 0.5316761341650286, + "grad_norm": 21755.712890625, + "learning_rate": 6.047965814821478e-05, + "loss": 0.4253, + "step": 103050 + }, + { + "epoch": 0.5319341041476414, + "grad_norm": 22319.177734375, + "learning_rate": 6.044393659006435e-05, + "loss": 0.4238, + "step": 103100 + }, + { + "epoch": 0.5321920741302542, + "grad_norm": 22544.064453125, + "learning_rate": 6.040820945714281e-05, + "loss": 0.4306, + "step": 103150 + }, + { + "epoch": 0.532450044112867, + "grad_norm": 21484.53125, + "learning_rate": 6.037247676852059e-05, + "loss": 0.4254, + "step": 103200 + }, + { + "epoch": 0.5327080140954799, + "grad_norm": 23923.201171875, + "learning_rate": 6.033673854327114e-05, + "loss": 0.4258, + "step": 103250 + }, + { + "epoch": 0.5329659840780927, + "grad_norm": 20412.08984375, + "learning_rate": 6.03009948004708e-05, + "loss": 0.4286, + "step": 103300 + }, + { + "epoch": 0.5332239540607056, + "grad_norm": 19932.908203125, + "learning_rate": 6.026524555919891e-05, + "loss": 0.4367, + "step": 103350 + }, + { + "epoch": 0.5334819240433183, + "grad_norm": 21761.033203125, + "learning_rate": 6.022949083853772e-05, + "loss": 0.4272, + "step": 103400 + }, + { + "epoch": 0.5337398940259311, + "grad_norm": 23392.29296875, + "learning_rate": 6.019373065757239e-05, + "loss": 0.4274, + "step": 103450 + }, + { + "epoch": 0.533997864008544, + "grad_norm": 26151.69921875, + "learning_rate": 6.015796503539103e-05, + "loss": 0.4189, + "step": 103500 + }, + { + "epoch": 0.5342558339911568, + "grad_norm": 22503.529296875, + "learning_rate": 6.012219399108463e-05, + "loss": 0.428, + "step": 103550 + }, + { + "epoch": 0.5345138039737696, + "grad_norm": 25906.685546875, + "learning_rate": 6.008641754374709e-05, + "loss": 0.4287, + "step": 103600 + }, + { + "epoch": 0.5347717739563824, + "grad_norm": 23784.685546875, + "learning_rate": 6.005063571247517e-05, + "loss": 0.4276, + "step": 103650 + }, + { + "epoch": 0.5350297439389953, + "grad_norm": 21574.30078125, + "learning_rate": 6.0014848516368515e-05, + "loss": 0.4344, + "step": 103700 + }, + { + "epoch": 0.5352877139216081, + "grad_norm": 22296.921875, + "learning_rate": 5.9979055974529675e-05, + "loss": 0.4322, + "step": 103750 + }, + { + "epoch": 0.5355456839042209, + "grad_norm": 21478.611328125, + "learning_rate": 5.994325810606397e-05, + "loss": 0.429, + "step": 103800 + }, + { + "epoch": 0.5358036538868337, + "grad_norm": 22572.37109375, + "learning_rate": 5.9907454930079645e-05, + "loss": 0.4281, + "step": 103850 + }, + { + "epoch": 0.5360616238694466, + "grad_norm": 23416.80859375, + "learning_rate": 5.98716464656877e-05, + "loss": 0.4266, + "step": 103900 + }, + { + "epoch": 0.5363195938520594, + "grad_norm": 23470.626953125, + "learning_rate": 5.983583273200204e-05, + "loss": 0.426, + "step": 103950 + }, + { + "epoch": 0.5365775638346721, + "grad_norm": 24464.38671875, + "learning_rate": 5.980001374813933e-05, + "loss": 0.4218, + "step": 104000 + }, + { + "epoch": 0.536835533817285, + "grad_norm": 23835.29296875, + "learning_rate": 5.976418953321904e-05, + "loss": 0.4261, + "step": 104050 + }, + { + "epoch": 0.5370935037998978, + "grad_norm": 23344.654296875, + "learning_rate": 5.972836010636346e-05, + "loss": 0.4292, + "step": 104100 + }, + { + "epoch": 0.5373514737825107, + "grad_norm": 23925.935546875, + "learning_rate": 5.9692525486697616e-05, + "loss": 0.4323, + "step": 104150 + }, + { + "epoch": 0.5376094437651235, + "grad_norm": 23155.76953125, + "learning_rate": 5.965668569334937e-05, + "loss": 0.428, + "step": 104200 + }, + { + "epoch": 0.5378674137477363, + "grad_norm": 22334.19921875, + "learning_rate": 5.962084074544928e-05, + "loss": 0.4129, + "step": 104250 + }, + { + "epoch": 0.5381253837303491, + "grad_norm": 20239.66796875, + "learning_rate": 5.95849906621307e-05, + "loss": 0.4335, + "step": 104300 + }, + { + "epoch": 0.538383353712962, + "grad_norm": 22626.19140625, + "learning_rate": 5.9549135462529704e-05, + "loss": 0.4274, + "step": 104350 + }, + { + "epoch": 0.5386413236955748, + "grad_norm": 21798.65625, + "learning_rate": 5.951327516578512e-05, + "loss": 0.4258, + "step": 104400 + }, + { + "epoch": 0.5388992936781876, + "grad_norm": 21796.7421875, + "learning_rate": 5.947740979103845e-05, + "loss": 0.4263, + "step": 104450 + }, + { + "epoch": 0.5391572636608004, + "grad_norm": 22380.21484375, + "learning_rate": 5.944153935743396e-05, + "loss": 0.4218, + "step": 104500 + }, + { + "epoch": 0.5394152336434133, + "grad_norm": 22526.4296875, + "learning_rate": 5.940566388411859e-05, + "loss": 0.4233, + "step": 104550 + }, + { + "epoch": 0.5396732036260261, + "grad_norm": 22876.5703125, + "learning_rate": 5.936978339024195e-05, + "loss": 0.4296, + "step": 104600 + }, + { + "epoch": 0.5399311736086388, + "grad_norm": 22592.654296875, + "learning_rate": 5.9333897894956394e-05, + "loss": 0.4287, + "step": 104650 + }, + { + "epoch": 0.5401891435912517, + "grad_norm": 21235.43359375, + "learning_rate": 5.929800741741688e-05, + "loss": 0.4269, + "step": 104700 + }, + { + "epoch": 0.5404471135738645, + "grad_norm": 22049.05859375, + "learning_rate": 5.926211197678104e-05, + "loss": 0.4266, + "step": 104750 + }, + { + "epoch": 0.5407050835564774, + "grad_norm": 23252.845703125, + "learning_rate": 5.922621159220918e-05, + "loss": 0.4223, + "step": 104800 + }, + { + "epoch": 0.5409630535390902, + "grad_norm": 20577.1796875, + "learning_rate": 5.919030628286424e-05, + "loss": 0.4302, + "step": 104850 + }, + { + "epoch": 0.541221023521703, + "grad_norm": 24854.8671875, + "learning_rate": 5.915439606791174e-05, + "loss": 0.4212, + "step": 104900 + }, + { + "epoch": 0.5414789935043158, + "grad_norm": 22561.552734375, + "learning_rate": 5.9118480966519906e-05, + "loss": 0.4196, + "step": 104950 + }, + { + "epoch": 0.5417369634869287, + "grad_norm": 23885.4765625, + "learning_rate": 5.9082560997859496e-05, + "loss": 0.421, + "step": 105000 + }, + { + "epoch": 0.5417369634869287, + "eval_loss": 0.4132173955440521, + "eval_runtime": 2876.3365, + "eval_samples_per_second": 1078.149, + "eval_steps_per_second": 2.106, + "step": 105000 + }, + { + "epoch": 0.5419949334695415, + "grad_norm": 20974.994140625, + "learning_rate": 5.90466361811039e-05, + "loss": 0.4228, + "step": 105050 + }, + { + "epoch": 0.5422529034521543, + "grad_norm": 24338.412109375, + "learning_rate": 5.9010706535429086e-05, + "loss": 0.4215, + "step": 105100 + }, + { + "epoch": 0.5425108734347671, + "grad_norm": 20734.796875, + "learning_rate": 5.8974772080013605e-05, + "loss": 0.4319, + "step": 105150 + }, + { + "epoch": 0.54276884341738, + "grad_norm": 21026.123046875, + "learning_rate": 5.8938832834038574e-05, + "loss": 0.4318, + "step": 105200 + }, + { + "epoch": 0.5430268133999928, + "grad_norm": 20023.287109375, + "learning_rate": 5.890288881668766e-05, + "loss": 0.4306, + "step": 105250 + }, + { + "epoch": 0.5432847833826056, + "grad_norm": 23171.42578125, + "learning_rate": 5.88669400471471e-05, + "loss": 0.4237, + "step": 105300 + }, + { + "epoch": 0.5435427533652184, + "grad_norm": 21692.109375, + "learning_rate": 5.8830986544605635e-05, + "loss": 0.4261, + "step": 105350 + }, + { + "epoch": 0.5438007233478312, + "grad_norm": 22358.216796875, + "learning_rate": 5.8795028328254566e-05, + "loss": 0.4204, + "step": 105400 + }, + { + "epoch": 0.5440586933304441, + "grad_norm": 22529.650390625, + "learning_rate": 5.875906541728766e-05, + "loss": 0.422, + "step": 105450 + }, + { + "epoch": 0.544316663313057, + "grad_norm": 18307.05859375, + "learning_rate": 5.8723097830901264e-05, + "loss": 0.4236, + "step": 105500 + }, + { + "epoch": 0.5445746332956697, + "grad_norm": 22356.583984375, + "learning_rate": 5.8687125588294154e-05, + "loss": 0.4213, + "step": 105550 + }, + { + "epoch": 0.5448326032782825, + "grad_norm": 21446.732421875, + "learning_rate": 5.8651148708667625e-05, + "loss": 0.4216, + "step": 105600 + }, + { + "epoch": 0.5450905732608954, + "grad_norm": 24014.49609375, + "learning_rate": 5.8615167211225416e-05, + "loss": 0.4283, + "step": 105650 + }, + { + "epoch": 0.5453485432435082, + "grad_norm": 22394.306640625, + "learning_rate": 5.8579181115173785e-05, + "loss": 0.4242, + "step": 105700 + }, + { + "epoch": 0.545606513226121, + "grad_norm": 25348.26171875, + "learning_rate": 5.8543190439721405e-05, + "loss": 0.4234, + "step": 105750 + }, + { + "epoch": 0.5458644832087338, + "grad_norm": 22638.720703125, + "learning_rate": 5.850719520407939e-05, + "loss": 0.4269, + "step": 105800 + }, + { + "epoch": 0.5461224531913467, + "grad_norm": 22702.841796875, + "learning_rate": 5.847119542746131e-05, + "loss": 0.4201, + "step": 105850 + }, + { + "epoch": 0.5463804231739595, + "grad_norm": 22299.849609375, + "learning_rate": 5.843519112908315e-05, + "loss": 0.4243, + "step": 105900 + }, + { + "epoch": 0.5466383931565723, + "grad_norm": 21965.283203125, + "learning_rate": 5.8399182328163304e-05, + "loss": 0.4209, + "step": 105950 + }, + { + "epoch": 0.5468963631391851, + "grad_norm": 22101.755859375, + "learning_rate": 5.836316904392256e-05, + "loss": 0.4254, + "step": 106000 + }, + { + "epoch": 0.547154333121798, + "grad_norm": 22735.970703125, + "learning_rate": 5.8327151295584126e-05, + "loss": 0.4251, + "step": 106050 + }, + { + "epoch": 0.5474123031044108, + "grad_norm": 24287.58203125, + "learning_rate": 5.829112910237359e-05, + "loss": 0.427, + "step": 106100 + }, + { + "epoch": 0.5476702730870235, + "grad_norm": 22509.02734375, + "learning_rate": 5.825510248351889e-05, + "loss": 0.4209, + "step": 106150 + }, + { + "epoch": 0.5479282430696364, + "grad_norm": 22325.32421875, + "learning_rate": 5.821907145825032e-05, + "loss": 0.4276, + "step": 106200 + }, + { + "epoch": 0.5481862130522492, + "grad_norm": 21362.255859375, + "learning_rate": 5.8183036045800556e-05, + "loss": 0.4273, + "step": 106250 + }, + { + "epoch": 0.5484441830348621, + "grad_norm": 22934.61328125, + "learning_rate": 5.814699626540461e-05, + "loss": 0.4318, + "step": 106300 + }, + { + "epoch": 0.5487021530174749, + "grad_norm": 23663.65625, + "learning_rate": 5.8110952136299814e-05, + "loss": 0.4246, + "step": 106350 + }, + { + "epoch": 0.5489601230000877, + "grad_norm": 20743.84765625, + "learning_rate": 5.807490367772584e-05, + "loss": 0.4289, + "step": 106400 + }, + { + "epoch": 0.5492180929827005, + "grad_norm": 20859.244140625, + "learning_rate": 5.8038850908924636e-05, + "loss": 0.4255, + "step": 106450 + }, + { + "epoch": 0.5494760629653134, + "grad_norm": 21824.990234375, + "learning_rate": 5.800279384914047e-05, + "loss": 0.4311, + "step": 106500 + }, + { + "epoch": 0.5497340329479262, + "grad_norm": 19514.681640625, + "learning_rate": 5.7966732517619926e-05, + "loss": 0.4311, + "step": 106550 + }, + { + "epoch": 0.549992002930539, + "grad_norm": 24263.765625, + "learning_rate": 5.7930666933611835e-05, + "loss": 0.4257, + "step": 106600 + }, + { + "epoch": 0.5502499729131518, + "grad_norm": 23152.279296875, + "learning_rate": 5.789459711636729e-05, + "loss": 0.4226, + "step": 106650 + }, + { + "epoch": 0.5505079428957647, + "grad_norm": 21756.8671875, + "learning_rate": 5.785852308513967e-05, + "loss": 0.4266, + "step": 106700 + }, + { + "epoch": 0.5507659128783775, + "grad_norm": 20913.3125, + "learning_rate": 5.78224448591846e-05, + "loss": 0.4228, + "step": 106750 + }, + { + "epoch": 0.5510238828609902, + "grad_norm": 24674.92578125, + "learning_rate": 5.778636245775996e-05, + "loss": 0.4246, + "step": 106800 + }, + { + "epoch": 0.5512818528436031, + "grad_norm": 24229.4296875, + "learning_rate": 5.775027590012579e-05, + "loss": 0.4244, + "step": 106850 + }, + { + "epoch": 0.5515398228262159, + "grad_norm": 21722.048828125, + "learning_rate": 5.771418520554443e-05, + "loss": 0.4264, + "step": 106900 + }, + { + "epoch": 0.5517977928088288, + "grad_norm": 22060.224609375, + "learning_rate": 5.7678090393280384e-05, + "loss": 0.4268, + "step": 106950 + }, + { + "epoch": 0.5520557627914416, + "grad_norm": 25690.306640625, + "learning_rate": 5.7641991482600366e-05, + "loss": 0.4298, + "step": 107000 + }, + { + "epoch": 0.5523137327740544, + "grad_norm": 24629.115234375, + "learning_rate": 5.7605888492773266e-05, + "loss": 0.4223, + "step": 107050 + }, + { + "epoch": 0.5525717027566672, + "grad_norm": 23552.78515625, + "learning_rate": 5.756978144307018e-05, + "loss": 0.4246, + "step": 107100 + }, + { + "epoch": 0.5528296727392801, + "grad_norm": 21611.703125, + "learning_rate": 5.753367035276431e-05, + "loss": 0.4173, + "step": 107150 + }, + { + "epoch": 0.5530876427218929, + "grad_norm": 24158.64453125, + "learning_rate": 5.749755524113111e-05, + "loss": 0.4211, + "step": 107200 + }, + { + "epoch": 0.5533456127045057, + "grad_norm": 23446.94140625, + "learning_rate": 5.746143612744811e-05, + "loss": 0.4262, + "step": 107250 + }, + { + "epoch": 0.5536035826871185, + "grad_norm": 21608.703125, + "learning_rate": 5.742531303099498e-05, + "loss": 0.424, + "step": 107300 + }, + { + "epoch": 0.5538615526697314, + "grad_norm": 25070.78125, + "learning_rate": 5.738918597105353e-05, + "loss": 0.4219, + "step": 107350 + }, + { + "epoch": 0.5541195226523442, + "grad_norm": 21161.5234375, + "learning_rate": 5.735305496690769e-05, + "loss": 0.4293, + "step": 107400 + }, + { + "epoch": 0.554377492634957, + "grad_norm": 23108.521484375, + "learning_rate": 5.7316920037843516e-05, + "loss": 0.427, + "step": 107450 + }, + { + "epoch": 0.5546354626175698, + "grad_norm": 22233.87890625, + "learning_rate": 5.728078120314909e-05, + "loss": 0.4204, + "step": 107500 + }, + { + "epoch": 0.5548934326001826, + "grad_norm": 24522.310546875, + "learning_rate": 5.724463848211464e-05, + "loss": 0.4257, + "step": 107550 + }, + { + "epoch": 0.5551514025827955, + "grad_norm": 22916.892578125, + "learning_rate": 5.720849189403244e-05, + "loss": 0.4251, + "step": 107600 + }, + { + "epoch": 0.5554093725654083, + "grad_norm": 20069.236328125, + "learning_rate": 5.7172341458196876e-05, + "loss": 0.424, + "step": 107650 + }, + { + "epoch": 0.5556673425480211, + "grad_norm": 25173.86328125, + "learning_rate": 5.713618719390432e-05, + "loss": 0.4294, + "step": 107700 + }, + { + "epoch": 0.5559253125306339, + "grad_norm": 21957.373046875, + "learning_rate": 5.710002912045323e-05, + "loss": 0.4239, + "step": 107750 + }, + { + "epoch": 0.5561832825132468, + "grad_norm": 20540.82421875, + "learning_rate": 5.706386725714407e-05, + "loss": 0.4259, + "step": 107800 + }, + { + "epoch": 0.5564412524958596, + "grad_norm": 22470.4921875, + "learning_rate": 5.702770162327936e-05, + "loss": 0.4273, + "step": 107850 + }, + { + "epoch": 0.5566992224784724, + "grad_norm": 21721.197265625, + "learning_rate": 5.69915322381636e-05, + "loss": 0.4233, + "step": 107900 + }, + { + "epoch": 0.5569571924610852, + "grad_norm": 21666.955078125, + "learning_rate": 5.6955359121103324e-05, + "loss": 0.4283, + "step": 107950 + }, + { + "epoch": 0.5572151624436981, + "grad_norm": 20970.9296875, + "learning_rate": 5.6919182291407014e-05, + "loss": 0.4275, + "step": 108000 + }, + { + "epoch": 0.5574731324263109, + "grad_norm": 22353.306640625, + "learning_rate": 5.688300176838518e-05, + "loss": 0.4244, + "step": 108050 + }, + { + "epoch": 0.5577311024089237, + "grad_norm": 22607.357421875, + "learning_rate": 5.68468175713503e-05, + "loss": 0.4252, + "step": 108100 + }, + { + "epoch": 0.5579890723915365, + "grad_norm": 21949.1015625, + "learning_rate": 5.681062971961677e-05, + "loss": 0.4194, + "step": 108150 + }, + { + "epoch": 0.5582470423741493, + "grad_norm": 23135.21875, + "learning_rate": 5.677443823250099e-05, + "loss": 0.425, + "step": 108200 + }, + { + "epoch": 0.5585050123567622, + "grad_norm": 19050.34765625, + "learning_rate": 5.673824312932123e-05, + "loss": 0.422, + "step": 108250 + }, + { + "epoch": 0.5587629823393749, + "grad_norm": 22969.15625, + "learning_rate": 5.67020444293978e-05, + "loss": 0.4253, + "step": 108300 + }, + { + "epoch": 0.5590209523219878, + "grad_norm": 22808.203125, + "learning_rate": 5.666584215205282e-05, + "loss": 0.4261, + "step": 108350 + }, + { + "epoch": 0.5592789223046006, + "grad_norm": 23061.126953125, + "learning_rate": 5.662963631661038e-05, + "loss": 0.4248, + "step": 108400 + }, + { + "epoch": 0.5595368922872135, + "grad_norm": 24134.693359375, + "learning_rate": 5.659342694239642e-05, + "loss": 0.4273, + "step": 108450 + }, + { + "epoch": 0.5597948622698263, + "grad_norm": 23659.2578125, + "learning_rate": 5.655721404873886e-05, + "loss": 0.427, + "step": 108500 + }, + { + "epoch": 0.5600528322524391, + "grad_norm": 20205.1953125, + "learning_rate": 5.652099765496741e-05, + "loss": 0.4257, + "step": 108550 + }, + { + "epoch": 0.5603108022350519, + "grad_norm": 21324.837890625, + "learning_rate": 5.6484777780413686e-05, + "loss": 0.4248, + "step": 108600 + }, + { + "epoch": 0.5605687722176648, + "grad_norm": 21779.849609375, + "learning_rate": 5.644855444441114e-05, + "loss": 0.4259, + "step": 108650 + }, + { + "epoch": 0.5608267422002776, + "grad_norm": 20502.0859375, + "learning_rate": 5.641232766629512e-05, + "loss": 0.4281, + "step": 108700 + }, + { + "epoch": 0.5610847121828904, + "grad_norm": 23600.5859375, + "learning_rate": 5.637609746540276e-05, + "loss": 0.4183, + "step": 108750 + }, + { + "epoch": 0.5613426821655032, + "grad_norm": 22977.41015625, + "learning_rate": 5.633986386107302e-05, + "loss": 0.4219, + "step": 108800 + }, + { + "epoch": 0.561600652148116, + "grad_norm": 23411.263671875, + "learning_rate": 5.630362687264672e-05, + "loss": 0.4268, + "step": 108850 + }, + { + "epoch": 0.5618586221307289, + "grad_norm": 20194.060546875, + "learning_rate": 5.6267386519466446e-05, + "loss": 0.4175, + "step": 108900 + }, + { + "epoch": 0.5621165921133416, + "grad_norm": 19387.88671875, + "learning_rate": 5.623114282087664e-05, + "loss": 0.4274, + "step": 108950 + }, + { + "epoch": 0.5623745620959545, + "grad_norm": 23158.28125, + "learning_rate": 5.619489579622343e-05, + "loss": 0.4222, + "step": 109000 + }, + { + "epoch": 0.5626325320785673, + "grad_norm": 23551.431640625, + "learning_rate": 5.6158645464854817e-05, + "loss": 0.428, + "step": 109050 + }, + { + "epoch": 0.5628905020611802, + "grad_norm": 23904.896484375, + "learning_rate": 5.6122391846120495e-05, + "loss": 0.4252, + "step": 109100 + }, + { + "epoch": 0.563148472043793, + "grad_norm": 21354.61328125, + "learning_rate": 5.608613495937197e-05, + "loss": 0.4202, + "step": 109150 + }, + { + "epoch": 0.5634064420264058, + "grad_norm": 23561.978515625, + "learning_rate": 5.6049874823962456e-05, + "loss": 0.4301, + "step": 109200 + }, + { + "epoch": 0.5636644120090186, + "grad_norm": 20979.53515625, + "learning_rate": 5.601361145924692e-05, + "loss": 0.4204, + "step": 109250 + }, + { + "epoch": 0.5639223819916315, + "grad_norm": 24039.125, + "learning_rate": 5.5977344884582e-05, + "loss": 0.4284, + "step": 109300 + }, + { + "epoch": 0.5641803519742443, + "grad_norm": 22242.35546875, + "learning_rate": 5.594107511932615e-05, + "loss": 0.4248, + "step": 109350 + }, + { + "epoch": 0.5644383219568571, + "grad_norm": 20016.1875, + "learning_rate": 5.5904802182839434e-05, + "loss": 0.4222, + "step": 109400 + }, + { + "epoch": 0.5646962919394699, + "grad_norm": 22243.0703125, + "learning_rate": 5.5868526094483666e-05, + "loss": 0.4276, + "step": 109450 + }, + { + "epoch": 0.5649542619220828, + "grad_norm": 23286.38671875, + "learning_rate": 5.58322468736223e-05, + "loss": 0.4208, + "step": 109500 + }, + { + "epoch": 0.5652122319046956, + "grad_norm": 21801.802734375, + "learning_rate": 5.579596453962047e-05, + "loss": 0.4275, + "step": 109550 + }, + { + "epoch": 0.5654702018873083, + "grad_norm": 23282.025390625, + "learning_rate": 5.575967911184502e-05, + "loss": 0.4255, + "step": 109600 + }, + { + "epoch": 0.5657281718699212, + "grad_norm": 25253.943359375, + "learning_rate": 5.572339060966439e-05, + "loss": 0.4239, + "step": 109650 + }, + { + "epoch": 0.565986141852534, + "grad_norm": 22364.595703125, + "learning_rate": 5.5687099052448675e-05, + "loss": 0.4255, + "step": 109700 + }, + { + "epoch": 0.5662441118351469, + "grad_norm": 23305.46484375, + "learning_rate": 5.565080445956961e-05, + "loss": 0.4254, + "step": 109750 + }, + { + "epoch": 0.5665020818177597, + "grad_norm": 20225.2421875, + "learning_rate": 5.561450685040054e-05, + "loss": 0.4239, + "step": 109800 + }, + { + "epoch": 0.5667600518003725, + "grad_norm": 20221.8203125, + "learning_rate": 5.557820624431645e-05, + "loss": 0.4171, + "step": 109850 + }, + { + "epoch": 0.5670180217829853, + "grad_norm": 19833.607421875, + "learning_rate": 5.554190266069387e-05, + "loss": 0.4224, + "step": 109900 + }, + { + "epoch": 0.5672759917655982, + "grad_norm": 19884.58203125, + "learning_rate": 5.550559611891095e-05, + "loss": 0.4196, + "step": 109950 + }, + { + "epoch": 0.567533961748211, + "grad_norm": 22072.25390625, + "learning_rate": 5.546928663834745e-05, + "loss": 0.4196, + "step": 110000 + }, + { + "epoch": 0.567533961748211, + "eval_loss": 0.4103853106498718, + "eval_runtime": 3606.5234, + "eval_samples_per_second": 859.864, + "eval_steps_per_second": 1.679, + "step": 110000 + }, + { + "epoch": 0.5677919317308238, + "grad_norm": 21647.181640625, + "learning_rate": 5.543297423838464e-05, + "loss": 0.414, + "step": 110050 + }, + { + "epoch": 0.5680499017134366, + "grad_norm": 23264.748046875, + "learning_rate": 5.5396658938405396e-05, + "loss": 0.4192, + "step": 110100 + }, + { + "epoch": 0.5683078716960495, + "grad_norm": 21868.10546875, + "learning_rate": 5.536034075779409e-05, + "loss": 0.4222, + "step": 110150 + }, + { + "epoch": 0.5685658416786623, + "grad_norm": 22489.07421875, + "learning_rate": 5.53240197159367e-05, + "loss": 0.4237, + "step": 110200 + }, + { + "epoch": 0.568823811661275, + "grad_norm": 21589.173828125, + "learning_rate": 5.5287695832220674e-05, + "loss": 0.4218, + "step": 110250 + }, + { + "epoch": 0.5690817816438879, + "grad_norm": 23184.103515625, + "learning_rate": 5.525136912603501e-05, + "loss": 0.4203, + "step": 110300 + }, + { + "epoch": 0.5693397516265007, + "grad_norm": 23085.970703125, + "learning_rate": 5.521503961677019e-05, + "loss": 0.4216, + "step": 110350 + }, + { + "epoch": 0.5695977216091136, + "grad_norm": 22217.3671875, + "learning_rate": 5.51787073238182e-05, + "loss": 0.424, + "step": 110400 + }, + { + "epoch": 0.5698556915917263, + "grad_norm": 23515.9375, + "learning_rate": 5.514237226657253e-05, + "loss": 0.4217, + "step": 110450 + }, + { + "epoch": 0.5701136615743392, + "grad_norm": 21375.2734375, + "learning_rate": 5.510603446442812e-05, + "loss": 0.4175, + "step": 110500 + }, + { + "epoch": 0.570371631556952, + "grad_norm": 21658.15625, + "learning_rate": 5.506969393678139e-05, + "loss": 0.4191, + "step": 110550 + }, + { + "epoch": 0.5706296015395649, + "grad_norm": 24653.294921875, + "learning_rate": 5.503335070303018e-05, + "loss": 0.419, + "step": 110600 + }, + { + "epoch": 0.5708875715221777, + "grad_norm": 21722.984375, + "learning_rate": 5.4997004782573855e-05, + "loss": 0.4237, + "step": 110650 + }, + { + "epoch": 0.5711455415047905, + "grad_norm": 20897.595703125, + "learning_rate": 5.496065619481312e-05, + "loss": 0.4211, + "step": 110700 + }, + { + "epoch": 0.5714035114874033, + "grad_norm": 20729.123046875, + "learning_rate": 5.4924304959150175e-05, + "loss": 0.4228, + "step": 110750 + }, + { + "epoch": 0.5716614814700162, + "grad_norm": 22107.888671875, + "learning_rate": 5.488795109498861e-05, + "loss": 0.4222, + "step": 110800 + }, + { + "epoch": 0.571919451452629, + "grad_norm": 24264.587890625, + "learning_rate": 5.485159462173337e-05, + "loss": 0.4232, + "step": 110850 + }, + { + "epoch": 0.5721774214352418, + "grad_norm": 23000.34375, + "learning_rate": 5.481523555879089e-05, + "loss": 0.4236, + "step": 110900 + }, + { + "epoch": 0.5724353914178546, + "grad_norm": 20345.26953125, + "learning_rate": 5.4778873925568905e-05, + "loss": 0.4254, + "step": 110950 + }, + { + "epoch": 0.5726933614004674, + "grad_norm": 25514.09765625, + "learning_rate": 5.4742509741476566e-05, + "loss": 0.4247, + "step": 111000 + }, + { + "epoch": 0.5729513313830803, + "grad_norm": 22510.115234375, + "learning_rate": 5.470614302592434e-05, + "loss": 0.4271, + "step": 111050 + }, + { + "epoch": 0.573209301365693, + "grad_norm": 24683.4921875, + "learning_rate": 5.466977379832411e-05, + "loss": 0.4207, + "step": 111100 + }, + { + "epoch": 0.5734672713483059, + "grad_norm": 22154.2890625, + "learning_rate": 5.4633402078089035e-05, + "loss": 0.422, + "step": 111150 + }, + { + "epoch": 0.5737252413309187, + "grad_norm": 23333.966796875, + "learning_rate": 5.459702788463367e-05, + "loss": 0.4218, + "step": 111200 + }, + { + "epoch": 0.5739832113135316, + "grad_norm": 26566.900390625, + "learning_rate": 5.4560651237373814e-05, + "loss": 0.4269, + "step": 111250 + }, + { + "epoch": 0.5742411812961444, + "grad_norm": 21463.828125, + "learning_rate": 5.452427215572666e-05, + "loss": 0.4196, + "step": 111300 + }, + { + "epoch": 0.5744991512787572, + "grad_norm": 24921.373046875, + "learning_rate": 5.448789065911064e-05, + "loss": 0.4248, + "step": 111350 + }, + { + "epoch": 0.57475712126137, + "grad_norm": 23610.16015625, + "learning_rate": 5.445150676694548e-05, + "loss": 0.4245, + "step": 111400 + }, + { + "epoch": 0.5750150912439829, + "grad_norm": 24598.2109375, + "learning_rate": 5.441512049865221e-05, + "loss": 0.4199, + "step": 111450 + }, + { + "epoch": 0.5752730612265957, + "grad_norm": 24330.02734375, + "learning_rate": 5.43787318736531e-05, + "loss": 0.423, + "step": 111500 + }, + { + "epoch": 0.5755310312092085, + "grad_norm": 23434.587890625, + "learning_rate": 5.434234091137171e-05, + "loss": 0.4214, + "step": 111550 + }, + { + "epoch": 0.5757890011918213, + "grad_norm": 25007.08203125, + "learning_rate": 5.430594763123283e-05, + "loss": 0.4258, + "step": 111600 + }, + { + "epoch": 0.5760469711744342, + "grad_norm": 24568.759765625, + "learning_rate": 5.4269552052662486e-05, + "loss": 0.4248, + "step": 111650 + }, + { + "epoch": 0.576304941157047, + "grad_norm": 22131.74609375, + "learning_rate": 5.423315419508792e-05, + "loss": 0.418, + "step": 111700 + }, + { + "epoch": 0.5765629111396597, + "grad_norm": 22058.443359375, + "learning_rate": 5.4196754077937626e-05, + "loss": 0.4289, + "step": 111750 + }, + { + "epoch": 0.5768208811222726, + "grad_norm": 23790.3203125, + "learning_rate": 5.4160351720641276e-05, + "loss": 0.4217, + "step": 111800 + }, + { + "epoch": 0.5770788511048854, + "grad_norm": 20349.287109375, + "learning_rate": 5.412394714262974e-05, + "loss": 0.4154, + "step": 111850 + }, + { + "epoch": 0.5773368210874983, + "grad_norm": 20262.9296875, + "learning_rate": 5.408754036333506e-05, + "loss": 0.4214, + "step": 111900 + }, + { + "epoch": 0.5775947910701111, + "grad_norm": 21678.17578125, + "learning_rate": 5.4051131402190494e-05, + "loss": 0.4168, + "step": 111950 + }, + { + "epoch": 0.5778527610527239, + "grad_norm": 22649.1875, + "learning_rate": 5.4014720278630415e-05, + "loss": 0.4206, + "step": 112000 + }, + { + "epoch": 0.5781107310353367, + "grad_norm": 22304.1328125, + "learning_rate": 5.39783070120904e-05, + "loss": 0.4199, + "step": 112050 + }, + { + "epoch": 0.5783687010179496, + "grad_norm": 22573.169921875, + "learning_rate": 5.394189162200715e-05, + "loss": 0.4325, + "step": 112100 + }, + { + "epoch": 0.5786266710005624, + "grad_norm": 22942.09765625, + "learning_rate": 5.390547412781847e-05, + "loss": 0.416, + "step": 112150 + }, + { + "epoch": 0.5788846409831752, + "grad_norm": 20210.18359375, + "learning_rate": 5.386905454896333e-05, + "loss": 0.4274, + "step": 112200 + }, + { + "epoch": 0.579142610965788, + "grad_norm": 22916.09375, + "learning_rate": 5.38326329048818e-05, + "loss": 0.4208, + "step": 112250 + }, + { + "epoch": 0.5794005809484009, + "grad_norm": 21563.78125, + "learning_rate": 5.379620921501503e-05, + "loss": 0.4264, + "step": 112300 + }, + { + "epoch": 0.5796585509310137, + "grad_norm": 20984.3671875, + "learning_rate": 5.375978349880528e-05, + "loss": 0.4229, + "step": 112350 + }, + { + "epoch": 0.5799165209136264, + "grad_norm": 22014.6640625, + "learning_rate": 5.372335577569592e-05, + "loss": 0.4205, + "step": 112400 + }, + { + "epoch": 0.5801744908962393, + "grad_norm": 22977.39453125, + "learning_rate": 5.3686926065131325e-05, + "loss": 0.4248, + "step": 112450 + }, + { + "epoch": 0.5804324608788521, + "grad_norm": 22589.521484375, + "learning_rate": 5.365049438655702e-05, + "loss": 0.4165, + "step": 112500 + }, + { + "epoch": 0.580690430861465, + "grad_norm": 24455.625, + "learning_rate": 5.3614060759419474e-05, + "loss": 0.4224, + "step": 112550 + }, + { + "epoch": 0.5809484008440777, + "grad_norm": 24485.833984375, + "learning_rate": 5.357762520316628e-05, + "loss": 0.4264, + "step": 112600 + }, + { + "epoch": 0.5812063708266906, + "grad_norm": 23294.244140625, + "learning_rate": 5.354118773724603e-05, + "loss": 0.4254, + "step": 112650 + }, + { + "epoch": 0.5814643408093034, + "grad_norm": 21813.884765625, + "learning_rate": 5.350474838110835e-05, + "loss": 0.4226, + "step": 112700 + }, + { + "epoch": 0.5817223107919163, + "grad_norm": 23532.0546875, + "learning_rate": 5.3468307154203836e-05, + "loss": 0.4236, + "step": 112750 + }, + { + "epoch": 0.5819802807745291, + "grad_norm": 24739.787109375, + "learning_rate": 5.343186407598413e-05, + "loss": 0.4276, + "step": 112800 + }, + { + "epoch": 0.5822382507571419, + "grad_norm": 23312.783203125, + "learning_rate": 5.339541916590184e-05, + "loss": 0.4281, + "step": 112850 + }, + { + "epoch": 0.5824962207397547, + "grad_norm": 24166.798828125, + "learning_rate": 5.335897244341054e-05, + "loss": 0.4238, + "step": 112900 + }, + { + "epoch": 0.5827541907223676, + "grad_norm": 23690.455078125, + "learning_rate": 5.332252392796478e-05, + "loss": 0.4181, + "step": 112950 + }, + { + "epoch": 0.5830121607049804, + "grad_norm": 23499.16015625, + "learning_rate": 5.32860736390201e-05, + "loss": 0.4143, + "step": 113000 + }, + { + "epoch": 0.5832701306875931, + "grad_norm": 23299.5625, + "learning_rate": 5.324962159603294e-05, + "loss": 0.4198, + "step": 113050 + }, + { + "epoch": 0.583528100670206, + "grad_norm": 22958.423828125, + "learning_rate": 5.321316781846071e-05, + "loss": 0.421, + "step": 113100 + }, + { + "epoch": 0.5837860706528188, + "grad_norm": 20775.119140625, + "learning_rate": 5.3176712325761704e-05, + "loss": 0.4148, + "step": 113150 + }, + { + "epoch": 0.5840440406354317, + "grad_norm": 23139.953125, + "learning_rate": 5.3140255137395155e-05, + "loss": 0.422, + "step": 113200 + }, + { + "epoch": 0.5843020106180444, + "grad_norm": 19829.94140625, + "learning_rate": 5.310379627282125e-05, + "loss": 0.4248, + "step": 113250 + }, + { + "epoch": 0.5845599806006573, + "grad_norm": 20085.572265625, + "learning_rate": 5.306733575150099e-05, + "loss": 0.4183, + "step": 113300 + }, + { + "epoch": 0.5848179505832701, + "grad_norm": 23206.27734375, + "learning_rate": 5.303087359289629e-05, + "loss": 0.423, + "step": 113350 + }, + { + "epoch": 0.585075920565883, + "grad_norm": 25039.34765625, + "learning_rate": 5.299440981646996e-05, + "loss": 0.4232, + "step": 113400 + }, + { + "epoch": 0.5853338905484958, + "grad_norm": 21276.865234375, + "learning_rate": 5.2957944441685646e-05, + "loss": 0.4205, + "step": 113450 + }, + { + "epoch": 0.5855918605311086, + "grad_norm": 22706.712890625, + "learning_rate": 5.292147748800788e-05, + "loss": 0.4225, + "step": 113500 + }, + { + "epoch": 0.5858498305137214, + "grad_norm": 18046.537109375, + "learning_rate": 5.2885008974902004e-05, + "loss": 0.4195, + "step": 113550 + }, + { + "epoch": 0.5861078004963343, + "grad_norm": 22363.5625, + "learning_rate": 5.28485389218342e-05, + "loss": 0.4149, + "step": 113600 + }, + { + "epoch": 0.5863657704789471, + "grad_norm": 24409.609375, + "learning_rate": 5.2812067348271466e-05, + "loss": 0.4224, + "step": 113650 + }, + { + "epoch": 0.5866237404615599, + "grad_norm": 23921.68359375, + "learning_rate": 5.277559427368164e-05, + "loss": 0.4274, + "step": 113700 + }, + { + "epoch": 0.5868817104441727, + "grad_norm": 23887.84765625, + "learning_rate": 5.273911971753335e-05, + "loss": 0.4185, + "step": 113750 + }, + { + "epoch": 0.5871396804267855, + "grad_norm": 23169.423828125, + "learning_rate": 5.270264369929597e-05, + "loss": 0.4218, + "step": 113800 + }, + { + "epoch": 0.5873976504093984, + "grad_norm": 23339.57421875, + "learning_rate": 5.266616623843972e-05, + "loss": 0.4211, + "step": 113850 + }, + { + "epoch": 0.5876556203920111, + "grad_norm": 22072.59765625, + "learning_rate": 5.2629687354435576e-05, + "loss": 0.4191, + "step": 113900 + }, + { + "epoch": 0.587913590374624, + "grad_norm": 24308.357421875, + "learning_rate": 5.259320706675523e-05, + "loss": 0.4168, + "step": 113950 + }, + { + "epoch": 0.5881715603572368, + "grad_norm": 20896.5703125, + "learning_rate": 5.255672539487119e-05, + "loss": 0.4201, + "step": 114000 + }, + { + "epoch": 0.5884295303398497, + "grad_norm": 20070.814453125, + "learning_rate": 5.252024235825661e-05, + "loss": 0.4216, + "step": 114050 + }, + { + "epoch": 0.5886875003224625, + "grad_norm": 24864.869140625, + "learning_rate": 5.2483757976385486e-05, + "loss": 0.4269, + "step": 114100 + }, + { + "epoch": 0.5889454703050753, + "grad_norm": 24734.0234375, + "learning_rate": 5.2447272268732436e-05, + "loss": 0.4196, + "step": 114150 + }, + { + "epoch": 0.5892034402876881, + "grad_norm": 22383.212890625, + "learning_rate": 5.2410785254772856e-05, + "loss": 0.4171, + "step": 114200 + }, + { + "epoch": 0.589461410270301, + "grad_norm": 22587.44921875, + "learning_rate": 5.237429695398276e-05, + "loss": 0.4232, + "step": 114250 + }, + { + "epoch": 0.5897193802529138, + "grad_norm": 23357.263671875, + "learning_rate": 5.2337807385838935e-05, + "loss": 0.4241, + "step": 114300 + }, + { + "epoch": 0.5899773502355266, + "grad_norm": 24632.125, + "learning_rate": 5.23013165698188e-05, + "loss": 0.4154, + "step": 114350 + }, + { + "epoch": 0.5902353202181394, + "grad_norm": 23191.818359375, + "learning_rate": 5.226482452540045e-05, + "loss": 0.424, + "step": 114400 + }, + { + "epoch": 0.5904932902007523, + "grad_norm": 23649.560546875, + "learning_rate": 5.2228331272062626e-05, + "loss": 0.427, + "step": 114450 + }, + { + "epoch": 0.5907512601833651, + "grad_norm": 23992.169921875, + "learning_rate": 5.21918368292847e-05, + "loss": 0.4267, + "step": 114500 + }, + { + "epoch": 0.5910092301659778, + "grad_norm": 21792.041015625, + "learning_rate": 5.215534121654673e-05, + "loss": 0.4272, + "step": 114550 + }, + { + "epoch": 0.5912672001485907, + "grad_norm": 25516.345703125, + "learning_rate": 5.211884445332935e-05, + "loss": 0.4207, + "step": 114600 + }, + { + "epoch": 0.5915251701312035, + "grad_norm": 22557.25390625, + "learning_rate": 5.208234655911384e-05, + "loss": 0.4228, + "step": 114650 + }, + { + "epoch": 0.5917831401138164, + "grad_norm": 24185.09765625, + "learning_rate": 5.2045847553382045e-05, + "loss": 0.4226, + "step": 114700 + }, + { + "epoch": 0.5920411100964291, + "grad_norm": 20565.134765625, + "learning_rate": 5.200934745561643e-05, + "loss": 0.4274, + "step": 114750 + }, + { + "epoch": 0.592299080079042, + "grad_norm": 24019.0, + "learning_rate": 5.197284628530007e-05, + "loss": 0.4234, + "step": 114800 + }, + { + "epoch": 0.5925570500616548, + "grad_norm": 26129.01171875, + "learning_rate": 5.193634406191658e-05, + "loss": 0.418, + "step": 114850 + }, + { + "epoch": 0.5928150200442677, + "grad_norm": 25187.611328125, + "learning_rate": 5.18998408049501e-05, + "loss": 0.4213, + "step": 114900 + }, + { + "epoch": 0.5930729900268805, + "grad_norm": 20145.14453125, + "learning_rate": 5.186333653388539e-05, + "loss": 0.418, + "step": 114950 + }, + { + "epoch": 0.5933309600094933, + "grad_norm": 22472.322265625, + "learning_rate": 5.182683126820773e-05, + "loss": 0.4209, + "step": 115000 + }, + { + "epoch": 0.5933309600094933, + "eval_loss": 0.4084908068180084, + "eval_runtime": 3582.6916, + "eval_samples_per_second": 865.584, + "eval_steps_per_second": 1.691, + "step": 115000 + }, + { + "epoch": 0.5935889299921061, + "grad_norm": 22404.052734375, + "learning_rate": 5.179032502740291e-05, + "loss": 0.4147, + "step": 115050 + }, + { + "epoch": 0.593846899974719, + "grad_norm": 21242.705078125, + "learning_rate": 5.175381783095725e-05, + "loss": 0.4237, + "step": 115100 + }, + { + "epoch": 0.5941048699573318, + "grad_norm": 22416.06640625, + "learning_rate": 5.171730969835758e-05, + "loss": 0.4185, + "step": 115150 + }, + { + "epoch": 0.5943628399399445, + "grad_norm": 22231.525390625, + "learning_rate": 5.1680800649091276e-05, + "loss": 0.4227, + "step": 115200 + }, + { + "epoch": 0.5946208099225574, + "grad_norm": 22431.462890625, + "learning_rate": 5.164429070264613e-05, + "loss": 0.4225, + "step": 115250 + }, + { + "epoch": 0.5948787799051702, + "grad_norm": 26008.57421875, + "learning_rate": 5.160777987851044e-05, + "loss": 0.4253, + "step": 115300 + }, + { + "epoch": 0.5951367498877831, + "grad_norm": 22555.501953125, + "learning_rate": 5.157126819617297e-05, + "loss": 0.4181, + "step": 115350 + }, + { + "epoch": 0.5953947198703958, + "grad_norm": 25113.587890625, + "learning_rate": 5.153475567512298e-05, + "loss": 0.4261, + "step": 115400 + }, + { + "epoch": 0.5956526898530087, + "grad_norm": 22877.908203125, + "learning_rate": 5.149824233485013e-05, + "loss": 0.4177, + "step": 115450 + }, + { + "epoch": 0.5959106598356215, + "grad_norm": 22468.34375, + "learning_rate": 5.1461728194844526e-05, + "loss": 0.4223, + "step": 115500 + }, + { + "epoch": 0.5961686298182344, + "grad_norm": 24525.326171875, + "learning_rate": 5.142521327459672e-05, + "loss": 0.4159, + "step": 115550 + }, + { + "epoch": 0.5964265998008472, + "grad_norm": 23334.296875, + "learning_rate": 5.1388697593597643e-05, + "loss": 0.4206, + "step": 115600 + }, + { + "epoch": 0.59668456978346, + "grad_norm": 21743.333984375, + "learning_rate": 5.1352181171338706e-05, + "loss": 0.4191, + "step": 115650 + }, + { + "epoch": 0.5969425397660728, + "grad_norm": 26287.66796875, + "learning_rate": 5.131566402731165e-05, + "loss": 0.4147, + "step": 115700 + }, + { + "epoch": 0.5972005097486857, + "grad_norm": 20856.890625, + "learning_rate": 5.12791461810086e-05, + "loss": 0.4248, + "step": 115750 + }, + { + "epoch": 0.5974584797312985, + "grad_norm": 22821.73046875, + "learning_rate": 5.124262765192208e-05, + "loss": 0.4239, + "step": 115800 + }, + { + "epoch": 0.5977164497139112, + "grad_norm": 24805.427734375, + "learning_rate": 5.1206108459545e-05, + "loss": 0.4172, + "step": 115850 + }, + { + "epoch": 0.5979744196965241, + "grad_norm": 25195.064453125, + "learning_rate": 5.116958862337057e-05, + "loss": 0.4242, + "step": 115900 + }, + { + "epoch": 0.5982323896791369, + "grad_norm": 22029.236328125, + "learning_rate": 5.1133068162892383e-05, + "loss": 0.4217, + "step": 115950 + }, + { + "epoch": 0.5984903596617498, + "grad_norm": 23296.77734375, + "learning_rate": 5.109654709760434e-05, + "loss": 0.4223, + "step": 116000 + }, + { + "epoch": 0.5987483296443625, + "grad_norm": 23822.447265625, + "learning_rate": 5.106002544700069e-05, + "loss": 0.4235, + "step": 116050 + }, + { + "epoch": 0.5990062996269754, + "grad_norm": 21188.46875, + "learning_rate": 5.1023503230576e-05, + "loss": 0.4275, + "step": 116100 + }, + { + "epoch": 0.5992642696095882, + "grad_norm": 24459.021484375, + "learning_rate": 5.0986980467825096e-05, + "loss": 0.4217, + "step": 116150 + }, + { + "epoch": 0.5995222395922011, + "grad_norm": 22304.396484375, + "learning_rate": 5.095045717824313e-05, + "loss": 0.42, + "step": 116200 + }, + { + "epoch": 0.5997802095748139, + "grad_norm": 20124.943359375, + "learning_rate": 5.0913933381325516e-05, + "loss": 0.4149, + "step": 116250 + }, + { + "epoch": 0.6000381795574267, + "grad_norm": 22610.3046875, + "learning_rate": 5.087740909656798e-05, + "loss": 0.4164, + "step": 116300 + }, + { + "epoch": 0.6002961495400395, + "grad_norm": 22058.974609375, + "learning_rate": 5.084088434346645e-05, + "loss": 0.4211, + "step": 116350 + }, + { + "epoch": 0.6005541195226524, + "grad_norm": 23463.626953125, + "learning_rate": 5.0804359141517134e-05, + "loss": 0.4182, + "step": 116400 + }, + { + "epoch": 0.6008120895052652, + "grad_norm": 25045.67578125, + "learning_rate": 5.076783351021648e-05, + "loss": 0.4202, + "step": 116450 + }, + { + "epoch": 0.601070059487878, + "grad_norm": 22583.076171875, + "learning_rate": 5.0731307469061184e-05, + "loss": 0.4177, + "step": 116500 + }, + { + "epoch": 0.6013280294704908, + "grad_norm": 26350.400390625, + "learning_rate": 5.069478103754811e-05, + "loss": 0.4193, + "step": 116550 + }, + { + "epoch": 0.6015859994531036, + "grad_norm": 21430.255859375, + "learning_rate": 5.0658254235174385e-05, + "loss": 0.422, + "step": 116600 + }, + { + "epoch": 0.6018439694357165, + "grad_norm": 21467.482421875, + "learning_rate": 5.0621727081437275e-05, + "loss": 0.4157, + "step": 116650 + }, + { + "epoch": 0.6021019394183292, + "grad_norm": 25780.095703125, + "learning_rate": 5.05851995958343e-05, + "loss": 0.4243, + "step": 116700 + }, + { + "epoch": 0.6023599094009421, + "grad_norm": 21074.52734375, + "learning_rate": 5.0548671797863125e-05, + "loss": 0.4271, + "step": 116750 + }, + { + "epoch": 0.6026178793835549, + "grad_norm": 25752.841796875, + "learning_rate": 5.051214370702155e-05, + "loss": 0.4209, + "step": 116800 + }, + { + "epoch": 0.6028758493661678, + "grad_norm": 23178.93359375, + "learning_rate": 5.047561534280758e-05, + "loss": 0.4232, + "step": 116850 + }, + { + "epoch": 0.6031338193487806, + "grad_norm": 23263.65625, + "learning_rate": 5.0439086724719355e-05, + "loss": 0.4196, + "step": 116900 + }, + { + "epoch": 0.6033917893313934, + "grad_norm": 20372.861328125, + "learning_rate": 5.040255787225514e-05, + "loss": 0.4194, + "step": 116950 + }, + { + "epoch": 0.6036497593140062, + "grad_norm": 23453.59375, + "learning_rate": 5.036602880491332e-05, + "loss": 0.4156, + "step": 117000 + }, + { + "epoch": 0.6039077292966191, + "grad_norm": 24039.392578125, + "learning_rate": 5.032949954219243e-05, + "loss": 0.4192, + "step": 117050 + }, + { + "epoch": 0.6041656992792319, + "grad_norm": 24246.55859375, + "learning_rate": 5.0292970103591044e-05, + "loss": 0.426, + "step": 117100 + }, + { + "epoch": 0.6044236692618447, + "grad_norm": 23403.130859375, + "learning_rate": 5.0256440508607904e-05, + "loss": 0.4195, + "step": 117150 + }, + { + "epoch": 0.6046816392444575, + "grad_norm": 21872.07421875, + "learning_rate": 5.021991077674179e-05, + "loss": 0.4214, + "step": 117200 + }, + { + "epoch": 0.6049396092270704, + "grad_norm": 22344.455078125, + "learning_rate": 5.018338092749155e-05, + "loss": 0.4205, + "step": 117250 + }, + { + "epoch": 0.6051975792096832, + "grad_norm": 22999.099609375, + "learning_rate": 5.014685098035612e-05, + "loss": 0.4203, + "step": 117300 + }, + { + "epoch": 0.6054555491922959, + "grad_norm": 21572.994140625, + "learning_rate": 5.011032095483448e-05, + "loss": 0.4279, + "step": 117350 + }, + { + "epoch": 0.6057135191749088, + "grad_norm": 21263.11328125, + "learning_rate": 5.007379087042566e-05, + "loss": 0.418, + "step": 117400 + }, + { + "epoch": 0.6059714891575216, + "grad_norm": 22789.671875, + "learning_rate": 5.00372607466287e-05, + "loss": 0.4196, + "step": 117450 + }, + { + "epoch": 0.6062294591401345, + "grad_norm": 21276.09765625, + "learning_rate": 5.000073060294267e-05, + "loss": 0.4125, + "step": 117500 + }, + { + "epoch": 0.6064874291227472, + "grad_norm": 22501.169921875, + "learning_rate": 4.9964200458866654e-05, + "loss": 0.4152, + "step": 117550 + }, + { + "epoch": 0.6067453991053601, + "grad_norm": 21645.912109375, + "learning_rate": 4.992767033389976e-05, + "loss": 0.4253, + "step": 117600 + }, + { + "epoch": 0.6070033690879729, + "grad_norm": 21256.7109375, + "learning_rate": 4.9891140247541025e-05, + "loss": 0.4214, + "step": 117650 + }, + { + "epoch": 0.6072613390705858, + "grad_norm": 22883.98046875, + "learning_rate": 4.985461021928952e-05, + "loss": 0.4238, + "step": 117700 + }, + { + "epoch": 0.6075193090531986, + "grad_norm": 21366.412109375, + "learning_rate": 4.981808026864426e-05, + "loss": 0.4225, + "step": 117750 + }, + { + "epoch": 0.6077772790358114, + "grad_norm": 24185.3515625, + "learning_rate": 4.978155041510425e-05, + "loss": 0.4196, + "step": 117800 + }, + { + "epoch": 0.6080352490184242, + "grad_norm": 21638.009765625, + "learning_rate": 4.974502067816838e-05, + "loss": 0.4221, + "step": 117850 + }, + { + "epoch": 0.608293219001037, + "grad_norm": 20867.111328125, + "learning_rate": 4.970849107733554e-05, + "loss": 0.4225, + "step": 117900 + }, + { + "epoch": 0.6085511889836499, + "grad_norm": 21785.69140625, + "learning_rate": 4.967196163210451e-05, + "loss": 0.4166, + "step": 117950 + }, + { + "epoch": 0.6088091589662626, + "grad_norm": 24691.8515625, + "learning_rate": 4.963543236197401e-05, + "loss": 0.4226, + "step": 118000 + }, + { + "epoch": 0.6090671289488755, + "grad_norm": 21214.1484375, + "learning_rate": 4.9598903286442675e-05, + "loss": 0.418, + "step": 118050 + }, + { + "epoch": 0.6093250989314883, + "grad_norm": 22802.849609375, + "learning_rate": 4.956237442500898e-05, + "loss": 0.4227, + "step": 118100 + }, + { + "epoch": 0.6095830689141012, + "grad_norm": 25204.90625, + "learning_rate": 4.952584579717135e-05, + "loss": 0.4152, + "step": 118150 + }, + { + "epoch": 0.6098410388967139, + "grad_norm": 21970.19140625, + "learning_rate": 4.9489317422428044e-05, + "loss": 0.4197, + "step": 118200 + }, + { + "epoch": 0.6100990088793268, + "grad_norm": 22331.013671875, + "learning_rate": 4.945278932027723e-05, + "loss": 0.4161, + "step": 118250 + }, + { + "epoch": 0.6103569788619396, + "grad_norm": 27234.177734375, + "learning_rate": 4.941626151021686e-05, + "loss": 0.4204, + "step": 118300 + }, + { + "epoch": 0.6106149488445525, + "grad_norm": 22253.0078125, + "learning_rate": 4.937973401174481e-05, + "loss": 0.4202, + "step": 118350 + }, + { + "epoch": 0.6108729188271653, + "grad_norm": 20930.27734375, + "learning_rate": 4.934320684435871e-05, + "loss": 0.4169, + "step": 118400 + }, + { + "epoch": 0.6111308888097781, + "grad_norm": 22569.205078125, + "learning_rate": 4.930668002755609e-05, + "loss": 0.4177, + "step": 118450 + }, + { + "epoch": 0.6113888587923909, + "grad_norm": 23197.943359375, + "learning_rate": 4.9270153580834256e-05, + "loss": 0.414, + "step": 118500 + }, + { + "epoch": 0.6116468287750038, + "grad_norm": 21927.1875, + "learning_rate": 4.923362752369029e-05, + "loss": 0.4203, + "step": 118550 + }, + { + "epoch": 0.6119047987576166, + "grad_norm": 23422.181640625, + "learning_rate": 4.919710187562112e-05, + "loss": 0.4213, + "step": 118600 + }, + { + "epoch": 0.6121627687402293, + "grad_norm": 23351.67578125, + "learning_rate": 4.9160576656123416e-05, + "loss": 0.4213, + "step": 118650 + }, + { + "epoch": 0.6124207387228422, + "grad_norm": 21228.416015625, + "learning_rate": 4.9124051884693664e-05, + "loss": 0.4192, + "step": 118700 + }, + { + "epoch": 0.612678708705455, + "grad_norm": 22555.9609375, + "learning_rate": 4.908752758082802e-05, + "loss": 0.4189, + "step": 118750 + }, + { + "epoch": 0.6129366786880679, + "grad_norm": 21010.859375, + "learning_rate": 4.905100376402251e-05, + "loss": 0.4194, + "step": 118800 + }, + { + "epoch": 0.6131946486706806, + "grad_norm": 23468.78515625, + "learning_rate": 4.901448045377279e-05, + "loss": 0.4151, + "step": 118850 + }, + { + "epoch": 0.6134526186532935, + "grad_norm": 23818.189453125, + "learning_rate": 4.8977957669574334e-05, + "loss": 0.4184, + "step": 118900 + }, + { + "epoch": 0.6137105886359063, + "grad_norm": 22162.76171875, + "learning_rate": 4.8941435430922294e-05, + "loss": 0.4181, + "step": 118950 + }, + { + "epoch": 0.6139685586185192, + "grad_norm": 22983.45703125, + "learning_rate": 4.8904913757311506e-05, + "loss": 0.4196, + "step": 119000 + }, + { + "epoch": 0.614226528601132, + "grad_norm": 22748.150390625, + "learning_rate": 4.886839266823656e-05, + "loss": 0.4195, + "step": 119050 + }, + { + "epoch": 0.6144844985837448, + "grad_norm": 23146.306640625, + "learning_rate": 4.8831872183191684e-05, + "loss": 0.4219, + "step": 119100 + }, + { + "epoch": 0.6147424685663576, + "grad_norm": 24951.591796875, + "learning_rate": 4.879535232167084e-05, + "loss": 0.4165, + "step": 119150 + }, + { + "epoch": 0.6150004385489705, + "grad_norm": 24381.689453125, + "learning_rate": 4.875883310316758e-05, + "loss": 0.4179, + "step": 119200 + }, + { + "epoch": 0.6152584085315833, + "grad_norm": 21191.4609375, + "learning_rate": 4.872231454717518e-05, + "loss": 0.4155, + "step": 119250 + }, + { + "epoch": 0.615516378514196, + "grad_norm": 21586.84375, + "learning_rate": 4.8685796673186526e-05, + "loss": 0.412, + "step": 119300 + }, + { + "epoch": 0.6157743484968089, + "grad_norm": 20381.505859375, + "learning_rate": 4.864927950069416e-05, + "loss": 0.4171, + "step": 119350 + }, + { + "epoch": 0.6160323184794217, + "grad_norm": 23258.296875, + "learning_rate": 4.861276304919026e-05, + "loss": 0.418, + "step": 119400 + }, + { + "epoch": 0.6162902884620346, + "grad_norm": 23629.14453125, + "learning_rate": 4.857624733816657e-05, + "loss": 0.4221, + "step": 119450 + }, + { + "epoch": 0.6165482584446473, + "grad_norm": 22892.7734375, + "learning_rate": 4.853973238711449e-05, + "loss": 0.4278, + "step": 119500 + }, + { + "epoch": 0.6168062284272602, + "grad_norm": 21639.669921875, + "learning_rate": 4.850321821552497e-05, + "loss": 0.4224, + "step": 119550 + }, + { + "epoch": 0.617064198409873, + "grad_norm": 21392.951171875, + "learning_rate": 4.84667048428886e-05, + "loss": 0.4192, + "step": 119600 + }, + { + "epoch": 0.6173221683924859, + "grad_norm": 22603.51953125, + "learning_rate": 4.843019228869548e-05, + "loss": 0.4169, + "step": 119650 + }, + { + "epoch": 0.6175801383750986, + "grad_norm": 22470.62109375, + "learning_rate": 4.8393680572435324e-05, + "loss": 0.4175, + "step": 119700 + }, + { + "epoch": 0.6178381083577115, + "grad_norm": 26185.634765625, + "learning_rate": 4.835716971359737e-05, + "loss": 0.4208, + "step": 119750 + }, + { + "epoch": 0.6180960783403243, + "grad_norm": 21508.12109375, + "learning_rate": 4.832065973167041e-05, + "loss": 0.4194, + "step": 119800 + }, + { + "epoch": 0.6183540483229372, + "grad_norm": 20717.205078125, + "learning_rate": 4.8284150646142784e-05, + "loss": 0.424, + "step": 119850 + }, + { + "epoch": 0.61861201830555, + "grad_norm": 20015.30078125, + "learning_rate": 4.8247642476502284e-05, + "loss": 0.4189, + "step": 119900 + }, + { + "epoch": 0.6188699882881628, + "grad_norm": 21596.349609375, + "learning_rate": 4.821113524223634e-05, + "loss": 0.4218, + "step": 119950 + }, + { + "epoch": 0.6191279582707756, + "grad_norm": 22051.921875, + "learning_rate": 4.817462896283173e-05, + "loss": 0.4184, + "step": 120000 + }, + { + "epoch": 0.6191279582707756, + "eval_loss": 0.40621376037597656, + "eval_runtime": 3588.5932, + "eval_samples_per_second": 864.16, + "eval_steps_per_second": 1.688, + "step": 120000 + }, + { + "epoch": 0.6193859282533885, + "grad_norm": 22562.478515625, + "learning_rate": 4.813812365777486e-05, + "loss": 0.4171, + "step": 120050 + }, + { + "epoch": 0.6196438982360013, + "grad_norm": 22531.505859375, + "learning_rate": 4.81016193465515e-05, + "loss": 0.4171, + "step": 120100 + }, + { + "epoch": 0.619901868218614, + "grad_norm": 21869.177734375, + "learning_rate": 4.8065116048647005e-05, + "loss": 0.4184, + "step": 120150 + }, + { + "epoch": 0.6201598382012269, + "grad_norm": 23087.56640625, + "learning_rate": 4.802861378354607e-05, + "loss": 0.4177, + "step": 120200 + }, + { + "epoch": 0.6204178081838397, + "grad_norm": 22546.060546875, + "learning_rate": 4.7992112570732925e-05, + "loss": 0.4213, + "step": 120250 + }, + { + "epoch": 0.6206757781664526, + "grad_norm": 22802.8984375, + "learning_rate": 4.795561242969122e-05, + "loss": 0.4218, + "step": 120300 + }, + { + "epoch": 0.6209337481490653, + "grad_norm": 19467.32421875, + "learning_rate": 4.791911337990401e-05, + "loss": 0.4141, + "step": 120350 + }, + { + "epoch": 0.6211917181316782, + "grad_norm": 25076.169921875, + "learning_rate": 4.78826154408538e-05, + "loss": 0.4178, + "step": 120400 + }, + { + "epoch": 0.621449688114291, + "grad_norm": 20815.935546875, + "learning_rate": 4.784611863202244e-05, + "loss": 0.4217, + "step": 120450 + }, + { + "epoch": 0.6217076580969039, + "grad_norm": 21686.271484375, + "learning_rate": 4.780962297289126e-05, + "loss": 0.4124, + "step": 120500 + }, + { + "epoch": 0.6219656280795167, + "grad_norm": 22759.310546875, + "learning_rate": 4.777312848294092e-05, + "loss": 0.4159, + "step": 120550 + }, + { + "epoch": 0.6222235980621295, + "grad_norm": 25325.75390625, + "learning_rate": 4.773663518165148e-05, + "loss": 0.4176, + "step": 120600 + }, + { + "epoch": 0.6224815680447423, + "grad_norm": 23474.958984375, + "learning_rate": 4.7700143088502344e-05, + "loss": 0.4143, + "step": 120650 + }, + { + "epoch": 0.6227395380273552, + "grad_norm": 25355.40625, + "learning_rate": 4.766365222297229e-05, + "loss": 0.4262, + "step": 120700 + }, + { + "epoch": 0.622997508009968, + "grad_norm": 22215.14453125, + "learning_rate": 4.762716260453945e-05, + "loss": 0.4149, + "step": 120750 + }, + { + "epoch": 0.6232554779925807, + "grad_norm": 23521.607421875, + "learning_rate": 4.759067425268126e-05, + "loss": 0.4223, + "step": 120800 + }, + { + "epoch": 0.6235134479751936, + "grad_norm": 24524.02734375, + "learning_rate": 4.7554187186874513e-05, + "loss": 0.4256, + "step": 120850 + }, + { + "epoch": 0.6237714179578064, + "grad_norm": 19954.322265625, + "learning_rate": 4.7517701426595266e-05, + "loss": 0.4119, + "step": 120900 + }, + { + "epoch": 0.6240293879404193, + "grad_norm": 21612.1953125, + "learning_rate": 4.748121699131893e-05, + "loss": 0.4196, + "step": 120950 + }, + { + "epoch": 0.624287357923032, + "grad_norm": 20466.0, + "learning_rate": 4.744473390052019e-05, + "loss": 0.4181, + "step": 121000 + }, + { + "epoch": 0.6245453279056449, + "grad_norm": 19992.173828125, + "learning_rate": 4.740825217367304e-05, + "loss": 0.4159, + "step": 121050 + }, + { + "epoch": 0.6248032978882577, + "grad_norm": 21553.1328125, + "learning_rate": 4.737177183025067e-05, + "loss": 0.4157, + "step": 121100 + }, + { + "epoch": 0.6250612678708706, + "grad_norm": 22242.568359375, + "learning_rate": 4.73352928897256e-05, + "loss": 0.4153, + "step": 121150 + }, + { + "epoch": 0.6253192378534834, + "grad_norm": 23883.212890625, + "learning_rate": 4.7298815371569606e-05, + "loss": 0.4173, + "step": 121200 + }, + { + "epoch": 0.6255772078360962, + "grad_norm": 22386.505859375, + "learning_rate": 4.7262339295253645e-05, + "loss": 0.4178, + "step": 121250 + }, + { + "epoch": 0.625835177818709, + "grad_norm": 22051.859375, + "learning_rate": 4.722586468024797e-05, + "loss": 0.4111, + "step": 121300 + }, + { + "epoch": 0.6260931478013219, + "grad_norm": 21374.4765625, + "learning_rate": 4.7189391546021996e-05, + "loss": 0.418, + "step": 121350 + }, + { + "epoch": 0.6263511177839347, + "grad_norm": 22240.453125, + "learning_rate": 4.7152919912044406e-05, + "loss": 0.4196, + "step": 121400 + }, + { + "epoch": 0.6266090877665474, + "grad_norm": 26278.798828125, + "learning_rate": 4.711644979778302e-05, + "loss": 0.4165, + "step": 121450 + }, + { + "epoch": 0.6268670577491603, + "grad_norm": 22151.77734375, + "learning_rate": 4.707998122270492e-05, + "loss": 0.422, + "step": 121500 + }, + { + "epoch": 0.6271250277317731, + "grad_norm": 21278.99609375, + "learning_rate": 4.7043514206276276e-05, + "loss": 0.4202, + "step": 121550 + }, + { + "epoch": 0.627382997714386, + "grad_norm": 24062.6015625, + "learning_rate": 4.70070487679625e-05, + "loss": 0.4174, + "step": 121600 + }, + { + "epoch": 0.6276409676969987, + "grad_norm": 21124.400390625, + "learning_rate": 4.697058492722815e-05, + "loss": 0.4156, + "step": 121650 + }, + { + "epoch": 0.6278989376796116, + "grad_norm": 22513.48046875, + "learning_rate": 4.6934122703536894e-05, + "loss": 0.4198, + "step": 121700 + }, + { + "epoch": 0.6281569076622244, + "grad_norm": 24250.720703125, + "learning_rate": 4.689766211635159e-05, + "loss": 0.4197, + "step": 121750 + }, + { + "epoch": 0.6284148776448373, + "grad_norm": 23831.220703125, + "learning_rate": 4.686120318513415e-05, + "loss": 0.415, + "step": 121800 + }, + { + "epoch": 0.62867284762745, + "grad_norm": 24005.458984375, + "learning_rate": 4.682474592934569e-05, + "loss": 0.4154, + "step": 121850 + }, + { + "epoch": 0.6289308176100629, + "grad_norm": 21365.09375, + "learning_rate": 4.6788290368446355e-05, + "loss": 0.4164, + "step": 121900 + }, + { + "epoch": 0.6291887875926757, + "grad_norm": 23601.689453125, + "learning_rate": 4.675183652189545e-05, + "loss": 0.418, + "step": 121950 + }, + { + "epoch": 0.6294467575752886, + "grad_norm": 21023.33203125, + "learning_rate": 4.671538440915129e-05, + "loss": 0.4181, + "step": 122000 + }, + { + "epoch": 0.6297047275579014, + "grad_norm": 22292.671875, + "learning_rate": 4.667893404967133e-05, + "loss": 0.4203, + "step": 122050 + }, + { + "epoch": 0.6299626975405141, + "grad_norm": 21975.3671875, + "learning_rate": 4.664248546291207e-05, + "loss": 0.4162, + "step": 122100 + }, + { + "epoch": 0.630220667523127, + "grad_norm": 22591.34765625, + "learning_rate": 4.660603866832906e-05, + "loss": 0.4146, + "step": 122150 + }, + { + "epoch": 0.6304786375057398, + "grad_norm": 23449.529296875, + "learning_rate": 4.6569593685376895e-05, + "loss": 0.4205, + "step": 122200 + }, + { + "epoch": 0.6307366074883527, + "grad_norm": 21614.046875, + "learning_rate": 4.653315053350918e-05, + "loss": 0.4173, + "step": 122250 + }, + { + "epoch": 0.6309945774709654, + "grad_norm": 26004.5859375, + "learning_rate": 4.649670923217859e-05, + "loss": 0.4137, + "step": 122300 + }, + { + "epoch": 0.6312525474535783, + "grad_norm": 23640.357421875, + "learning_rate": 4.646026980083676e-05, + "loss": 0.4165, + "step": 122350 + }, + { + "epoch": 0.6315105174361911, + "grad_norm": 23575.3984375, + "learning_rate": 4.6423832258934396e-05, + "loss": 0.4179, + "step": 122400 + }, + { + "epoch": 0.631768487418804, + "grad_norm": 26795.05078125, + "learning_rate": 4.63873966259211e-05, + "loss": 0.4171, + "step": 122450 + }, + { + "epoch": 0.6320264574014167, + "grad_norm": 22246.931640625, + "learning_rate": 4.6350962921245515e-05, + "loss": 0.4188, + "step": 122500 + }, + { + "epoch": 0.6322844273840296, + "grad_norm": 22268.3359375, + "learning_rate": 4.63145311643553e-05, + "loss": 0.4141, + "step": 122550 + }, + { + "epoch": 0.6325423973666424, + "grad_norm": 23749.38671875, + "learning_rate": 4.627810137469696e-05, + "loss": 0.4132, + "step": 122600 + }, + { + "epoch": 0.6328003673492553, + "grad_norm": 22449.15625, + "learning_rate": 4.624167357171606e-05, + "loss": 0.4177, + "step": 122650 + }, + { + "epoch": 0.6330583373318681, + "grad_norm": 22132.927734375, + "learning_rate": 4.6205247774857e-05, + "loss": 0.4211, + "step": 122700 + }, + { + "epoch": 0.6333163073144809, + "grad_norm": 20199.654296875, + "learning_rate": 4.616882400356323e-05, + "loss": 0.4127, + "step": 122750 + }, + { + "epoch": 0.6335742772970937, + "grad_norm": 23172.29296875, + "learning_rate": 4.613240227727699e-05, + "loss": 0.4173, + "step": 122800 + }, + { + "epoch": 0.6338322472797066, + "grad_norm": 23373.6640625, + "learning_rate": 4.609598261543954e-05, + "loss": 0.4139, + "step": 122850 + }, + { + "epoch": 0.6340902172623194, + "grad_norm": 22187.794921875, + "learning_rate": 4.6059565037490965e-05, + "loss": 0.4233, + "step": 122900 + }, + { + "epoch": 0.6343481872449321, + "grad_norm": 21762.28515625, + "learning_rate": 4.602314956287027e-05, + "loss": 0.4195, + "step": 122950 + }, + { + "epoch": 0.634606157227545, + "grad_norm": 24228.3125, + "learning_rate": 4.598673621101535e-05, + "loss": 0.4218, + "step": 123000 + }, + { + "epoch": 0.6348641272101578, + "grad_norm": 20360.208984375, + "learning_rate": 4.595032500136291e-05, + "loss": 0.4266, + "step": 123050 + }, + { + "epoch": 0.6351220971927707, + "grad_norm": 22763.712890625, + "learning_rate": 4.5913915953348574e-05, + "loss": 0.4153, + "step": 123100 + }, + { + "epoch": 0.6353800671753834, + "grad_norm": 25601.05078125, + "learning_rate": 4.5877509086406766e-05, + "loss": 0.4201, + "step": 123150 + }, + { + "epoch": 0.6356380371579963, + "grad_norm": 22695.91015625, + "learning_rate": 4.584110441997081e-05, + "loss": 0.4174, + "step": 123200 + }, + { + "epoch": 0.6358960071406091, + "grad_norm": 24915.857421875, + "learning_rate": 4.5804701973472755e-05, + "loss": 0.416, + "step": 123250 + }, + { + "epoch": 0.636153977123222, + "grad_norm": 24066.427734375, + "learning_rate": 4.576830176634356e-05, + "loss": 0.415, + "step": 123300 + }, + { + "epoch": 0.6364119471058348, + "grad_norm": 25726.71484375, + "learning_rate": 4.573190381801293e-05, + "loss": 0.4204, + "step": 123350 + }, + { + "epoch": 0.6366699170884476, + "grad_norm": 24271.998046875, + "learning_rate": 4.56955081479094e-05, + "loss": 0.4166, + "step": 123400 + }, + { + "epoch": 0.6369278870710604, + "grad_norm": 20897.818359375, + "learning_rate": 4.5659114775460286e-05, + "loss": 0.4156, + "step": 123450 + }, + { + "epoch": 0.6371858570536733, + "grad_norm": 24409.841796875, + "learning_rate": 4.562272372009163e-05, + "loss": 0.4208, + "step": 123500 + }, + { + "epoch": 0.6374438270362861, + "grad_norm": 24757.927734375, + "learning_rate": 4.5586335001228296e-05, + "loss": 0.4167, + "step": 123550 + }, + { + "epoch": 0.6377017970188988, + "grad_norm": 22433.091796875, + "learning_rate": 4.554994863829387e-05, + "loss": 0.4206, + "step": 123600 + }, + { + "epoch": 0.6379597670015117, + "grad_norm": 22757.798828125, + "learning_rate": 4.5513564650710706e-05, + "loss": 0.4113, + "step": 123650 + }, + { + "epoch": 0.6382177369841245, + "grad_norm": 22652.9140625, + "learning_rate": 4.547718305789984e-05, + "loss": 0.4224, + "step": 123700 + }, + { + "epoch": 0.6384757069667374, + "grad_norm": 25416.0390625, + "learning_rate": 4.5440803879281086e-05, + "loss": 0.4129, + "step": 123750 + }, + { + "epoch": 0.6387336769493501, + "grad_norm": 22621.40625, + "learning_rate": 4.5404427134272926e-05, + "loss": 0.4204, + "step": 123800 + }, + { + "epoch": 0.638991646931963, + "grad_norm": 24213.93359375, + "learning_rate": 4.536805284229258e-05, + "loss": 0.4109, + "step": 123850 + }, + { + "epoch": 0.6392496169145758, + "grad_norm": 20231.091796875, + "learning_rate": 4.5331681022755946e-05, + "loss": 0.4221, + "step": 123900 + }, + { + "epoch": 0.6395075868971887, + "grad_norm": 22513.21875, + "learning_rate": 4.529531169507757e-05, + "loss": 0.4189, + "step": 123950 + }, + { + "epoch": 0.6397655568798014, + "grad_norm": 19454.783203125, + "learning_rate": 4.5258944878670714e-05, + "loss": 0.4138, + "step": 124000 + }, + { + "epoch": 0.6400235268624143, + "grad_norm": 23547.423828125, + "learning_rate": 4.522258059294727e-05, + "loss": 0.4206, + "step": 124050 + }, + { + "epoch": 0.6402814968450271, + "grad_norm": 23985.0703125, + "learning_rate": 4.5186218857317825e-05, + "loss": 0.4186, + "step": 124100 + }, + { + "epoch": 0.64053946682764, + "grad_norm": 22254.078125, + "learning_rate": 4.5149859691191517e-05, + "loss": 0.4076, + "step": 124150 + }, + { + "epoch": 0.6407974368102528, + "grad_norm": 24060.70703125, + "learning_rate": 4.5113503113976194e-05, + "loss": 0.4207, + "step": 124200 + }, + { + "epoch": 0.6410554067928655, + "grad_norm": 21521.923828125, + "learning_rate": 4.5077149145078275e-05, + "loss": 0.4134, + "step": 124250 + }, + { + "epoch": 0.6413133767754784, + "grad_norm": 22107.48828125, + "learning_rate": 4.504079780390282e-05, + "loss": 0.4095, + "step": 124300 + }, + { + "epoch": 0.6415713467580912, + "grad_norm": 22610.880859375, + "learning_rate": 4.5004449109853485e-05, + "loss": 0.4216, + "step": 124350 + }, + { + "epoch": 0.6418293167407041, + "grad_norm": 22752.83984375, + "learning_rate": 4.496810308233247e-05, + "loss": 0.4225, + "step": 124400 + }, + { + "epoch": 0.6420872867233168, + "grad_norm": 22029.88671875, + "learning_rate": 4.4931759740740596e-05, + "loss": 0.4138, + "step": 124450 + }, + { + "epoch": 0.6423452567059297, + "grad_norm": 24989.2421875, + "learning_rate": 4.489541910447722e-05, + "loss": 0.4166, + "step": 124500 + }, + { + "epoch": 0.6426032266885425, + "grad_norm": 25843.16796875, + "learning_rate": 4.485908119294031e-05, + "loss": 0.4132, + "step": 124550 + }, + { + "epoch": 0.6428611966711554, + "grad_norm": 23847.01171875, + "learning_rate": 4.4822746025526286e-05, + "loss": 0.4256, + "step": 124600 + }, + { + "epoch": 0.6431191666537681, + "grad_norm": 21634.71484375, + "learning_rate": 4.478641362163019e-05, + "loss": 0.4182, + "step": 124650 + }, + { + "epoch": 0.643377136636381, + "grad_norm": 22252.021484375, + "learning_rate": 4.475008400064554e-05, + "loss": 0.419, + "step": 124700 + }, + { + "epoch": 0.6436351066189938, + "grad_norm": 24151.951171875, + "learning_rate": 4.471375718196439e-05, + "loss": 0.4201, + "step": 124750 + }, + { + "epoch": 0.6438930766016067, + "grad_norm": 23570.310546875, + "learning_rate": 4.4677433184977315e-05, + "loss": 0.4131, + "step": 124800 + }, + { + "epoch": 0.6441510465842195, + "grad_norm": 23886.896484375, + "learning_rate": 4.464111202907332e-05, + "loss": 0.4172, + "step": 124850 + }, + { + "epoch": 0.6444090165668322, + "grad_norm": 23476.888671875, + "learning_rate": 4.4604793733639973e-05, + "loss": 0.419, + "step": 124900 + }, + { + "epoch": 0.6446669865494451, + "grad_norm": 22735.759765625, + "learning_rate": 4.456847831806324e-05, + "loss": 0.4214, + "step": 124950 + }, + { + "epoch": 0.644924956532058, + "grad_norm": 25508.525390625, + "learning_rate": 4.4532165801727626e-05, + "loss": 0.4184, + "step": 125000 + }, + { + "epoch": 0.644924956532058, + "eval_loss": 0.40382638573646545, + "eval_runtime": 3215.6548, + "eval_samples_per_second": 964.382, + "eval_steps_per_second": 1.884, + "step": 125000 + }, + { + "epoch": 0.6451829265146708, + "grad_norm": 23686.8671875, + "learning_rate": 4.449585620401601e-05, + "loss": 0.4115, + "step": 125050 + }, + { + "epoch": 0.6454408964972835, + "grad_norm": 22472.7421875, + "learning_rate": 4.445954954430976e-05, + "loss": 0.4187, + "step": 125100 + }, + { + "epoch": 0.6456988664798964, + "grad_norm": 25044.5859375, + "learning_rate": 4.442324584198871e-05, + "loss": 0.4188, + "step": 125150 + }, + { + "epoch": 0.6459568364625092, + "grad_norm": 23489.119140625, + "learning_rate": 4.4386945116431025e-05, + "loss": 0.4212, + "step": 125200 + }, + { + "epoch": 0.6462148064451221, + "grad_norm": 23150.12109375, + "learning_rate": 4.435064738701335e-05, + "loss": 0.4155, + "step": 125250 + }, + { + "epoch": 0.6464727764277348, + "grad_norm": 22082.09765625, + "learning_rate": 4.4314352673110696e-05, + "loss": 0.4208, + "step": 125300 + }, + { + "epoch": 0.6467307464103477, + "grad_norm": 23107.71484375, + "learning_rate": 4.427806099409652e-05, + "loss": 0.4172, + "step": 125350 + }, + { + "epoch": 0.6469887163929605, + "grad_norm": 23660.607421875, + "learning_rate": 4.4241772369342554e-05, + "loss": 0.4156, + "step": 125400 + }, + { + "epoch": 0.6472466863755734, + "grad_norm": 22054.47265625, + "learning_rate": 4.420548681821901e-05, + "loss": 0.4174, + "step": 125450 + }, + { + "epoch": 0.6475046563581862, + "grad_norm": 22386.654296875, + "learning_rate": 4.416920436009439e-05, + "loss": 0.4164, + "step": 125500 + }, + { + "epoch": 0.647762626340799, + "grad_norm": 22394.78125, + "learning_rate": 4.413292501433557e-05, + "loss": 0.4128, + "step": 125550 + }, + { + "epoch": 0.6480205963234118, + "grad_norm": 21871.1953125, + "learning_rate": 4.4096648800307796e-05, + "loss": 0.4174, + "step": 125600 + }, + { + "epoch": 0.6482785663060247, + "grad_norm": 21630.826171875, + "learning_rate": 4.406037573737456e-05, + "loss": 0.4146, + "step": 125650 + }, + { + "epoch": 0.6485365362886375, + "grad_norm": 20917.244140625, + "learning_rate": 4.4024105844897744e-05, + "loss": 0.4172, + "step": 125700 + }, + { + "epoch": 0.6487945062712502, + "grad_norm": 21545.53515625, + "learning_rate": 4.3987839142237505e-05, + "loss": 0.4189, + "step": 125750 + }, + { + "epoch": 0.6490524762538631, + "grad_norm": 27708.19140625, + "learning_rate": 4.395157564875234e-05, + "loss": 0.4127, + "step": 125800 + }, + { + "epoch": 0.6493104462364759, + "grad_norm": 23791.052734375, + "learning_rate": 4.391531538379895e-05, + "loss": 0.4146, + "step": 125850 + }, + { + "epoch": 0.6495684162190888, + "grad_norm": 23441.0078125, + "learning_rate": 4.387905836673239e-05, + "loss": 0.4191, + "step": 125900 + }, + { + "epoch": 0.6498263862017015, + "grad_norm": 21998.982421875, + "learning_rate": 4.3842804616905944e-05, + "loss": 0.4165, + "step": 125950 + }, + { + "epoch": 0.6500843561843144, + "grad_norm": 26170.572265625, + "learning_rate": 4.380655415367116e-05, + "loss": 0.4106, + "step": 126000 + }, + { + "epoch": 0.6503423261669272, + "grad_norm": 23915.345703125, + "learning_rate": 4.3770306996377866e-05, + "loss": 0.417, + "step": 126050 + }, + { + "epoch": 0.6506002961495401, + "grad_norm": 22807.23828125, + "learning_rate": 4.373406316437404e-05, + "loss": 0.4138, + "step": 126100 + }, + { + "epoch": 0.6508582661321528, + "grad_norm": 22825.060546875, + "learning_rate": 4.369782267700598e-05, + "loss": 0.4159, + "step": 126150 + }, + { + "epoch": 0.6511162361147657, + "grad_norm": 21670.83984375, + "learning_rate": 4.366158555361812e-05, + "loss": 0.4131, + "step": 126200 + }, + { + "epoch": 0.6513742060973785, + "grad_norm": 24840.630859375, + "learning_rate": 4.362535181355319e-05, + "loss": 0.4072, + "step": 126250 + }, + { + "epoch": 0.6516321760799914, + "grad_norm": 24121.158203125, + "learning_rate": 4.358912147615199e-05, + "loss": 0.4085, + "step": 126300 + }, + { + "epoch": 0.6518901460626042, + "grad_norm": 21738.236328125, + "learning_rate": 4.355289456075363e-05, + "loss": 0.4154, + "step": 126350 + }, + { + "epoch": 0.6521481160452169, + "grad_norm": 24880.833984375, + "learning_rate": 4.3516671086695296e-05, + "loss": 0.4154, + "step": 126400 + }, + { + "epoch": 0.6524060860278298, + "grad_norm": 21572.140625, + "learning_rate": 4.348045107331239e-05, + "loss": 0.4185, + "step": 126450 + }, + { + "epoch": 0.6526640560104426, + "grad_norm": 24076.17578125, + "learning_rate": 4.344423453993849e-05, + "loss": 0.4132, + "step": 126500 + }, + { + "epoch": 0.6529220259930555, + "grad_norm": 23531.365234375, + "learning_rate": 4.340802150590522e-05, + "loss": 0.4179, + "step": 126550 + }, + { + "epoch": 0.6531799959756682, + "grad_norm": 24287.568359375, + "learning_rate": 4.337181199054243e-05, + "loss": 0.4136, + "step": 126600 + }, + { + "epoch": 0.6534379659582811, + "grad_norm": 23352.52734375, + "learning_rate": 4.3335606013178046e-05, + "loss": 0.4177, + "step": 126650 + }, + { + "epoch": 0.6536959359408939, + "grad_norm": 22291.494140625, + "learning_rate": 4.3299403593138144e-05, + "loss": 0.4155, + "step": 126700 + }, + { + "epoch": 0.6539539059235068, + "grad_norm": 20745.798828125, + "learning_rate": 4.3263204749746836e-05, + "loss": 0.4139, + "step": 126750 + }, + { + "epoch": 0.6542118759061195, + "grad_norm": 24670.357421875, + "learning_rate": 4.322700950232639e-05, + "loss": 0.423, + "step": 126800 + }, + { + "epoch": 0.6544698458887324, + "grad_norm": 23067.81640625, + "learning_rate": 4.31908178701971e-05, + "loss": 0.4174, + "step": 126850 + }, + { + "epoch": 0.6547278158713452, + "grad_norm": 25275.47265625, + "learning_rate": 4.315462987267739e-05, + "loss": 0.4181, + "step": 126900 + }, + { + "epoch": 0.6549857858539581, + "grad_norm": 21032.4375, + "learning_rate": 4.311844552908372e-05, + "loss": 0.4111, + "step": 126950 + }, + { + "epoch": 0.6552437558365709, + "grad_norm": 21629.0625, + "learning_rate": 4.308226485873056e-05, + "loss": 0.4129, + "step": 127000 + }, + { + "epoch": 0.6555017258191836, + "grad_norm": 24375.935546875, + "learning_rate": 4.3046087880930466e-05, + "loss": 0.4129, + "step": 127050 + }, + { + "epoch": 0.6557596958017965, + "grad_norm": 21224.63671875, + "learning_rate": 4.3009914614994e-05, + "loss": 0.4156, + "step": 127100 + }, + { + "epoch": 0.6560176657844093, + "grad_norm": 24836.560546875, + "learning_rate": 4.297374508022977e-05, + "loss": 0.4133, + "step": 127150 + }, + { + "epoch": 0.6562756357670222, + "grad_norm": 22769.599609375, + "learning_rate": 4.293757929594435e-05, + "loss": 0.4151, + "step": 127200 + }, + { + "epoch": 0.6565336057496349, + "grad_norm": 22936.603515625, + "learning_rate": 4.2901417281442345e-05, + "loss": 0.4173, + "step": 127250 + }, + { + "epoch": 0.6567915757322478, + "grad_norm": 21296.39453125, + "learning_rate": 4.286525905602634e-05, + "loss": 0.4121, + "step": 127300 + }, + { + "epoch": 0.6570495457148606, + "grad_norm": 24282.591796875, + "learning_rate": 4.282910463899689e-05, + "loss": 0.4086, + "step": 127350 + }, + { + "epoch": 0.6573075156974735, + "grad_norm": 22443.6015625, + "learning_rate": 4.2792954049652545e-05, + "loss": 0.4183, + "step": 127400 + }, + { + "epoch": 0.6575654856800862, + "grad_norm": 21437.98046875, + "learning_rate": 4.275680730728976e-05, + "loss": 0.4172, + "step": 127450 + }, + { + "epoch": 0.6578234556626991, + "grad_norm": 24970.3125, + "learning_rate": 4.2720664431202987e-05, + "loss": 0.4187, + "step": 127500 + }, + { + "epoch": 0.6580814256453119, + "grad_norm": 21128.349609375, + "learning_rate": 4.268452544068457e-05, + "loss": 0.4142, + "step": 127550 + }, + { + "epoch": 0.6583393956279248, + "grad_norm": 26429.14453125, + "learning_rate": 4.2648390355024836e-05, + "loss": 0.4115, + "step": 127600 + }, + { + "epoch": 0.6585973656105376, + "grad_norm": 22542.380859375, + "learning_rate": 4.261225919351195e-05, + "loss": 0.4144, + "step": 127650 + }, + { + "epoch": 0.6588553355931503, + "grad_norm": 23179.853515625, + "learning_rate": 4.257613197543207e-05, + "loss": 0.4164, + "step": 127700 + }, + { + "epoch": 0.6591133055757632, + "grad_norm": 24641.048828125, + "learning_rate": 4.254000872006918e-05, + "loss": 0.4175, + "step": 127750 + }, + { + "epoch": 0.659371275558376, + "grad_norm": 23836.771484375, + "learning_rate": 4.250388944670517e-05, + "loss": 0.4201, + "step": 127800 + }, + { + "epoch": 0.6596292455409889, + "grad_norm": 23714.7578125, + "learning_rate": 4.2467774174619836e-05, + "loss": 0.4102, + "step": 127850 + }, + { + "epoch": 0.6598872155236016, + "grad_norm": 23630.2890625, + "learning_rate": 4.2431662923090785e-05, + "loss": 0.411, + "step": 127900 + }, + { + "epoch": 0.6601451855062145, + "grad_norm": 23018.384765625, + "learning_rate": 4.239555571139353e-05, + "loss": 0.4113, + "step": 127950 + }, + { + "epoch": 0.6604031554888273, + "grad_norm": 23594.041015625, + "learning_rate": 4.235945255880137e-05, + "loss": 0.4153, + "step": 128000 + }, + { + "epoch": 0.6606611254714402, + "grad_norm": 24231.07421875, + "learning_rate": 4.232335348458549e-05, + "loss": 0.4159, + "step": 128050 + }, + { + "epoch": 0.6609190954540529, + "grad_norm": 22362.98828125, + "learning_rate": 4.228725850801486e-05, + "loss": 0.4218, + "step": 128100 + }, + { + "epoch": 0.6611770654366658, + "grad_norm": 23008.44140625, + "learning_rate": 4.225116764835631e-05, + "loss": 0.416, + "step": 128150 + }, + { + "epoch": 0.6614350354192786, + "grad_norm": 23027.1875, + "learning_rate": 4.221508092487441e-05, + "loss": 0.4163, + "step": 128200 + }, + { + "epoch": 0.6616930054018915, + "grad_norm": 25121.61328125, + "learning_rate": 4.2178998356831553e-05, + "loss": 0.4167, + "step": 128250 + }, + { + "epoch": 0.6619509753845043, + "grad_norm": 24767.4140625, + "learning_rate": 4.214291996348794e-05, + "loss": 0.4176, + "step": 128300 + }, + { + "epoch": 0.662208945367117, + "grad_norm": 24596.533203125, + "learning_rate": 4.210684576410151e-05, + "loss": 0.4183, + "step": 128350 + }, + { + "epoch": 0.6624669153497299, + "grad_norm": 21095.8671875, + "learning_rate": 4.2070775777927976e-05, + "loss": 0.4151, + "step": 128400 + }, + { + "epoch": 0.6627248853323428, + "grad_norm": 25389.1640625, + "learning_rate": 4.203471002422077e-05, + "loss": 0.4226, + "step": 128450 + }, + { + "epoch": 0.6629828553149556, + "grad_norm": 24613.94921875, + "learning_rate": 4.199864852223113e-05, + "loss": 0.4093, + "step": 128500 + }, + { + "epoch": 0.6632408252975683, + "grad_norm": 23665.59765625, + "learning_rate": 4.196259129120796e-05, + "loss": 0.4135, + "step": 128550 + }, + { + "epoch": 0.6634987952801812, + "grad_norm": 22946.5234375, + "learning_rate": 4.192653835039795e-05, + "loss": 0.4151, + "step": 128600 + }, + { + "epoch": 0.663756765262794, + "grad_norm": 22438.23046875, + "learning_rate": 4.189048971904541e-05, + "loss": 0.4064, + "step": 128650 + }, + { + "epoch": 0.6640147352454069, + "grad_norm": 22760.623046875, + "learning_rate": 4.185444541639243e-05, + "loss": 0.4084, + "step": 128700 + }, + { + "epoch": 0.6642727052280196, + "grad_norm": 25223.484375, + "learning_rate": 4.1818405461678763e-05, + "loss": 0.4151, + "step": 128750 + }, + { + "epoch": 0.6645306752106325, + "grad_norm": 31547.962890625, + "learning_rate": 4.178236987414182e-05, + "loss": 0.4115, + "step": 128800 + }, + { + "epoch": 0.6647886451932453, + "grad_norm": 19114.953125, + "learning_rate": 4.174633867301674e-05, + "loss": 0.4109, + "step": 128850 + }, + { + "epoch": 0.6650466151758582, + "grad_norm": 22819.888671875, + "learning_rate": 4.1710311877536226e-05, + "loss": 0.4123, + "step": 128900 + }, + { + "epoch": 0.6653045851584709, + "grad_norm": 22868.62890625, + "learning_rate": 4.167428950693073e-05, + "loss": 0.413, + "step": 128950 + }, + { + "epoch": 0.6655625551410838, + "grad_norm": 23062.359375, + "learning_rate": 4.163827158042826e-05, + "loss": 0.4152, + "step": 129000 + }, + { + "epoch": 0.6658205251236966, + "grad_norm": 25990.505859375, + "learning_rate": 4.160225811725453e-05, + "loss": 0.4176, + "step": 129050 + }, + { + "epoch": 0.6660784951063095, + "grad_norm": 21594.1953125, + "learning_rate": 4.156624913663279e-05, + "loss": 0.4136, + "step": 129100 + }, + { + "epoch": 0.6663364650889223, + "grad_norm": 21145.869140625, + "learning_rate": 4.153024465778393e-05, + "loss": 0.4216, + "step": 129150 + }, + { + "epoch": 0.666594435071535, + "grad_norm": 22634.7734375, + "learning_rate": 4.149424469992649e-05, + "loss": 0.4114, + "step": 129200 + }, + { + "epoch": 0.6668524050541479, + "grad_norm": 23526.46875, + "learning_rate": 4.145824928227652e-05, + "loss": 0.4217, + "step": 129250 + }, + { + "epoch": 0.6671103750367607, + "grad_norm": 22295.880859375, + "learning_rate": 4.142225842404769e-05, + "loss": 0.4169, + "step": 129300 + }, + { + "epoch": 0.6673683450193736, + "grad_norm": 22282.421875, + "learning_rate": 4.13862721444512e-05, + "loss": 0.4195, + "step": 129350 + }, + { + "epoch": 0.6676263150019863, + "grad_norm": 21856.337890625, + "learning_rate": 4.135029046269585e-05, + "loss": 0.4229, + "step": 129400 + }, + { + "epoch": 0.6678842849845992, + "grad_norm": 20999.04296875, + "learning_rate": 4.131431339798796e-05, + "loss": 0.4168, + "step": 129450 + }, + { + "epoch": 0.668142254967212, + "grad_norm": 24684.484375, + "learning_rate": 4.12783409695314e-05, + "loss": 0.4117, + "step": 129500 + }, + { + "epoch": 0.6684002249498249, + "grad_norm": 24120.349609375, + "learning_rate": 4.124237319652753e-05, + "loss": 0.4186, + "step": 129550 + }, + { + "epoch": 0.6686581949324376, + "grad_norm": 23283.736328125, + "learning_rate": 4.1206410098175265e-05, + "loss": 0.4176, + "step": 129600 + }, + { + "epoch": 0.6689161649150505, + "grad_norm": 21902.6875, + "learning_rate": 4.117045169367102e-05, + "loss": 0.4153, + "step": 129650 + }, + { + "epoch": 0.6691741348976633, + "grad_norm": 22762.6015625, + "learning_rate": 4.1134498002208674e-05, + "loss": 0.414, + "step": 129700 + }, + { + "epoch": 0.6694321048802762, + "grad_norm": 20947.083984375, + "learning_rate": 4.109854904297965e-05, + "loss": 0.4113, + "step": 129750 + }, + { + "epoch": 0.669690074862889, + "grad_norm": 24687.189453125, + "learning_rate": 4.106260483517276e-05, + "loss": 0.4207, + "step": 129800 + }, + { + "epoch": 0.6699480448455017, + "grad_norm": 24164.724609375, + "learning_rate": 4.102666539797435e-05, + "loss": 0.4116, + "step": 129850 + }, + { + "epoch": 0.6702060148281146, + "grad_norm": 23408.68359375, + "learning_rate": 4.099073075056818e-05, + "loss": 0.4181, + "step": 129900 + }, + { + "epoch": 0.6704639848107274, + "grad_norm": 22822.3515625, + "learning_rate": 4.0954800912135516e-05, + "loss": 0.4176, + "step": 129950 + }, + { + "epoch": 0.6707219547933403, + "grad_norm": 21576.173828125, + "learning_rate": 4.091887590185494e-05, + "loss": 0.4165, + "step": 130000 + }, + { + "epoch": 0.6707219547933403, + "eval_loss": 0.40186887979507446, + "eval_runtime": 3150.7117, + "eval_samples_per_second": 984.26, + "eval_steps_per_second": 1.922, + "step": 130000 + }, + { + "epoch": 0.670979924775953, + "grad_norm": 21987.3671875, + "learning_rate": 4.0882955738902576e-05, + "loss": 0.4176, + "step": 130050 + }, + { + "epoch": 0.6712378947585659, + "grad_norm": 23900.74609375, + "learning_rate": 4.0847040442451895e-05, + "loss": 0.4183, + "step": 130100 + }, + { + "epoch": 0.6714958647411787, + "grad_norm": 22624.236328125, + "learning_rate": 4.081113003167378e-05, + "loss": 0.4146, + "step": 130150 + }, + { + "epoch": 0.6717538347237916, + "grad_norm": 22636.490234375, + "learning_rate": 4.0775224525736546e-05, + "loss": 0.4107, + "step": 130200 + }, + { + "epoch": 0.6720118047064043, + "grad_norm": 22667.66796875, + "learning_rate": 4.07393239438058e-05, + "loss": 0.4151, + "step": 130250 + }, + { + "epoch": 0.6722697746890172, + "grad_norm": 20381.720703125, + "learning_rate": 4.070342830504465e-05, + "loss": 0.4167, + "step": 130300 + }, + { + "epoch": 0.67252774467163, + "grad_norm": 22913.248046875, + "learning_rate": 4.0667537628613424e-05, + "loss": 0.4116, + "step": 130350 + }, + { + "epoch": 0.6727857146542429, + "grad_norm": 23168.865234375, + "learning_rate": 4.063165193366992e-05, + "loss": 0.413, + "step": 130400 + }, + { + "epoch": 0.6730436846368557, + "grad_norm": 21597.861328125, + "learning_rate": 4.059577123936918e-05, + "loss": 0.4179, + "step": 130450 + }, + { + "epoch": 0.6733016546194684, + "grad_norm": 20305.806640625, + "learning_rate": 4.055989556486365e-05, + "loss": 0.4199, + "step": 130500 + }, + { + "epoch": 0.6735596246020813, + "grad_norm": 23520.173828125, + "learning_rate": 4.052402492930311e-05, + "loss": 0.4154, + "step": 130550 + }, + { + "epoch": 0.6738175945846941, + "grad_norm": 23356.85546875, + "learning_rate": 4.048815935183453e-05, + "loss": 0.4154, + "step": 130600 + }, + { + "epoch": 0.674075564567307, + "grad_norm": 22958.611328125, + "learning_rate": 4.0452298851602324e-05, + "loss": 0.4149, + "step": 130650 + }, + { + "epoch": 0.6743335345499197, + "grad_norm": 24888.25390625, + "learning_rate": 4.04164434477481e-05, + "loss": 0.4166, + "step": 130700 + }, + { + "epoch": 0.6745915045325326, + "grad_norm": 22958.189453125, + "learning_rate": 4.0380593159410806e-05, + "loss": 0.4159, + "step": 130750 + }, + { + "epoch": 0.6748494745151454, + "grad_norm": 21863.55859375, + "learning_rate": 4.03447480057266e-05, + "loss": 0.4142, + "step": 130800 + }, + { + "epoch": 0.6751074444977583, + "grad_norm": 23096.375, + "learning_rate": 4.030890800582895e-05, + "loss": 0.4108, + "step": 130850 + }, + { + "epoch": 0.675365414480371, + "grad_norm": 23506.576171875, + "learning_rate": 4.027307317884854e-05, + "loss": 0.4111, + "step": 130900 + }, + { + "epoch": 0.6756233844629839, + "grad_norm": 26913.11328125, + "learning_rate": 4.023724354391331e-05, + "loss": 0.4145, + "step": 130950 + }, + { + "epoch": 0.6758813544455967, + "grad_norm": 22008.958984375, + "learning_rate": 4.020141912014846e-05, + "loss": 0.4118, + "step": 131000 + }, + { + "epoch": 0.6761393244282096, + "grad_norm": 21431.857421875, + "learning_rate": 4.016559992667632e-05, + "loss": 0.417, + "step": 131050 + }, + { + "epoch": 0.6763972944108223, + "grad_norm": 24077.453125, + "learning_rate": 4.0129785982616524e-05, + "loss": 0.4121, + "step": 131100 + }, + { + "epoch": 0.6766552643934352, + "grad_norm": 22978.5390625, + "learning_rate": 4.009397730708583e-05, + "loss": 0.4074, + "step": 131150 + }, + { + "epoch": 0.676913234376048, + "grad_norm": 25474.740234375, + "learning_rate": 4.005817391919826e-05, + "loss": 0.4159, + "step": 131200 + }, + { + "epoch": 0.6771712043586608, + "grad_norm": 23532.416015625, + "learning_rate": 4.0022375838064904e-05, + "loss": 0.4202, + "step": 131250 + }, + { + "epoch": 0.6774291743412737, + "grad_norm": 23746.072265625, + "learning_rate": 3.998658308279414e-05, + "loss": 0.4157, + "step": 131300 + }, + { + "epoch": 0.6776871443238864, + "grad_norm": 21691.6875, + "learning_rate": 3.995079567249142e-05, + "loss": 0.4158, + "step": 131350 + }, + { + "epoch": 0.6779451143064993, + "grad_norm": 24167.923828125, + "learning_rate": 3.991501362625937e-05, + "loss": 0.4165, + "step": 131400 + }, + { + "epoch": 0.6782030842891121, + "grad_norm": 22420.27734375, + "learning_rate": 3.9879236963197784e-05, + "loss": 0.418, + "step": 131450 + }, + { + "epoch": 0.678461054271725, + "grad_norm": 22116.75, + "learning_rate": 3.984346570240352e-05, + "loss": 0.4152, + "step": 131500 + }, + { + "epoch": 0.6787190242543377, + "grad_norm": 23841.001953125, + "learning_rate": 3.9807699862970596e-05, + "loss": 0.4179, + "step": 131550 + }, + { + "epoch": 0.6789769942369506, + "grad_norm": 22931.126953125, + "learning_rate": 3.977193946399011e-05, + "loss": 0.4171, + "step": 131600 + }, + { + "epoch": 0.6792349642195634, + "grad_norm": 24939.294921875, + "learning_rate": 3.973618452455031e-05, + "loss": 0.4147, + "step": 131650 + }, + { + "epoch": 0.6794929342021763, + "grad_norm": 22026.615234375, + "learning_rate": 3.970043506373644e-05, + "loss": 0.4084, + "step": 131700 + }, + { + "epoch": 0.679750904184789, + "grad_norm": 24636.595703125, + "learning_rate": 3.9664691100630904e-05, + "loss": 0.4137, + "step": 131750 + }, + { + "epoch": 0.6800088741674019, + "grad_norm": 25599.443359375, + "learning_rate": 3.962895265431311e-05, + "loss": 0.4167, + "step": 131800 + }, + { + "epoch": 0.6802668441500147, + "grad_norm": 23514.0078125, + "learning_rate": 3.9593219743859575e-05, + "loss": 0.408, + "step": 131850 + }, + { + "epoch": 0.6805248141326276, + "grad_norm": 21798.9609375, + "learning_rate": 3.9557492388343844e-05, + "loss": 0.4129, + "step": 131900 + }, + { + "epoch": 0.6807827841152404, + "grad_norm": 24803.248046875, + "learning_rate": 3.952177060683644e-05, + "loss": 0.4126, + "step": 131950 + }, + { + "epoch": 0.6810407540978531, + "grad_norm": 23215.529296875, + "learning_rate": 3.948605441840501e-05, + "loss": 0.4114, + "step": 132000 + }, + { + "epoch": 0.681298724080466, + "grad_norm": 21179.626953125, + "learning_rate": 3.945034384211412e-05, + "loss": 0.4139, + "step": 132050 + }, + { + "epoch": 0.6815566940630788, + "grad_norm": 22894.04296875, + "learning_rate": 3.941463889702543e-05, + "loss": 0.4144, + "step": 132100 + }, + { + "epoch": 0.6818146640456917, + "grad_norm": 22581.392578125, + "learning_rate": 3.937893960219751e-05, + "loss": 0.4163, + "step": 132150 + }, + { + "epoch": 0.6820726340283044, + "grad_norm": 27557.634765625, + "learning_rate": 3.9343245976685966e-05, + "loss": 0.4194, + "step": 132200 + }, + { + "epoch": 0.6823306040109173, + "grad_norm": 24157.97265625, + "learning_rate": 3.9307558039543355e-05, + "loss": 0.4089, + "step": 132250 + }, + { + "epoch": 0.6825885739935301, + "grad_norm": 23363.904296875, + "learning_rate": 3.927187580981922e-05, + "loss": 0.4108, + "step": 132300 + }, + { + "epoch": 0.682846543976143, + "grad_norm": 24005.15625, + "learning_rate": 3.9236199306560054e-05, + "loss": 0.4103, + "step": 132350 + }, + { + "epoch": 0.6831045139587557, + "grad_norm": 23476.4609375, + "learning_rate": 3.920052854880925e-05, + "loss": 0.4189, + "step": 132400 + }, + { + "epoch": 0.6833624839413686, + "grad_norm": 23734.173828125, + "learning_rate": 3.91648635556072e-05, + "loss": 0.4183, + "step": 132450 + }, + { + "epoch": 0.6836204539239814, + "grad_norm": 22112.642578125, + "learning_rate": 3.912920434599117e-05, + "loss": 0.4139, + "step": 132500 + }, + { + "epoch": 0.6838784239065943, + "grad_norm": 23442.96484375, + "learning_rate": 3.909355093899537e-05, + "loss": 0.4137, + "step": 132550 + }, + { + "epoch": 0.6841363938892071, + "grad_norm": 22873.734375, + "learning_rate": 3.905790335365087e-05, + "loss": 0.4097, + "step": 132600 + }, + { + "epoch": 0.6843943638718198, + "grad_norm": 24382.9140625, + "learning_rate": 3.902226160898567e-05, + "loss": 0.4134, + "step": 132650 + }, + { + "epoch": 0.6846523338544327, + "grad_norm": 23238.1953125, + "learning_rate": 3.898662572402468e-05, + "loss": 0.4137, + "step": 132700 + }, + { + "epoch": 0.6849103038370455, + "grad_norm": 21690.37890625, + "learning_rate": 3.89509957177896e-05, + "loss": 0.4114, + "step": 132750 + }, + { + "epoch": 0.6851682738196584, + "grad_norm": 25762.189453125, + "learning_rate": 3.891537160929907e-05, + "loss": 0.4134, + "step": 132800 + }, + { + "epoch": 0.6854262438022711, + "grad_norm": 22006.044921875, + "learning_rate": 3.88797534175685e-05, + "loss": 0.4132, + "step": 132850 + }, + { + "epoch": 0.685684213784884, + "grad_norm": 22149.5546875, + "learning_rate": 3.8844141161610256e-05, + "loss": 0.4154, + "step": 132900 + }, + { + "epoch": 0.6859421837674968, + "grad_norm": 23865.419921875, + "learning_rate": 3.880853486043343e-05, + "loss": 0.4135, + "step": 132950 + }, + { + "epoch": 0.6862001537501097, + "grad_norm": 22708.126953125, + "learning_rate": 3.877293453304399e-05, + "loss": 0.4143, + "step": 133000 + }, + { + "epoch": 0.6864581237327224, + "grad_norm": 19948.517578125, + "learning_rate": 3.8737340198444683e-05, + "loss": 0.4181, + "step": 133050 + }, + { + "epoch": 0.6867160937153353, + "grad_norm": 22594.826171875, + "learning_rate": 3.870175187563509e-05, + "loss": 0.4108, + "step": 133100 + }, + { + "epoch": 0.6869740636979481, + "grad_norm": 24876.56640625, + "learning_rate": 3.866616958361159e-05, + "loss": 0.4136, + "step": 133150 + }, + { + "epoch": 0.687232033680561, + "grad_norm": 20055.0859375, + "learning_rate": 3.8630593341367285e-05, + "loss": 0.4176, + "step": 133200 + }, + { + "epoch": 0.6874900036631737, + "grad_norm": 24807.9140625, + "learning_rate": 3.8595023167892096e-05, + "loss": 0.4084, + "step": 133250 + }, + { + "epoch": 0.6877479736457865, + "grad_norm": 21060.78125, + "learning_rate": 3.8559459082172696e-05, + "loss": 0.4086, + "step": 133300 + }, + { + "epoch": 0.6880059436283994, + "grad_norm": 22740.255859375, + "learning_rate": 3.852390110319252e-05, + "loss": 0.4109, + "step": 133350 + }, + { + "epoch": 0.6882639136110122, + "grad_norm": 24095.68359375, + "learning_rate": 3.848834924993169e-05, + "loss": 0.4118, + "step": 133400 + }, + { + "epoch": 0.6885218835936251, + "grad_norm": 20011.78125, + "learning_rate": 3.8452803541367136e-05, + "loss": 0.4133, + "step": 133450 + }, + { + "epoch": 0.6887798535762378, + "grad_norm": 21369.7265625, + "learning_rate": 3.8417263996472444e-05, + "loss": 0.4104, + "step": 133500 + }, + { + "epoch": 0.6890378235588507, + "grad_norm": 22532.251953125, + "learning_rate": 3.8381730634217946e-05, + "loss": 0.415, + "step": 133550 + }, + { + "epoch": 0.6892957935414635, + "grad_norm": 21174.34765625, + "learning_rate": 3.8346203473570677e-05, + "loss": 0.4121, + "step": 133600 + }, + { + "epoch": 0.6895537635240764, + "grad_norm": 21758.87109375, + "learning_rate": 3.831068253349431e-05, + "loss": 0.4181, + "step": 133650 + }, + { + "epoch": 0.6898117335066891, + "grad_norm": 21809.083984375, + "learning_rate": 3.827516783294927e-05, + "loss": 0.41, + "step": 133700 + }, + { + "epoch": 0.690069703489302, + "grad_norm": 21419.69921875, + "learning_rate": 3.8239659390892593e-05, + "loss": 0.4166, + "step": 133750 + }, + { + "epoch": 0.6903276734719148, + "grad_norm": 20746.517578125, + "learning_rate": 3.820415722627802e-05, + "loss": 0.4168, + "step": 133800 + }, + { + "epoch": 0.6905856434545277, + "grad_norm": 22737.89453125, + "learning_rate": 3.816866135805589e-05, + "loss": 0.4119, + "step": 133850 + }, + { + "epoch": 0.6908436134371404, + "grad_norm": 23691.408203125, + "learning_rate": 3.813317180517324e-05, + "loss": 0.4105, + "step": 133900 + }, + { + "epoch": 0.6911015834197533, + "grad_norm": 22899.70703125, + "learning_rate": 3.8097688586573684e-05, + "loss": 0.412, + "step": 133950 + }, + { + "epoch": 0.6913595534023661, + "grad_norm": 25553.763671875, + "learning_rate": 3.8062211721197475e-05, + "loss": 0.4158, + "step": 134000 + }, + { + "epoch": 0.691617523384979, + "grad_norm": 22099.93359375, + "learning_rate": 3.802674122798152e-05, + "loss": 0.4149, + "step": 134050 + }, + { + "epoch": 0.6918754933675918, + "grad_norm": 25735.91015625, + "learning_rate": 3.799127712585922e-05, + "loss": 0.4058, + "step": 134100 + }, + { + "epoch": 0.6921334633502045, + "grad_norm": 21259.95703125, + "learning_rate": 3.795581943376067e-05, + "loss": 0.4192, + "step": 134150 + }, + { + "epoch": 0.6923914333328174, + "grad_norm": 22438.23046875, + "learning_rate": 3.7920368170612476e-05, + "loss": 0.414, + "step": 134200 + }, + { + "epoch": 0.6926494033154302, + "grad_norm": 24721.974609375, + "learning_rate": 3.788492335533786e-05, + "loss": 0.4154, + "step": 134250 + }, + { + "epoch": 0.6929073732980431, + "grad_norm": 24267.611328125, + "learning_rate": 3.7849485006856545e-05, + "loss": 0.4108, + "step": 134300 + }, + { + "epoch": 0.6931653432806558, + "grad_norm": 25588.193359375, + "learning_rate": 3.781405314408486e-05, + "loss": 0.4169, + "step": 134350 + }, + { + "epoch": 0.6934233132632687, + "grad_norm": 22651.216796875, + "learning_rate": 3.7778627785935626e-05, + "loss": 0.4112, + "step": 134400 + }, + { + "epoch": 0.6936812832458815, + "grad_norm": 24765.76953125, + "learning_rate": 3.774320895131823e-05, + "loss": 0.4173, + "step": 134450 + }, + { + "epoch": 0.6939392532284944, + "grad_norm": 25384.44921875, + "learning_rate": 3.7707796659138584e-05, + "loss": 0.4097, + "step": 134500 + }, + { + "epoch": 0.6941972232111071, + "grad_norm": 21145.587890625, + "learning_rate": 3.767239092829903e-05, + "loss": 0.4125, + "step": 134550 + }, + { + "epoch": 0.69445519319372, + "grad_norm": 22693.28515625, + "learning_rate": 3.763699177769849e-05, + "loss": 0.4111, + "step": 134600 + }, + { + "epoch": 0.6947131631763328, + "grad_norm": 20415.33984375, + "learning_rate": 3.760159922623235e-05, + "loss": 0.4178, + "step": 134650 + }, + { + "epoch": 0.6949711331589457, + "grad_norm": 23304.33984375, + "learning_rate": 3.756621329279247e-05, + "loss": 0.4142, + "step": 134700 + }, + { + "epoch": 0.6952291031415585, + "grad_norm": 22485.029296875, + "learning_rate": 3.7530833996267156e-05, + "loss": 0.4129, + "step": 134750 + }, + { + "epoch": 0.6954870731241712, + "grad_norm": 20506.5625, + "learning_rate": 3.7495461355541206e-05, + "loss": 0.4104, + "step": 134800 + }, + { + "epoch": 0.6957450431067841, + "grad_norm": 26106.26953125, + "learning_rate": 3.746009538949584e-05, + "loss": 0.4122, + "step": 134850 + }, + { + "epoch": 0.6960030130893969, + "grad_norm": 25230.55859375, + "learning_rate": 3.742473611700874e-05, + "loss": 0.4173, + "step": 134900 + }, + { + "epoch": 0.6962609830720098, + "grad_norm": 23462.197265625, + "learning_rate": 3.738938355695402e-05, + "loss": 0.4211, + "step": 134950 + }, + { + "epoch": 0.6965189530546225, + "grad_norm": 22550.8359375, + "learning_rate": 3.735403772820213e-05, + "loss": 0.4154, + "step": 135000 + }, + { + "epoch": 0.6965189530546225, + "eval_loss": 0.399837851524353, + "eval_runtime": 3136.0222, + "eval_samples_per_second": 988.871, + "eval_steps_per_second": 1.931, + "step": 135000 + }, + { + "epoch": 0.6967769230372354, + "grad_norm": 22235.15234375, + "learning_rate": 3.731869864962004e-05, + "loss": 0.4183, + "step": 135050 + }, + { + "epoch": 0.6970348930198482, + "grad_norm": 21969.208984375, + "learning_rate": 3.728336634007105e-05, + "loss": 0.41, + "step": 135100 + }, + { + "epoch": 0.6972928630024611, + "grad_norm": 22907.32421875, + "learning_rate": 3.724804081841488e-05, + "loss": 0.4213, + "step": 135150 + }, + { + "epoch": 0.6975508329850738, + "grad_norm": 22994.646484375, + "learning_rate": 3.721272210350757e-05, + "loss": 0.4103, + "step": 135200 + }, + { + "epoch": 0.6978088029676867, + "grad_norm": 22118.224609375, + "learning_rate": 3.717741021420162e-05, + "loss": 0.4195, + "step": 135250 + }, + { + "epoch": 0.6980667729502995, + "grad_norm": 19673.6484375, + "learning_rate": 3.7142105169345764e-05, + "loss": 0.4105, + "step": 135300 + }, + { + "epoch": 0.6983247429329124, + "grad_norm": 23110.041015625, + "learning_rate": 3.71068069877852e-05, + "loss": 0.4132, + "step": 135350 + }, + { + "epoch": 0.6985827129155251, + "grad_norm": 26589.453125, + "learning_rate": 3.707151568836144e-05, + "loss": 0.4171, + "step": 135400 + }, + { + "epoch": 0.6988406828981379, + "grad_norm": 25272.74609375, + "learning_rate": 3.7036231289912206e-05, + "loss": 0.4098, + "step": 135450 + }, + { + "epoch": 0.6990986528807508, + "grad_norm": 23238.626953125, + "learning_rate": 3.700095381127172e-05, + "loss": 0.4102, + "step": 135500 + }, + { + "epoch": 0.6993566228633636, + "grad_norm": 25412.8203125, + "learning_rate": 3.696568327127036e-05, + "loss": 0.4131, + "step": 135550 + }, + { + "epoch": 0.6996145928459765, + "grad_norm": 22329.0703125, + "learning_rate": 3.693041968873488e-05, + "loss": 0.4196, + "step": 135600 + }, + { + "epoch": 0.6998725628285892, + "grad_norm": 23497.068359375, + "learning_rate": 3.6895163082488294e-05, + "loss": 0.4137, + "step": 135650 + }, + { + "epoch": 0.7001305328112021, + "grad_norm": 23415.0859375, + "learning_rate": 3.6859913471349906e-05, + "loss": 0.4088, + "step": 135700 + }, + { + "epoch": 0.7003885027938149, + "grad_norm": 24474.064453125, + "learning_rate": 3.682467087413525e-05, + "loss": 0.4122, + "step": 135750 + }, + { + "epoch": 0.7006464727764278, + "grad_norm": 24427.3359375, + "learning_rate": 3.678943530965615e-05, + "loss": 0.4133, + "step": 135800 + }, + { + "epoch": 0.7009044427590405, + "grad_norm": 24399.58203125, + "learning_rate": 3.675420679672068e-05, + "loss": 0.4113, + "step": 135850 + }, + { + "epoch": 0.7011624127416534, + "grad_norm": 22070.033203125, + "learning_rate": 3.671898535413313e-05, + "loss": 0.4099, + "step": 135900 + }, + { + "epoch": 0.7014203827242662, + "grad_norm": 21846.20703125, + "learning_rate": 3.668377100069404e-05, + "loss": 0.4164, + "step": 135950 + }, + { + "epoch": 0.7016783527068791, + "grad_norm": 21927.2265625, + "learning_rate": 3.664856375520012e-05, + "loss": 0.4124, + "step": 136000 + }, + { + "epoch": 0.7019363226894918, + "grad_norm": 22155.341796875, + "learning_rate": 3.6613363636444344e-05, + "loss": 0.416, + "step": 136050 + }, + { + "epoch": 0.7021942926721046, + "grad_norm": 23344.486328125, + "learning_rate": 3.6578170663215826e-05, + "loss": 0.4162, + "step": 136100 + }, + { + "epoch": 0.7024522626547175, + "grad_norm": 23390.642578125, + "learning_rate": 3.6542984854299936e-05, + "loss": 0.4082, + "step": 136150 + }, + { + "epoch": 0.7027102326373303, + "grad_norm": 22980.90625, + "learning_rate": 3.6507806228478125e-05, + "loss": 0.4067, + "step": 136200 + }, + { + "epoch": 0.7029682026199432, + "grad_norm": 22321.662109375, + "learning_rate": 3.6472634804528095e-05, + "loss": 0.4129, + "step": 136250 + }, + { + "epoch": 0.7032261726025559, + "grad_norm": 22719.455078125, + "learning_rate": 3.643747060122366e-05, + "loss": 0.4169, + "step": 136300 + }, + { + "epoch": 0.7034841425851688, + "grad_norm": 25283.494140625, + "learning_rate": 3.640231363733481e-05, + "loss": 0.4081, + "step": 136350 + }, + { + "epoch": 0.7037421125677816, + "grad_norm": 24430.919921875, + "learning_rate": 3.636716393162764e-05, + "loss": 0.4187, + "step": 136400 + }, + { + "epoch": 0.7040000825503945, + "grad_norm": 23372.662109375, + "learning_rate": 3.633202150286435e-05, + "loss": 0.4117, + "step": 136450 + }, + { + "epoch": 0.7042580525330072, + "grad_norm": 23912.595703125, + "learning_rate": 3.6296886369803346e-05, + "loss": 0.4126, + "step": 136500 + }, + { + "epoch": 0.7045160225156201, + "grad_norm": 24092.0390625, + "learning_rate": 3.626175855119903e-05, + "loss": 0.4163, + "step": 136550 + }, + { + "epoch": 0.7047739924982329, + "grad_norm": 23452.2421875, + "learning_rate": 3.6226638065802e-05, + "loss": 0.4088, + "step": 136600 + }, + { + "epoch": 0.7050319624808458, + "grad_norm": 24399.787109375, + "learning_rate": 3.6191524932358845e-05, + "loss": 0.4139, + "step": 136650 + }, + { + "epoch": 0.7052899324634585, + "grad_norm": 23295.599609375, + "learning_rate": 3.6156419169612287e-05, + "loss": 0.4112, + "step": 136700 + }, + { + "epoch": 0.7055479024460714, + "grad_norm": 25809.876953125, + "learning_rate": 3.6121320796301126e-05, + "loss": 0.4141, + "step": 136750 + }, + { + "epoch": 0.7058058724286842, + "grad_norm": 21679.818359375, + "learning_rate": 3.608622983116018e-05, + "loss": 0.4183, + "step": 136800 + }, + { + "epoch": 0.706063842411297, + "grad_norm": 24492.578125, + "learning_rate": 3.6051146292920334e-05, + "loss": 0.4103, + "step": 136850 + }, + { + "epoch": 0.7063218123939099, + "grad_norm": 24805.59375, + "learning_rate": 3.601607020030847e-05, + "loss": 0.4129, + "step": 136900 + }, + { + "epoch": 0.7065797823765226, + "grad_norm": 23000.9765625, + "learning_rate": 3.5981001572047566e-05, + "loss": 0.4091, + "step": 136950 + }, + { + "epoch": 0.7068377523591355, + "grad_norm": 24590.6875, + "learning_rate": 3.594594042685655e-05, + "loss": 0.4061, + "step": 137000 + }, + { + "epoch": 0.7070957223417483, + "grad_norm": 22223.16015625, + "learning_rate": 3.5910886783450416e-05, + "loss": 0.4174, + "step": 137050 + }, + { + "epoch": 0.7073536923243612, + "grad_norm": 28207.7578125, + "learning_rate": 3.587584066054007e-05, + "loss": 0.4119, + "step": 137100 + }, + { + "epoch": 0.7076116623069739, + "grad_norm": 23703.271484375, + "learning_rate": 3.584080207683249e-05, + "loss": 0.4104, + "step": 137150 + }, + { + "epoch": 0.7078696322895868, + "grad_norm": 24903.92578125, + "learning_rate": 3.580577105103059e-05, + "loss": 0.4139, + "step": 137200 + }, + { + "epoch": 0.7081276022721996, + "grad_norm": 21130.029296875, + "learning_rate": 3.5770747601833235e-05, + "loss": 0.4208, + "step": 137250 + }, + { + "epoch": 0.7083855722548125, + "grad_norm": 22223.611328125, + "learning_rate": 3.5735731747935306e-05, + "loss": 0.4118, + "step": 137300 + }, + { + "epoch": 0.7086435422374252, + "grad_norm": 21862.12109375, + "learning_rate": 3.570072350802753e-05, + "loss": 0.4101, + "step": 137350 + }, + { + "epoch": 0.708901512220038, + "grad_norm": 22504.25390625, + "learning_rate": 3.566572290079667e-05, + "loss": 0.4187, + "step": 137400 + }, + { + "epoch": 0.7091594822026509, + "grad_norm": 21898.53125, + "learning_rate": 3.563072994492535e-05, + "loss": 0.4068, + "step": 137450 + }, + { + "epoch": 0.7094174521852638, + "grad_norm": 21629.5859375, + "learning_rate": 3.559574465909215e-05, + "loss": 0.4107, + "step": 137500 + }, + { + "epoch": 0.7096754221678765, + "grad_norm": 23078.080078125, + "learning_rate": 3.5560767061971515e-05, + "loss": 0.4093, + "step": 137550 + }, + { + "epoch": 0.7099333921504893, + "grad_norm": 21831.11328125, + "learning_rate": 3.5525797172233826e-05, + "loss": 0.4083, + "step": 137600 + }, + { + "epoch": 0.7101913621331022, + "grad_norm": 20934.220703125, + "learning_rate": 3.5490835008545334e-05, + "loss": 0.4143, + "step": 137650 + }, + { + "epoch": 0.710449332115715, + "grad_norm": 21335.0, + "learning_rate": 3.545588058956816e-05, + "loss": 0.4104, + "step": 137700 + }, + { + "epoch": 0.7107073020983279, + "grad_norm": 20424.279296875, + "learning_rate": 3.542093393396031e-05, + "loss": 0.4117, + "step": 137750 + }, + { + "epoch": 0.7109652720809406, + "grad_norm": 24527.76171875, + "learning_rate": 3.5385995060375596e-05, + "loss": 0.4128, + "step": 137800 + }, + { + "epoch": 0.7112232420635535, + "grad_norm": 23370.17578125, + "learning_rate": 3.535106398746376e-05, + "loss": 0.4149, + "step": 137850 + }, + { + "epoch": 0.7114812120461663, + "grad_norm": 22996.2890625, + "learning_rate": 3.531614073387028e-05, + "loss": 0.412, + "step": 137900 + }, + { + "epoch": 0.7117391820287792, + "grad_norm": 26592.931640625, + "learning_rate": 3.528122531823657e-05, + "loss": 0.4111, + "step": 137950 + }, + { + "epoch": 0.7119971520113919, + "grad_norm": 22353.35546875, + "learning_rate": 3.5246317759199745e-05, + "loss": 0.412, + "step": 138000 + }, + { + "epoch": 0.7122551219940048, + "grad_norm": 22266.91796875, + "learning_rate": 3.521141807539281e-05, + "loss": 0.4113, + "step": 138050 + }, + { + "epoch": 0.7125130919766176, + "grad_norm": 21723.318359375, + "learning_rate": 3.517652628544457e-05, + "loss": 0.4058, + "step": 138100 + }, + { + "epoch": 0.7127710619592305, + "grad_norm": 23738.322265625, + "learning_rate": 3.5141642407979535e-05, + "loss": 0.4072, + "step": 138150 + }, + { + "epoch": 0.7130290319418432, + "grad_norm": 25993.587890625, + "learning_rate": 3.5106766461618083e-05, + "loss": 0.4066, + "step": 138200 + }, + { + "epoch": 0.713287001924456, + "grad_norm": 23321.55859375, + "learning_rate": 3.50718984649763e-05, + "loss": 0.4104, + "step": 138250 + }, + { + "epoch": 0.7135449719070689, + "grad_norm": 22022.267578125, + "learning_rate": 3.503703843666605e-05, + "loss": 0.4096, + "step": 138300 + }, + { + "epoch": 0.7138029418896817, + "grad_norm": 22249.640625, + "learning_rate": 3.500218639529493e-05, + "loss": 0.4121, + "step": 138350 + }, + { + "epoch": 0.7140609118722946, + "grad_norm": 21145.283203125, + "learning_rate": 3.496734235946632e-05, + "loss": 0.4126, + "step": 138400 + }, + { + "epoch": 0.7143188818549073, + "grad_norm": 22439.38671875, + "learning_rate": 3.493250634777924e-05, + "loss": 0.4076, + "step": 138450 + }, + { + "epoch": 0.7145768518375202, + "grad_norm": 25641.93359375, + "learning_rate": 3.4897678378828516e-05, + "loss": 0.4105, + "step": 138500 + }, + { + "epoch": 0.714834821820133, + "grad_norm": 22200.46875, + "learning_rate": 3.486285847120465e-05, + "loss": 0.4097, + "step": 138550 + }, + { + "epoch": 0.7150927918027459, + "grad_norm": 22691.666015625, + "learning_rate": 3.482804664349381e-05, + "loss": 0.4154, + "step": 138600 + }, + { + "epoch": 0.7153507617853586, + "grad_norm": 22139.16796875, + "learning_rate": 3.479324291427788e-05, + "loss": 0.4124, + "step": 138650 + }, + { + "epoch": 0.7156087317679715, + "grad_norm": 23695.7578125, + "learning_rate": 3.4758447302134414e-05, + "loss": 0.4174, + "step": 138700 + }, + { + "epoch": 0.7158667017505843, + "grad_norm": 24720.06640625, + "learning_rate": 3.472365982563666e-05, + "loss": 0.4095, + "step": 138750 + }, + { + "epoch": 0.7161246717331972, + "grad_norm": 22861.171875, + "learning_rate": 3.4688880503353474e-05, + "loss": 0.4039, + "step": 138800 + }, + { + "epoch": 0.7163826417158099, + "grad_norm": 22751.833984375, + "learning_rate": 3.465410935384939e-05, + "loss": 0.4175, + "step": 138850 + }, + { + "epoch": 0.7166406116984227, + "grad_norm": 22689.5, + "learning_rate": 3.461934639568457e-05, + "loss": 0.4133, + "step": 138900 + }, + { + "epoch": 0.7168985816810356, + "grad_norm": 23292.1328125, + "learning_rate": 3.458459164741482e-05, + "loss": 0.4062, + "step": 138950 + }, + { + "epoch": 0.7171565516636484, + "grad_norm": 22390.515625, + "learning_rate": 3.4549845127591563e-05, + "loss": 0.4169, + "step": 139000 + }, + { + "epoch": 0.7174145216462613, + "grad_norm": 23531.9921875, + "learning_rate": 3.451510685476178e-05, + "loss": 0.4084, + "step": 139050 + }, + { + "epoch": 0.717672491628874, + "grad_norm": 23847.154296875, + "learning_rate": 3.448037684746812e-05, + "loss": 0.4134, + "step": 139100 + }, + { + "epoch": 0.7179304616114869, + "grad_norm": 22651.15234375, + "learning_rate": 3.4445655124248774e-05, + "loss": 0.4118, + "step": 139150 + }, + { + "epoch": 0.7181884315940997, + "grad_norm": 21893.123046875, + "learning_rate": 3.441094170363755e-05, + "loss": 0.4065, + "step": 139200 + }, + { + "epoch": 0.7184464015767126, + "grad_norm": 22238.685546875, + "learning_rate": 3.4376236604163756e-05, + "loss": 0.4164, + "step": 139250 + }, + { + "epoch": 0.7187043715593253, + "grad_norm": 25605.083984375, + "learning_rate": 3.434153984435234e-05, + "loss": 0.4105, + "step": 139300 + }, + { + "epoch": 0.7189623415419382, + "grad_norm": 22414.0703125, + "learning_rate": 3.430685144272374e-05, + "loss": 0.4095, + "step": 139350 + }, + { + "epoch": 0.719220311524551, + "grad_norm": 22067.443359375, + "learning_rate": 3.4272171417793954e-05, + "loss": 0.4105, + "step": 139400 + }, + { + "epoch": 0.7194782815071639, + "grad_norm": 22398.36328125, + "learning_rate": 3.423749978807454e-05, + "loss": 0.4065, + "step": 139450 + }, + { + "epoch": 0.7197362514897766, + "grad_norm": 25660.017578125, + "learning_rate": 3.420283657207248e-05, + "loss": 0.4139, + "step": 139500 + }, + { + "epoch": 0.7199942214723895, + "grad_norm": 27245.4609375, + "learning_rate": 3.416818178829039e-05, + "loss": 0.4106, + "step": 139550 + }, + { + "epoch": 0.7202521914550023, + "grad_norm": 22430.6484375, + "learning_rate": 3.413353545522628e-05, + "loss": 0.4103, + "step": 139600 + }, + { + "epoch": 0.7205101614376151, + "grad_norm": 25269.876953125, + "learning_rate": 3.409889759137373e-05, + "loss": 0.4073, + "step": 139650 + }, + { + "epoch": 0.720768131420228, + "grad_norm": 22811.275390625, + "learning_rate": 3.406426821522172e-05, + "loss": 0.4156, + "step": 139700 + }, + { + "epoch": 0.7210261014028407, + "grad_norm": 21838.966796875, + "learning_rate": 3.402964734525477e-05, + "loss": 0.4132, + "step": 139750 + }, + { + "epoch": 0.7212840713854536, + "grad_norm": 22130.935546875, + "learning_rate": 3.39950349999528e-05, + "loss": 0.418, + "step": 139800 + }, + { + "epoch": 0.7215420413680664, + "grad_norm": 22744.779296875, + "learning_rate": 3.396043119779123e-05, + "loss": 0.4098, + "step": 139850 + }, + { + "epoch": 0.7218000113506793, + "grad_norm": 22559.07421875, + "learning_rate": 3.392583595724093e-05, + "loss": 0.4159, + "step": 139900 + }, + { + "epoch": 0.722057981333292, + "grad_norm": 20920.349609375, + "learning_rate": 3.3891249296768116e-05, + "loss": 0.406, + "step": 139950 + }, + { + "epoch": 0.7223159513159049, + "grad_norm": 20708.716796875, + "learning_rate": 3.38566712348345e-05, + "loss": 0.4102, + "step": 140000 + }, + { + "epoch": 0.7223159513159049, + "eval_loss": 0.39852654933929443, + "eval_runtime": 3128.1309, + "eval_samples_per_second": 991.365, + "eval_steps_per_second": 1.936, + "step": 140000 + }, + { + "epoch": 0.7225739212985177, + "grad_norm": 24440.734375, + "learning_rate": 3.382210178989718e-05, + "loss": 0.4144, + "step": 140050 + }, + { + "epoch": 0.7228318912811306, + "grad_norm": 22715.88671875, + "learning_rate": 3.378754098040867e-05, + "loss": 0.4146, + "step": 140100 + }, + { + "epoch": 0.7230898612637433, + "grad_norm": 23713.474609375, + "learning_rate": 3.375298882481683e-05, + "loss": 0.4089, + "step": 140150 + }, + { + "epoch": 0.7233478312463562, + "grad_norm": 24705.048828125, + "learning_rate": 3.371844534156497e-05, + "loss": 0.4052, + "step": 140200 + }, + { + "epoch": 0.723605801228969, + "grad_norm": 22624.98046875, + "learning_rate": 3.368391054909169e-05, + "loss": 0.4155, + "step": 140250 + }, + { + "epoch": 0.7238637712115819, + "grad_norm": 24774.72265625, + "learning_rate": 3.364938446583103e-05, + "loss": 0.4058, + "step": 140300 + }, + { + "epoch": 0.7241217411941946, + "grad_norm": 24109.02734375, + "learning_rate": 3.361486711021235e-05, + "loss": 0.4169, + "step": 140350 + }, + { + "epoch": 0.7243797111768074, + "grad_norm": 20315.724609375, + "learning_rate": 3.3580358500660284e-05, + "loss": 0.4135, + "step": 140400 + }, + { + "epoch": 0.7246376811594203, + "grad_norm": 26642.84765625, + "learning_rate": 3.3545858655594935e-05, + "loss": 0.4182, + "step": 140450 + }, + { + "epoch": 0.7248956511420331, + "grad_norm": 23466.93359375, + "learning_rate": 3.351136759343161e-05, + "loss": 0.4098, + "step": 140500 + }, + { + "epoch": 0.725153621124646, + "grad_norm": 25247.11328125, + "learning_rate": 3.3476885332580985e-05, + "loss": 0.4085, + "step": 140550 + }, + { + "epoch": 0.7254115911072587, + "grad_norm": 25220.11328125, + "learning_rate": 3.3442411891449e-05, + "loss": 0.4139, + "step": 140600 + }, + { + "epoch": 0.7256695610898716, + "grad_norm": 21836.095703125, + "learning_rate": 3.3407947288436936e-05, + "loss": 0.4127, + "step": 140650 + }, + { + "epoch": 0.7259275310724844, + "grad_norm": 22301.443359375, + "learning_rate": 3.3373491541941346e-05, + "loss": 0.4127, + "step": 140700 + }, + { + "epoch": 0.7261855010550973, + "grad_norm": 21902.615234375, + "learning_rate": 3.333904467035399e-05, + "loss": 0.4111, + "step": 140750 + }, + { + "epoch": 0.72644347103771, + "grad_norm": 21408.71484375, + "learning_rate": 3.3304606692061984e-05, + "loss": 0.4095, + "step": 140800 + }, + { + "epoch": 0.7267014410203229, + "grad_norm": 26146.03515625, + "learning_rate": 3.3270177625447626e-05, + "loss": 0.4096, + "step": 140850 + }, + { + "epoch": 0.7269594110029357, + "grad_norm": 22772.9921875, + "learning_rate": 3.323575748888852e-05, + "loss": 0.4109, + "step": 140900 + }, + { + "epoch": 0.7272173809855486, + "grad_norm": 24654.810546875, + "learning_rate": 3.320134630075742e-05, + "loss": 0.4135, + "step": 140950 + }, + { + "epoch": 0.7274753509681613, + "grad_norm": 23458.103515625, + "learning_rate": 3.31669440794224e-05, + "loss": 0.4128, + "step": 141000 + }, + { + "epoch": 0.7277333209507741, + "grad_norm": 22455.630859375, + "learning_rate": 3.3132550843246654e-05, + "loss": 0.411, + "step": 141050 + }, + { + "epoch": 0.727991290933387, + "grad_norm": 22372.08203125, + "learning_rate": 3.3098166610588655e-05, + "loss": 0.413, + "step": 141100 + }, + { + "epoch": 0.7282492609159998, + "grad_norm": 22878.216796875, + "learning_rate": 3.306379139980206e-05, + "loss": 0.4054, + "step": 141150 + }, + { + "epoch": 0.7285072308986127, + "grad_norm": 22959.708984375, + "learning_rate": 3.302942522923563e-05, + "loss": 0.4114, + "step": 141200 + }, + { + "epoch": 0.7287652008812254, + "grad_norm": 22574.986328125, + "learning_rate": 3.2995068117233417e-05, + "loss": 0.4105, + "step": 141250 + }, + { + "epoch": 0.7290231708638383, + "grad_norm": 23770.279296875, + "learning_rate": 3.2960720082134555e-05, + "loss": 0.4091, + "step": 141300 + }, + { + "epoch": 0.7292811408464511, + "grad_norm": 23017.416015625, + "learning_rate": 3.292638114227338e-05, + "loss": 0.411, + "step": 141350 + }, + { + "epoch": 0.729539110829064, + "grad_norm": 23605.982421875, + "learning_rate": 3.289205131597932e-05, + "loss": 0.4097, + "step": 141400 + }, + { + "epoch": 0.7297970808116767, + "grad_norm": 22409.12890625, + "learning_rate": 3.2857730621577006e-05, + "loss": 0.4096, + "step": 141450 + }, + { + "epoch": 0.7300550507942896, + "grad_norm": 22681.11328125, + "learning_rate": 3.282341907738613e-05, + "loss": 0.4066, + "step": 141500 + }, + { + "epoch": 0.7303130207769024, + "grad_norm": 27188.859375, + "learning_rate": 3.278911670172154e-05, + "loss": 0.4104, + "step": 141550 + }, + { + "epoch": 0.7305709907595153, + "grad_norm": 25134.85546875, + "learning_rate": 3.2754823512893225e-05, + "loss": 0.4105, + "step": 141600 + }, + { + "epoch": 0.730828960742128, + "grad_norm": 21408.478515625, + "learning_rate": 3.2720539529206154e-05, + "loss": 0.412, + "step": 141650 + }, + { + "epoch": 0.7310869307247408, + "grad_norm": 21062.59375, + "learning_rate": 3.26862647689605e-05, + "loss": 0.411, + "step": 141700 + }, + { + "epoch": 0.7313449007073537, + "grad_norm": 21591.23828125, + "learning_rate": 3.265199925045143e-05, + "loss": 0.4171, + "step": 141750 + }, + { + "epoch": 0.7316028706899665, + "grad_norm": 23328.751953125, + "learning_rate": 3.261774299196926e-05, + "loss": 0.4127, + "step": 141800 + }, + { + "epoch": 0.7318608406725794, + "grad_norm": 27247.59375, + "learning_rate": 3.258349601179928e-05, + "loss": 0.4087, + "step": 141850 + }, + { + "epoch": 0.7321188106551921, + "grad_norm": 24500.822265625, + "learning_rate": 3.254925832822188e-05, + "loss": 0.4015, + "step": 141900 + }, + { + "epoch": 0.732376780637805, + "grad_norm": 25855.849609375, + "learning_rate": 3.251502995951247e-05, + "loss": 0.4125, + "step": 141950 + }, + { + "epoch": 0.7326347506204178, + "grad_norm": 23075.234375, + "learning_rate": 3.248081092394148e-05, + "loss": 0.4112, + "step": 142000 + }, + { + "epoch": 0.7328927206030307, + "grad_norm": 25166.712890625, + "learning_rate": 3.2446601239774405e-05, + "loss": 0.4121, + "step": 142050 + }, + { + "epoch": 0.7331506905856434, + "grad_norm": 23327.337890625, + "learning_rate": 3.241240092527167e-05, + "loss": 0.41, + "step": 142100 + }, + { + "epoch": 0.7334086605682563, + "grad_norm": 34138.34375, + "learning_rate": 3.237820999868876e-05, + "loss": 0.413, + "step": 142150 + }, + { + "epoch": 0.7336666305508691, + "grad_norm": 23031.2109375, + "learning_rate": 3.234402847827612e-05, + "loss": 0.414, + "step": 142200 + }, + { + "epoch": 0.733924600533482, + "grad_norm": 23237.44921875, + "learning_rate": 3.230985638227921e-05, + "loss": 0.4159, + "step": 142250 + }, + { + "epoch": 0.7341825705160947, + "grad_norm": 21437.705078125, + "learning_rate": 3.2275693728938395e-05, + "loss": 0.4078, + "step": 142300 + }, + { + "epoch": 0.7344405404987076, + "grad_norm": 23815.9140625, + "learning_rate": 3.224154053648906e-05, + "loss": 0.4135, + "step": 142350 + }, + { + "epoch": 0.7346985104813204, + "grad_norm": 26809.724609375, + "learning_rate": 3.2207396823161514e-05, + "loss": 0.409, + "step": 142400 + }, + { + "epoch": 0.7349564804639332, + "grad_norm": 21905.6484375, + "learning_rate": 3.2173262607181e-05, + "loss": 0.41, + "step": 142450 + }, + { + "epoch": 0.735214450446546, + "grad_norm": 23628.076171875, + "learning_rate": 3.2139137906767743e-05, + "loss": 0.4175, + "step": 142500 + }, + { + "epoch": 0.7354724204291588, + "grad_norm": 24156.837890625, + "learning_rate": 3.210502274013679e-05, + "loss": 0.4114, + "step": 142550 + }, + { + "epoch": 0.7357303904117717, + "grad_norm": 21517.404296875, + "learning_rate": 3.207091712549819e-05, + "loss": 0.4112, + "step": 142600 + }, + { + "epoch": 0.7359883603943845, + "grad_norm": 22684.734375, + "learning_rate": 3.203682108105685e-05, + "loss": 0.417, + "step": 142650 + }, + { + "epoch": 0.7362463303769974, + "grad_norm": 22205.361328125, + "learning_rate": 3.2002734625012585e-05, + "loss": 0.4111, + "step": 142700 + }, + { + "epoch": 0.7365043003596101, + "grad_norm": 21131.41796875, + "learning_rate": 3.196865777556008e-05, + "loss": 0.4114, + "step": 142750 + }, + { + "epoch": 0.736762270342223, + "grad_norm": 23506.66796875, + "learning_rate": 3.1934590550888894e-05, + "loss": 0.4183, + "step": 142800 + }, + { + "epoch": 0.7370202403248358, + "grad_norm": 24435.4609375, + "learning_rate": 3.190053296918345e-05, + "loss": 0.4166, + "step": 142850 + }, + { + "epoch": 0.7372782103074487, + "grad_norm": 22610.4296875, + "learning_rate": 3.186648504862303e-05, + "loss": 0.4109, + "step": 142900 + }, + { + "epoch": 0.7375361802900614, + "grad_norm": 24722.974609375, + "learning_rate": 3.183244680738178e-05, + "loss": 0.4093, + "step": 142950 + }, + { + "epoch": 0.7377941502726743, + "grad_norm": 23927.6640625, + "learning_rate": 3.1798418263628596e-05, + "loss": 0.4106, + "step": 143000 + }, + { + "epoch": 0.7380521202552871, + "grad_norm": 23958.216796875, + "learning_rate": 3.176439943552732e-05, + "loss": 0.4067, + "step": 143050 + }, + { + "epoch": 0.7383100902379, + "grad_norm": 23272.818359375, + "learning_rate": 3.1730390341236496e-05, + "loss": 0.4086, + "step": 143100 + }, + { + "epoch": 0.7385680602205127, + "grad_norm": 20998.751953125, + "learning_rate": 3.1696390998909556e-05, + "loss": 0.4099, + "step": 143150 + }, + { + "epoch": 0.7388260302031255, + "grad_norm": 24493.677734375, + "learning_rate": 3.166240142669464e-05, + "loss": 0.413, + "step": 143200 + }, + { + "epoch": 0.7390840001857384, + "grad_norm": 22639.927734375, + "learning_rate": 3.162842164273479e-05, + "loss": 0.4105, + "step": 143250 + }, + { + "epoch": 0.7393419701683512, + "grad_norm": 24407.029296875, + "learning_rate": 3.15944516651677e-05, + "loss": 0.4188, + "step": 143300 + }, + { + "epoch": 0.7395999401509641, + "grad_norm": 26538.68359375, + "learning_rate": 3.156049151212591e-05, + "loss": 0.4135, + "step": 143350 + }, + { + "epoch": 0.7398579101335768, + "grad_norm": 24519.060546875, + "learning_rate": 3.1526541201736695e-05, + "loss": 0.4141, + "step": 143400 + }, + { + "epoch": 0.7401158801161897, + "grad_norm": 21236.681640625, + "learning_rate": 3.149260075212206e-05, + "loss": 0.4096, + "step": 143450 + }, + { + "epoch": 0.7403738500988025, + "grad_norm": 24463.015625, + "learning_rate": 3.1458670181398796e-05, + "loss": 0.4035, + "step": 143500 + }, + { + "epoch": 0.7406318200814154, + "grad_norm": 26984.408203125, + "learning_rate": 3.1424749507678336e-05, + "loss": 0.4115, + "step": 143550 + }, + { + "epoch": 0.7408897900640281, + "grad_norm": 25619.35546875, + "learning_rate": 3.139083874906691e-05, + "loss": 0.4131, + "step": 143600 + }, + { + "epoch": 0.741147760046641, + "grad_norm": 24277.7890625, + "learning_rate": 3.13569379236654e-05, + "loss": 0.3994, + "step": 143650 + }, + { + "epoch": 0.7414057300292538, + "grad_norm": 24007.654296875, + "learning_rate": 3.1323047049569446e-05, + "loss": 0.4091, + "step": 143700 + }, + { + "epoch": 0.7416637000118667, + "grad_norm": 21688.703125, + "learning_rate": 3.12891661448693e-05, + "loss": 0.4176, + "step": 143750 + }, + { + "epoch": 0.7419216699944794, + "grad_norm": 22735.900390625, + "learning_rate": 3.125529522764995e-05, + "loss": 0.4091, + "step": 143800 + }, + { + "epoch": 0.7421796399770922, + "grad_norm": 23359.259765625, + "learning_rate": 3.122143431599105e-05, + "loss": 0.4125, + "step": 143850 + }, + { + "epoch": 0.7424376099597051, + "grad_norm": 26637.263671875, + "learning_rate": 3.118758342796687e-05, + "loss": 0.4137, + "step": 143900 + }, + { + "epoch": 0.7426955799423179, + "grad_norm": 24977.3984375, + "learning_rate": 3.1153742581646406e-05, + "loss": 0.4094, + "step": 143950 + }, + { + "epoch": 0.7429535499249308, + "grad_norm": 25850.91796875, + "learning_rate": 3.111991179509318e-05, + "loss": 0.4092, + "step": 144000 + }, + { + "epoch": 0.7432115199075435, + "grad_norm": 22823.0625, + "learning_rate": 3.1086091086365474e-05, + "loss": 0.4111, + "step": 144050 + }, + { + "epoch": 0.7434694898901564, + "grad_norm": 24187.640625, + "learning_rate": 3.1052280473516076e-05, + "loss": 0.414, + "step": 144100 + }, + { + "epoch": 0.7437274598727692, + "grad_norm": 21726.537109375, + "learning_rate": 3.101847997459249e-05, + "loss": 0.4098, + "step": 144150 + }, + { + "epoch": 0.7439854298553821, + "grad_norm": 23075.27734375, + "learning_rate": 3.098468960763671e-05, + "loss": 0.4084, + "step": 144200 + }, + { + "epoch": 0.7442433998379948, + "grad_norm": 24207.529296875, + "learning_rate": 3.095090939068541e-05, + "loss": 0.4156, + "step": 144250 + }, + { + "epoch": 0.7445013698206077, + "grad_norm": 25209.39453125, + "learning_rate": 3.091713934176982e-05, + "loss": 0.4122, + "step": 144300 + }, + { + "epoch": 0.7447593398032205, + "grad_norm": 24308.361328125, + "learning_rate": 3.088337947891573e-05, + "loss": 0.408, + "step": 144350 + }, + { + "epoch": 0.7450173097858334, + "grad_norm": 22416.990234375, + "learning_rate": 3.0849629820143517e-05, + "loss": 0.4136, + "step": 144400 + }, + { + "epoch": 0.7452752797684461, + "grad_norm": 24417.943359375, + "learning_rate": 3.081589038346806e-05, + "loss": 0.4079, + "step": 144450 + }, + { + "epoch": 0.745533249751059, + "grad_norm": 21519.650390625, + "learning_rate": 3.078216118689885e-05, + "loss": 0.4073, + "step": 144500 + }, + { + "epoch": 0.7457912197336718, + "grad_norm": 22821.1796875, + "learning_rate": 3.074844224843986e-05, + "loss": 0.4058, + "step": 144550 + }, + { + "epoch": 0.7460491897162846, + "grad_norm": 22559.86328125, + "learning_rate": 3.071473358608963e-05, + "loss": 0.413, + "step": 144600 + }, + { + "epoch": 0.7463071596988974, + "grad_norm": 25803.658203125, + "learning_rate": 3.068103521784115e-05, + "loss": 0.4077, + "step": 144650 + }, + { + "epoch": 0.7465651296815102, + "grad_norm": 27601.787109375, + "learning_rate": 3.0647347161681983e-05, + "loss": 0.4057, + "step": 144700 + }, + { + "epoch": 0.7468230996641231, + "grad_norm": 21363.67578125, + "learning_rate": 3.061366943559417e-05, + "loss": 0.4082, + "step": 144750 + }, + { + "epoch": 0.7470810696467359, + "grad_norm": 24007.3046875, + "learning_rate": 3.058000205755421e-05, + "loss": 0.4121, + "step": 144800 + }, + { + "epoch": 0.7473390396293488, + "grad_norm": 29940.8125, + "learning_rate": 3.054634504553312e-05, + "loss": 0.4046, + "step": 144850 + }, + { + "epoch": 0.7475970096119615, + "grad_norm": 24161.90234375, + "learning_rate": 3.0512698417496334e-05, + "loss": 0.4108, + "step": 144900 + }, + { + "epoch": 0.7478549795945744, + "grad_norm": 22984.619140625, + "learning_rate": 3.0479062191403785e-05, + "loss": 0.4158, + "step": 144950 + }, + { + "epoch": 0.7481129495771872, + "grad_norm": 26418.95703125, + "learning_rate": 3.0445436385209836e-05, + "loss": 0.4059, + "step": 145000 + }, + { + "epoch": 0.7481129495771872, + "eval_loss": 0.3971329629421234, + "eval_runtime": 3201.285, + "eval_samples_per_second": 968.711, + "eval_steps_per_second": 1.892, + "step": 145000 + }, + { + "epoch": 0.7483709195598001, + "grad_norm": 22503.525390625, + "learning_rate": 3.0411821016863308e-05, + "loss": 0.4048, + "step": 145050 + }, + { + "epoch": 0.7486288895424128, + "grad_norm": 23114.525390625, + "learning_rate": 3.03782161043074e-05, + "loss": 0.4111, + "step": 145100 + }, + { + "epoch": 0.7488868595250256, + "grad_norm": 23249.794921875, + "learning_rate": 3.0344621665479778e-05, + "loss": 0.4093, + "step": 145150 + }, + { + "epoch": 0.7491448295076385, + "grad_norm": 23568.833984375, + "learning_rate": 3.0311037718312518e-05, + "loss": 0.4166, + "step": 145200 + }, + { + "epoch": 0.7494027994902513, + "grad_norm": 21794.6015625, + "learning_rate": 3.027746428073206e-05, + "loss": 0.4122, + "step": 145250 + }, + { + "epoch": 0.7496607694728641, + "grad_norm": 23710.212890625, + "learning_rate": 3.024390137065929e-05, + "loss": 0.4066, + "step": 145300 + }, + { + "epoch": 0.7499187394554769, + "grad_norm": 23179.240234375, + "learning_rate": 3.0210349006009385e-05, + "loss": 0.4127, + "step": 145350 + }, + { + "epoch": 0.7501767094380898, + "grad_norm": 25111.078125, + "learning_rate": 3.017680720469199e-05, + "loss": 0.4128, + "step": 145400 + }, + { + "epoch": 0.7504346794207026, + "grad_norm": 24289.095703125, + "learning_rate": 3.0143275984611042e-05, + "loss": 0.4167, + "step": 145450 + }, + { + "epoch": 0.7506926494033155, + "grad_norm": 22695.802734375, + "learning_rate": 3.0109755363664893e-05, + "loss": 0.4135, + "step": 145500 + }, + { + "epoch": 0.7509506193859282, + "grad_norm": 26995.833984375, + "learning_rate": 3.0076245359746163e-05, + "loss": 0.4057, + "step": 145550 + }, + { + "epoch": 0.7512085893685411, + "grad_norm": 21887.568359375, + "learning_rate": 3.004274599074185e-05, + "loss": 0.4089, + "step": 145600 + }, + { + "epoch": 0.7514665593511539, + "grad_norm": 27344.78125, + "learning_rate": 3.00092572745333e-05, + "loss": 0.4156, + "step": 145650 + }, + { + "epoch": 0.7517245293337668, + "grad_norm": 25476.15234375, + "learning_rate": 2.9975779228996104e-05, + "loss": 0.4113, + "step": 145700 + }, + { + "epoch": 0.7519824993163795, + "grad_norm": 24602.64453125, + "learning_rate": 2.9942311872000215e-05, + "loss": 0.4077, + "step": 145750 + }, + { + "epoch": 0.7522404692989924, + "grad_norm": 24124.984375, + "learning_rate": 2.990885522140985e-05, + "loss": 0.4122, + "step": 145800 + }, + { + "epoch": 0.7524984392816052, + "grad_norm": 24945.2109375, + "learning_rate": 2.987540929508354e-05, + "loss": 0.409, + "step": 145850 + }, + { + "epoch": 0.752756409264218, + "grad_norm": 26535.109375, + "learning_rate": 2.9841974110874037e-05, + "loss": 0.4132, + "step": 145900 + }, + { + "epoch": 0.7530143792468308, + "grad_norm": 21566.904296875, + "learning_rate": 2.980854968662843e-05, + "loss": 0.4073, + "step": 145950 + }, + { + "epoch": 0.7532723492294436, + "grad_norm": 22965.73828125, + "learning_rate": 2.9775136040188007e-05, + "loss": 0.4124, + "step": 146000 + }, + { + "epoch": 0.7535303192120565, + "grad_norm": 25307.123046875, + "learning_rate": 2.974173318938833e-05, + "loss": 0.4134, + "step": 146050 + }, + { + "epoch": 0.7537882891946693, + "grad_norm": 22280.431640625, + "learning_rate": 2.9708341152059226e-05, + "loss": 0.4085, + "step": 146100 + }, + { + "epoch": 0.7540462591772822, + "grad_norm": 25268.705078125, + "learning_rate": 2.9674959946024662e-05, + "loss": 0.4031, + "step": 146150 + }, + { + "epoch": 0.7543042291598949, + "grad_norm": 20014.28125, + "learning_rate": 2.9641589589102918e-05, + "loss": 0.4093, + "step": 146200 + }, + { + "epoch": 0.7545621991425078, + "grad_norm": 28430.544921875, + "learning_rate": 2.9608230099106427e-05, + "loss": 0.4112, + "step": 146250 + }, + { + "epoch": 0.7548201691251206, + "grad_norm": 21031.328125, + "learning_rate": 2.9574881493841867e-05, + "loss": 0.4084, + "step": 146300 + }, + { + "epoch": 0.7550781391077335, + "grad_norm": 24118.916015625, + "learning_rate": 2.9541543791110032e-05, + "loss": 0.4152, + "step": 146350 + }, + { + "epoch": 0.7553361090903462, + "grad_norm": 20663.740234375, + "learning_rate": 2.950821700870598e-05, + "loss": 0.409, + "step": 146400 + }, + { + "epoch": 0.7555940790729591, + "grad_norm": 23081.328125, + "learning_rate": 2.9474901164418877e-05, + "loss": 0.4089, + "step": 146450 + }, + { + "epoch": 0.7558520490555719, + "grad_norm": 24167.80859375, + "learning_rate": 2.9441596276032085e-05, + "loss": 0.4096, + "step": 146500 + }, + { + "epoch": 0.7561100190381848, + "grad_norm": 24959.595703125, + "learning_rate": 2.940830236132313e-05, + "loss": 0.4109, + "step": 146550 + }, + { + "epoch": 0.7563679890207975, + "grad_norm": 22731.36328125, + "learning_rate": 2.9375019438063622e-05, + "loss": 0.41, + "step": 146600 + }, + { + "epoch": 0.7566259590034103, + "grad_norm": 24127.41015625, + "learning_rate": 2.9341747524019368e-05, + "loss": 0.4078, + "step": 146650 + }, + { + "epoch": 0.7568839289860232, + "grad_norm": 27476.791015625, + "learning_rate": 2.9308486636950254e-05, + "loss": 0.4063, + "step": 146700 + }, + { + "epoch": 0.757141898968636, + "grad_norm": 24664.61328125, + "learning_rate": 2.9275236794610328e-05, + "loss": 0.4086, + "step": 146750 + }, + { + "epoch": 0.7573998689512488, + "grad_norm": 23883.0625, + "learning_rate": 2.9241998014747664e-05, + "loss": 0.4046, + "step": 146800 + }, + { + "epoch": 0.7576578389338616, + "grad_norm": 23431.509765625, + "learning_rate": 2.9208770315104515e-05, + "loss": 0.4054, + "step": 146850 + }, + { + "epoch": 0.7579158089164745, + "grad_norm": 25177.9453125, + "learning_rate": 2.9175553713417176e-05, + "loss": 0.4094, + "step": 146900 + }, + { + "epoch": 0.7581737788990873, + "grad_norm": 22862.201171875, + "learning_rate": 2.9142348227416e-05, + "loss": 0.4073, + "step": 146950 + }, + { + "epoch": 0.7584317488817002, + "grad_norm": 21731.240234375, + "learning_rate": 2.9109153874825478e-05, + "loss": 0.4081, + "step": 147000 + }, + { + "epoch": 0.7586897188643129, + "grad_norm": 24952.87109375, + "learning_rate": 2.9075970673364083e-05, + "loss": 0.4092, + "step": 147050 + }, + { + "epoch": 0.7589476888469258, + "grad_norm": 23138.029296875, + "learning_rate": 2.9042798640744385e-05, + "loss": 0.4051, + "step": 147100 + }, + { + "epoch": 0.7592056588295386, + "grad_norm": 21496.501953125, + "learning_rate": 2.900963779467295e-05, + "loss": 0.4096, + "step": 147150 + }, + { + "epoch": 0.7594636288121515, + "grad_norm": 22243.36328125, + "learning_rate": 2.8976488152850406e-05, + "loss": 0.3985, + "step": 147200 + }, + { + "epoch": 0.7597215987947642, + "grad_norm": 24515.029296875, + "learning_rate": 2.894334973297137e-05, + "loss": 0.4043, + "step": 147250 + }, + { + "epoch": 0.759979568777377, + "grad_norm": 23431.802734375, + "learning_rate": 2.8910222552724553e-05, + "loss": 0.4092, + "step": 147300 + }, + { + "epoch": 0.7602375387599899, + "grad_norm": 24192.44140625, + "learning_rate": 2.8877106629792515e-05, + "loss": 0.413, + "step": 147350 + }, + { + "epoch": 0.7604955087426027, + "grad_norm": 24239.015625, + "learning_rate": 2.884400198185196e-05, + "loss": 0.4064, + "step": 147400 + }, + { + "epoch": 0.7607534787252155, + "grad_norm": 22407.27734375, + "learning_rate": 2.881090862657348e-05, + "loss": 0.4086, + "step": 147450 + }, + { + "epoch": 0.7610114487078283, + "grad_norm": 24915.517578125, + "learning_rate": 2.877782658162166e-05, + "loss": 0.4067, + "step": 147500 + }, + { + "epoch": 0.7612694186904412, + "grad_norm": 23721.33984375, + "learning_rate": 2.8744755864655098e-05, + "loss": 0.4078, + "step": 147550 + }, + { + "epoch": 0.761527388673054, + "grad_norm": 23041.625, + "learning_rate": 2.8711696493326233e-05, + "loss": 0.4092, + "step": 147600 + }, + { + "epoch": 0.7617853586556669, + "grad_norm": 24021.81640625, + "learning_rate": 2.867864848528158e-05, + "loss": 0.4116, + "step": 147650 + }, + { + "epoch": 0.7620433286382796, + "grad_norm": 21309.7890625, + "learning_rate": 2.8645611858161502e-05, + "loss": 0.414, + "step": 147700 + }, + { + "epoch": 0.7623012986208925, + "grad_norm": 21959.544921875, + "learning_rate": 2.8612586629600307e-05, + "loss": 0.4113, + "step": 147750 + }, + { + "epoch": 0.7625592686035053, + "grad_norm": 22090.75, + "learning_rate": 2.857957281722623e-05, + "loss": 0.41, + "step": 147800 + }, + { + "epoch": 0.7628172385861182, + "grad_norm": 21273.6640625, + "learning_rate": 2.854657043866138e-05, + "loss": 0.4043, + "step": 147850 + }, + { + "epoch": 0.7630752085687309, + "grad_norm": 22781.33984375, + "learning_rate": 2.8513579511521825e-05, + "loss": 0.4009, + "step": 147900 + }, + { + "epoch": 0.7633331785513437, + "grad_norm": 24383.95703125, + "learning_rate": 2.8480600053417472e-05, + "loss": 0.4077, + "step": 147950 + }, + { + "epoch": 0.7635911485339566, + "grad_norm": 23988.673828125, + "learning_rate": 2.8447632081952104e-05, + "loss": 0.4048, + "step": 148000 + }, + { + "epoch": 0.7638491185165694, + "grad_norm": 24593.1484375, + "learning_rate": 2.8414675614723397e-05, + "loss": 0.4145, + "step": 148050 + }, + { + "epoch": 0.7641070884991822, + "grad_norm": 25818.216796875, + "learning_rate": 2.838173066932287e-05, + "loss": 0.408, + "step": 148100 + }, + { + "epoch": 0.764365058481795, + "grad_norm": 25780.1796875, + "learning_rate": 2.8348797263335886e-05, + "loss": 0.4109, + "step": 148150 + }, + { + "epoch": 0.7646230284644079, + "grad_norm": 22835.51171875, + "learning_rate": 2.8315875414341687e-05, + "loss": 0.4037, + "step": 148200 + }, + { + "epoch": 0.7648809984470207, + "grad_norm": 22711.501953125, + "learning_rate": 2.8282965139913296e-05, + "loss": 0.4117, + "step": 148250 + }, + { + "epoch": 0.7651389684296336, + "grad_norm": 22654.080078125, + "learning_rate": 2.825006645761758e-05, + "loss": 0.4094, + "step": 148300 + }, + { + "epoch": 0.7653969384122463, + "grad_norm": 23406.8671875, + "learning_rate": 2.821717938501526e-05, + "loss": 0.4096, + "step": 148350 + }, + { + "epoch": 0.7656549083948592, + "grad_norm": 23591.68359375, + "learning_rate": 2.8184303939660745e-05, + "loss": 0.4087, + "step": 148400 + }, + { + "epoch": 0.765912878377472, + "grad_norm": 21550.94140625, + "learning_rate": 2.815144013910237e-05, + "loss": 0.4046, + "step": 148450 + }, + { + "epoch": 0.7661708483600849, + "grad_norm": 23503.48046875, + "learning_rate": 2.8118588000882177e-05, + "loss": 0.4116, + "step": 148500 + }, + { + "epoch": 0.7664288183426976, + "grad_norm": 25247.244140625, + "learning_rate": 2.8085747542536e-05, + "loss": 0.4023, + "step": 148550 + }, + { + "epoch": 0.7666867883253105, + "grad_norm": 23665.91796875, + "learning_rate": 2.805291878159344e-05, + "loss": 0.4117, + "step": 148600 + }, + { + "epoch": 0.7669447583079233, + "grad_norm": 22785.59765625, + "learning_rate": 2.8020101735577837e-05, + "loss": 0.4084, + "step": 148650 + }, + { + "epoch": 0.7672027282905362, + "grad_norm": 20447.72265625, + "learning_rate": 2.7987296422006327e-05, + "loss": 0.4091, + "step": 148700 + }, + { + "epoch": 0.7674606982731489, + "grad_norm": 24965.869140625, + "learning_rate": 2.795450285838974e-05, + "loss": 0.4067, + "step": 148750 + }, + { + "epoch": 0.7677186682557617, + "grad_norm": 24323.09765625, + "learning_rate": 2.7921721062232637e-05, + "loss": 0.4037, + "step": 148800 + }, + { + "epoch": 0.7679766382383746, + "grad_norm": 23956.177734375, + "learning_rate": 2.7888951051033314e-05, + "loss": 0.4079, + "step": 148850 + }, + { + "epoch": 0.7682346082209874, + "grad_norm": 24222.4140625, + "learning_rate": 2.7856192842283756e-05, + "loss": 0.4112, + "step": 148900 + }, + { + "epoch": 0.7684925782036002, + "grad_norm": 24444.046875, + "learning_rate": 2.782344645346966e-05, + "loss": 0.4148, + "step": 148950 + }, + { + "epoch": 0.768750548186213, + "grad_norm": 23160.578125, + "learning_rate": 2.779071190207046e-05, + "loss": 0.4063, + "step": 149000 + }, + { + "epoch": 0.7690085181688259, + "grad_norm": 25806.732421875, + "learning_rate": 2.7757989205559142e-05, + "loss": 0.4112, + "step": 149050 + }, + { + "epoch": 0.7692664881514387, + "grad_norm": 21389.734375, + "learning_rate": 2.7725278381402524e-05, + "loss": 0.4104, + "step": 149100 + }, + { + "epoch": 0.7695244581340516, + "grad_norm": 23550.23828125, + "learning_rate": 2.769257944706098e-05, + "loss": 0.4121, + "step": 149150 + }, + { + "epoch": 0.7697824281166643, + "grad_norm": 21442.373046875, + "learning_rate": 2.765989241998854e-05, + "loss": 0.4087, + "step": 149200 + }, + { + "epoch": 0.7700403980992772, + "grad_norm": 23958.978515625, + "learning_rate": 2.7627217317632993e-05, + "loss": 0.4136, + "step": 149250 + }, + { + "epoch": 0.77029836808189, + "grad_norm": 22143.07421875, + "learning_rate": 2.759455415743556e-05, + "loss": 0.41, + "step": 149300 + }, + { + "epoch": 0.7705563380645029, + "grad_norm": 22873.86328125, + "learning_rate": 2.7561902956831294e-05, + "loss": 0.4094, + "step": 149350 + }, + { + "epoch": 0.7708143080471156, + "grad_norm": 22419.3046875, + "learning_rate": 2.7529263733248734e-05, + "loss": 0.4133, + "step": 149400 + }, + { + "epoch": 0.7710722780297284, + "grad_norm": 22167.474609375, + "learning_rate": 2.7496636504110075e-05, + "loss": 0.4181, + "step": 149450 + }, + { + "epoch": 0.7713302480123413, + "grad_norm": 25449.96875, + "learning_rate": 2.74640212868311e-05, + "loss": 0.412, + "step": 149500 + }, + { + "epoch": 0.7715882179949541, + "grad_norm": 22876.767578125, + "learning_rate": 2.7431418098821154e-05, + "loss": 0.4087, + "step": 149550 + }, + { + "epoch": 0.7718461879775669, + "grad_norm": 25600.65625, + "learning_rate": 2.7398826957483235e-05, + "loss": 0.4133, + "step": 149600 + }, + { + "epoch": 0.7721041579601797, + "grad_norm": 21764.0, + "learning_rate": 2.7366247880213834e-05, + "loss": 0.4073, + "step": 149650 + }, + { + "epoch": 0.7723621279427926, + "grad_norm": 21836.0625, + "learning_rate": 2.7333680884403046e-05, + "loss": 0.4165, + "step": 149700 + }, + { + "epoch": 0.7726200979254054, + "grad_norm": 22049.466796875, + "learning_rate": 2.7301125987434496e-05, + "loss": 0.4104, + "step": 149750 + }, + { + "epoch": 0.7728780679080183, + "grad_norm": 25398.28515625, + "learning_rate": 2.7268583206685348e-05, + "loss": 0.4036, + "step": 149800 + }, + { + "epoch": 0.773136037890631, + "grad_norm": 22303.654296875, + "learning_rate": 2.72360525595263e-05, + "loss": 0.4077, + "step": 149850 + }, + { + "epoch": 0.7733940078732439, + "grad_norm": 24734.65234375, + "learning_rate": 2.7203534063321633e-05, + "loss": 0.409, + "step": 149900 + }, + { + "epoch": 0.7736519778558567, + "grad_norm": 22068.283203125, + "learning_rate": 2.7171027735429023e-05, + "loss": 0.4148, + "step": 149950 + }, + { + "epoch": 0.7739099478384696, + "grad_norm": 23250.4921875, + "learning_rate": 2.7138533593199766e-05, + "loss": 0.4062, + "step": 150000 + }, + { + "epoch": 0.7739099478384696, + "eval_loss": 0.3953176736831665, + "eval_runtime": 3196.6561, + "eval_samples_per_second": 970.114, + "eval_steps_per_second": 1.895, + "step": 150000 + }, + { + "epoch": 0.7741679178210823, + "grad_norm": 26452.75390625, + "learning_rate": 2.710605165397859e-05, + "loss": 0.4098, + "step": 150050 + }, + { + "epoch": 0.7744258878036951, + "grad_norm": 23934.783203125, + "learning_rate": 2.707358193510371e-05, + "loss": 0.4113, + "step": 150100 + }, + { + "epoch": 0.774683857786308, + "grad_norm": 22443.591796875, + "learning_rate": 2.7041124453906884e-05, + "loss": 0.4119, + "step": 150150 + }, + { + "epoch": 0.7749418277689208, + "grad_norm": 23333.529296875, + "learning_rate": 2.7008679227713214e-05, + "loss": 0.4029, + "step": 150200 + }, + { + "epoch": 0.7751997977515336, + "grad_norm": 22431.576171875, + "learning_rate": 2.6976246273841388e-05, + "loss": 0.4045, + "step": 150250 + }, + { + "epoch": 0.7754577677341464, + "grad_norm": 26959.68359375, + "learning_rate": 2.694382560960348e-05, + "loss": 0.4072, + "step": 150300 + }, + { + "epoch": 0.7757157377167593, + "grad_norm": 21064.66015625, + "learning_rate": 2.6911417252304994e-05, + "loss": 0.411, + "step": 150350 + }, + { + "epoch": 0.7759737076993721, + "grad_norm": 23242.583984375, + "learning_rate": 2.6879021219244906e-05, + "loss": 0.4075, + "step": 150400 + }, + { + "epoch": 0.776231677681985, + "grad_norm": 24738.037109375, + "learning_rate": 2.6846637527715546e-05, + "loss": 0.4069, + "step": 150450 + }, + { + "epoch": 0.7764896476645977, + "grad_norm": 23944.759765625, + "learning_rate": 2.681426619500277e-05, + "loss": 0.403, + "step": 150500 + }, + { + "epoch": 0.7767476176472106, + "grad_norm": 22064.611328125, + "learning_rate": 2.678190723838572e-05, + "loss": 0.4045, + "step": 150550 + }, + { + "epoch": 0.7770055876298234, + "grad_norm": 24025.298828125, + "learning_rate": 2.6749560675137002e-05, + "loss": 0.4087, + "step": 150600 + }, + { + "epoch": 0.7772635576124363, + "grad_norm": 20863.119140625, + "learning_rate": 2.6717226522522553e-05, + "loss": 0.4087, + "step": 150650 + }, + { + "epoch": 0.777521527595049, + "grad_norm": 24537.642578125, + "learning_rate": 2.668490479780179e-05, + "loss": 0.4127, + "step": 150700 + }, + { + "epoch": 0.7777794975776618, + "grad_norm": 24400.193359375, + "learning_rate": 2.665259551822733e-05, + "loss": 0.4066, + "step": 150750 + }, + { + "epoch": 0.7780374675602747, + "grad_norm": 25251.81640625, + "learning_rate": 2.6620298701045322e-05, + "loss": 0.4111, + "step": 150800 + }, + { + "epoch": 0.7782954375428875, + "grad_norm": 23078.0, + "learning_rate": 2.658801436349511e-05, + "loss": 0.4109, + "step": 150850 + }, + { + "epoch": 0.7785534075255003, + "grad_norm": 20437.556640625, + "learning_rate": 2.655574252280949e-05, + "loss": 0.4096, + "step": 150900 + }, + { + "epoch": 0.7788113775081131, + "grad_norm": 24091.796875, + "learning_rate": 2.652348319621457e-05, + "loss": 0.4097, + "step": 150950 + }, + { + "epoch": 0.779069347490726, + "grad_norm": 22893.6640625, + "learning_rate": 2.6491236400929686e-05, + "loss": 0.4093, + "step": 151000 + }, + { + "epoch": 0.7793273174733388, + "grad_norm": 22871.80859375, + "learning_rate": 2.645900215416761e-05, + "loss": 0.407, + "step": 151050 + }, + { + "epoch": 0.7795852874559517, + "grad_norm": 21766.30078125, + "learning_rate": 2.642678047313435e-05, + "loss": 0.4071, + "step": 151100 + }, + { + "epoch": 0.7798432574385644, + "grad_norm": 24945.544921875, + "learning_rate": 2.639457137502919e-05, + "loss": 0.4073, + "step": 151150 + }, + { + "epoch": 0.7801012274211773, + "grad_norm": 22374.009765625, + "learning_rate": 2.636237487704475e-05, + "loss": 0.409, + "step": 151200 + }, + { + "epoch": 0.7803591974037901, + "grad_norm": 23499.08984375, + "learning_rate": 2.6330190996366875e-05, + "loss": 0.4087, + "step": 151250 + }, + { + "epoch": 0.780617167386403, + "grad_norm": 24672.017578125, + "learning_rate": 2.629801975017469e-05, + "loss": 0.4075, + "step": 151300 + }, + { + "epoch": 0.7808751373690157, + "grad_norm": 23105.05078125, + "learning_rate": 2.6265861155640626e-05, + "loss": 0.4031, + "step": 151350 + }, + { + "epoch": 0.7811331073516286, + "grad_norm": 23226.171875, + "learning_rate": 2.6233715229930282e-05, + "loss": 0.4137, + "step": 151400 + }, + { + "epoch": 0.7813910773342414, + "grad_norm": 24494.732421875, + "learning_rate": 2.620158199020255e-05, + "loss": 0.4089, + "step": 151450 + }, + { + "epoch": 0.7816490473168543, + "grad_norm": 24024.236328125, + "learning_rate": 2.616946145360952e-05, + "loss": 0.4084, + "step": 151500 + }, + { + "epoch": 0.781907017299467, + "grad_norm": 21957.2265625, + "learning_rate": 2.613735363729649e-05, + "loss": 0.4079, + "step": 151550 + }, + { + "epoch": 0.7821649872820798, + "grad_norm": 22637.291015625, + "learning_rate": 2.6105258558402056e-05, + "loss": 0.4093, + "step": 151600 + }, + { + "epoch": 0.7824229572646927, + "grad_norm": 27436.56640625, + "learning_rate": 2.607317623405787e-05, + "loss": 0.4054, + "step": 151650 + }, + { + "epoch": 0.7826809272473055, + "grad_norm": 21909.509765625, + "learning_rate": 2.6041106681388922e-05, + "loss": 0.4052, + "step": 151700 + }, + { + "epoch": 0.7829388972299183, + "grad_norm": 22887.494140625, + "learning_rate": 2.6009049917513283e-05, + "loss": 0.408, + "step": 151750 + }, + { + "epoch": 0.7831968672125311, + "grad_norm": 20771.53125, + "learning_rate": 2.5977005959542222e-05, + "loss": 0.4052, + "step": 151800 + }, + { + "epoch": 0.783454837195144, + "grad_norm": 22012.322265625, + "learning_rate": 2.5944974824580244e-05, + "loss": 0.4053, + "step": 151850 + }, + { + "epoch": 0.7837128071777568, + "grad_norm": 25365.822265625, + "learning_rate": 2.5912956529724865e-05, + "loss": 0.4141, + "step": 151900 + }, + { + "epoch": 0.7839707771603697, + "grad_norm": 23211.658203125, + "learning_rate": 2.5880951092066885e-05, + "loss": 0.4094, + "step": 151950 + }, + { + "epoch": 0.7842287471429824, + "grad_norm": 21514.79296875, + "learning_rate": 2.584895852869018e-05, + "loss": 0.4056, + "step": 152000 + }, + { + "epoch": 0.7844867171255953, + "grad_norm": 23275.76953125, + "learning_rate": 2.581697885667176e-05, + "loss": 0.4076, + "step": 152050 + }, + { + "epoch": 0.7847446871082081, + "grad_norm": 24080.478515625, + "learning_rate": 2.578501209308174e-05, + "loss": 0.409, + "step": 152100 + }, + { + "epoch": 0.785002657090821, + "grad_norm": 23384.275390625, + "learning_rate": 2.5753058254983376e-05, + "loss": 0.4063, + "step": 152150 + }, + { + "epoch": 0.7852606270734337, + "grad_norm": 22736.451171875, + "learning_rate": 2.572111735943298e-05, + "loss": 0.4054, + "step": 152200 + }, + { + "epoch": 0.7855185970560465, + "grad_norm": 24730.462890625, + "learning_rate": 2.568918942348002e-05, + "loss": 0.4074, + "step": 152250 + }, + { + "epoch": 0.7857765670386594, + "grad_norm": 23020.759765625, + "learning_rate": 2.5657274464166996e-05, + "loss": 0.4143, + "step": 152300 + }, + { + "epoch": 0.7860345370212722, + "grad_norm": 22263.357421875, + "learning_rate": 2.56253724985295e-05, + "loss": 0.4075, + "step": 152350 + }, + { + "epoch": 0.786292507003885, + "grad_norm": 23515.408203125, + "learning_rate": 2.5593483543596165e-05, + "loss": 0.4055, + "step": 152400 + }, + { + "epoch": 0.7865504769864978, + "grad_norm": 21960.447265625, + "learning_rate": 2.55616076163887e-05, + "loss": 0.407, + "step": 152450 + }, + { + "epoch": 0.7868084469691107, + "grad_norm": 26880.94140625, + "learning_rate": 2.55297447339219e-05, + "loss": 0.4029, + "step": 152500 + }, + { + "epoch": 0.7870664169517235, + "grad_norm": 22276.259765625, + "learning_rate": 2.5497894913203492e-05, + "loss": 0.4038, + "step": 152550 + }, + { + "epoch": 0.7873243869343364, + "grad_norm": 22566.541015625, + "learning_rate": 2.5466058171234336e-05, + "loss": 0.4055, + "step": 152600 + }, + { + "epoch": 0.7875823569169491, + "grad_norm": 24620.486328125, + "learning_rate": 2.543423452500826e-05, + "loss": 0.4031, + "step": 152650 + }, + { + "epoch": 0.787840326899562, + "grad_norm": 24162.99609375, + "learning_rate": 2.540242399151208e-05, + "loss": 0.4075, + "step": 152700 + }, + { + "epoch": 0.7880982968821748, + "grad_norm": 25309.958984375, + "learning_rate": 2.537062658772572e-05, + "loss": 0.4052, + "step": 152750 + }, + { + "epoch": 0.7883562668647877, + "grad_norm": 22024.390625, + "learning_rate": 2.533884233062192e-05, + "loss": 0.4036, + "step": 152800 + }, + { + "epoch": 0.7886142368474004, + "grad_norm": 22356.041015625, + "learning_rate": 2.530707123716657e-05, + "loss": 0.4065, + "step": 152850 + }, + { + "epoch": 0.7888722068300132, + "grad_norm": 22957.642578125, + "learning_rate": 2.527531332431844e-05, + "loss": 0.403, + "step": 152900 + }, + { + "epoch": 0.7891301768126261, + "grad_norm": 22161.298828125, + "learning_rate": 2.52435686090293e-05, + "loss": 0.4046, + "step": 152950 + }, + { + "epoch": 0.7893881467952389, + "grad_norm": 22849.720703125, + "learning_rate": 2.5211837108243847e-05, + "loss": 0.4045, + "step": 153000 + }, + { + "epoch": 0.7896461167778517, + "grad_norm": 25891.248046875, + "learning_rate": 2.5180118838899756e-05, + "loss": 0.4083, + "step": 153050 + }, + { + "epoch": 0.7899040867604645, + "grad_norm": 23150.634765625, + "learning_rate": 2.5148413817927598e-05, + "loss": 0.4104, + "step": 153100 + }, + { + "epoch": 0.7901620567430774, + "grad_norm": 23457.515625, + "learning_rate": 2.511672206225094e-05, + "loss": 0.4101, + "step": 153150 + }, + { + "epoch": 0.7904200267256902, + "grad_norm": 21316.8828125, + "learning_rate": 2.508504358878621e-05, + "loss": 0.4091, + "step": 153200 + }, + { + "epoch": 0.7906779967083031, + "grad_norm": 25747.87109375, + "learning_rate": 2.5053378414442748e-05, + "loss": 0.4131, + "step": 153250 + }, + { + "epoch": 0.7909359666909158, + "grad_norm": 21499.56640625, + "learning_rate": 2.502172655612286e-05, + "loss": 0.4028, + "step": 153300 + }, + { + "epoch": 0.7911939366735287, + "grad_norm": 22949.970703125, + "learning_rate": 2.499008803072162e-05, + "loss": 0.4078, + "step": 153350 + }, + { + "epoch": 0.7914519066561415, + "grad_norm": 26207.181640625, + "learning_rate": 2.495846285512714e-05, + "loss": 0.4064, + "step": 153400 + }, + { + "epoch": 0.7917098766387544, + "grad_norm": 25037.625, + "learning_rate": 2.4926851046220246e-05, + "loss": 0.4067, + "step": 153450 + }, + { + "epoch": 0.7919678466213671, + "grad_norm": 24114.482421875, + "learning_rate": 2.4895252620874775e-05, + "loss": 0.4123, + "step": 153500 + }, + { + "epoch": 0.79222581660398, + "grad_norm": 24953.568359375, + "learning_rate": 2.4863667595957325e-05, + "loss": 0.4083, + "step": 153550 + }, + { + "epoch": 0.7924837865865928, + "grad_norm": 24928.2265625, + "learning_rate": 2.483209598832736e-05, + "loss": 0.4066, + "step": 153600 + }, + { + "epoch": 0.7927417565692056, + "grad_norm": 24045.166015625, + "learning_rate": 2.4800537814837227e-05, + "loss": 0.4056, + "step": 153650 + }, + { + "epoch": 0.7929997265518184, + "grad_norm": 24591.826171875, + "learning_rate": 2.476899309233205e-05, + "loss": 0.4094, + "step": 153700 + }, + { + "epoch": 0.7932576965344312, + "grad_norm": 23336.810546875, + "learning_rate": 2.4737461837649782e-05, + "loss": 0.41, + "step": 153750 + }, + { + "epoch": 0.7935156665170441, + "grad_norm": 23454.171875, + "learning_rate": 2.4705944067621216e-05, + "loss": 0.4068, + "step": 153800 + }, + { + "epoch": 0.7937736364996569, + "grad_norm": 25322.201171875, + "learning_rate": 2.467443979906991e-05, + "loss": 0.4097, + "step": 153850 + }, + { + "epoch": 0.7940316064822697, + "grad_norm": 24731.580078125, + "learning_rate": 2.464294904881222e-05, + "loss": 0.4028, + "step": 153900 + }, + { + "epoch": 0.7942895764648825, + "grad_norm": 21753.568359375, + "learning_rate": 2.4611471833657356e-05, + "loss": 0.4148, + "step": 153950 + }, + { + "epoch": 0.7945475464474954, + "grad_norm": 26548.966796875, + "learning_rate": 2.458000817040717e-05, + "loss": 0.4074, + "step": 154000 + }, + { + "epoch": 0.7948055164301082, + "grad_norm": 21149.470703125, + "learning_rate": 2.4548558075856414e-05, + "loss": 0.408, + "step": 154050 + }, + { + "epoch": 0.7950634864127211, + "grad_norm": 25742.859375, + "learning_rate": 2.4517121566792517e-05, + "loss": 0.405, + "step": 154100 + }, + { + "epoch": 0.7953214563953338, + "grad_norm": 20954.91796875, + "learning_rate": 2.4485698659995658e-05, + "loss": 0.3975, + "step": 154150 + }, + { + "epoch": 0.7955794263779467, + "grad_norm": 23551.646484375, + "learning_rate": 2.445428937223884e-05, + "loss": 0.4059, + "step": 154200 + }, + { + "epoch": 0.7958373963605595, + "grad_norm": 25214.693359375, + "learning_rate": 2.4422893720287654e-05, + "loss": 0.4008, + "step": 154250 + }, + { + "epoch": 0.7960953663431724, + "grad_norm": 25346.916015625, + "learning_rate": 2.4391511720900545e-05, + "loss": 0.4035, + "step": 154300 + }, + { + "epoch": 0.7963533363257851, + "grad_norm": 21641.23828125, + "learning_rate": 2.43601433908286e-05, + "loss": 0.4069, + "step": 154350 + }, + { + "epoch": 0.7966113063083979, + "grad_norm": 22860.998046875, + "learning_rate": 2.4328788746815628e-05, + "loss": 0.4022, + "step": 154400 + }, + { + "epoch": 0.7968692762910108, + "grad_norm": 21989.96484375, + "learning_rate": 2.429744780559813e-05, + "loss": 0.4055, + "step": 154450 + }, + { + "epoch": 0.7971272462736236, + "grad_norm": 24413.74609375, + "learning_rate": 2.4266120583905272e-05, + "loss": 0.412, + "step": 154500 + }, + { + "epoch": 0.7973852162562364, + "grad_norm": 24805.859375, + "learning_rate": 2.4234807098458957e-05, + "loss": 0.41, + "step": 154550 + }, + { + "epoch": 0.7976431862388492, + "grad_norm": 23658.326171875, + "learning_rate": 2.42035073659737e-05, + "loss": 0.41, + "step": 154600 + }, + { + "epoch": 0.7979011562214621, + "grad_norm": 25225.228515625, + "learning_rate": 2.417222140315669e-05, + "loss": 0.4069, + "step": 154650 + }, + { + "epoch": 0.7981591262040749, + "grad_norm": 23417.3828125, + "learning_rate": 2.414094922670777e-05, + "loss": 0.4102, + "step": 154700 + }, + { + "epoch": 0.7984170961866878, + "grad_norm": 25014.5078125, + "learning_rate": 2.4109690853319422e-05, + "loss": 0.412, + "step": 154750 + }, + { + "epoch": 0.7986750661693005, + "grad_norm": 25523.3125, + "learning_rate": 2.407844629967674e-05, + "loss": 0.4102, + "step": 154800 + }, + { + "epoch": 0.7989330361519134, + "grad_norm": 23173.44921875, + "learning_rate": 2.404721558245752e-05, + "loss": 0.407, + "step": 154850 + }, + { + "epoch": 0.7991910061345262, + "grad_norm": 24673.5078125, + "learning_rate": 2.401599871833204e-05, + "loss": 0.4054, + "step": 154900 + }, + { + "epoch": 0.799448976117139, + "grad_norm": 24709.765625, + "learning_rate": 2.398479572396331e-05, + "loss": 0.4097, + "step": 154950 + }, + { + "epoch": 0.7997069460997518, + "grad_norm": 22404.29296875, + "learning_rate": 2.395360661600687e-05, + "loss": 0.4072, + "step": 155000 + }, + { + "epoch": 0.7997069460997518, + "eval_loss": 0.39372530579566956, + "eval_runtime": 3195.8879, + "eval_samples_per_second": 970.347, + "eval_steps_per_second": 1.895, + "step": 155000 + }, + { + "epoch": 0.7999649160823646, + "grad_norm": 24004.09375, + "learning_rate": 2.3922431411110834e-05, + "loss": 0.4016, + "step": 155050 + }, + { + "epoch": 0.8002228860649775, + "grad_norm": 25013.6484375, + "learning_rate": 2.3891270125915992e-05, + "loss": 0.4068, + "step": 155100 + }, + { + "epoch": 0.8004808560475903, + "grad_norm": 23532.982421875, + "learning_rate": 2.3860122777055553e-05, + "loss": 0.4036, + "step": 155150 + }, + { + "epoch": 0.8007388260302031, + "grad_norm": 27413.044921875, + "learning_rate": 2.3828989381155426e-05, + "loss": 0.4098, + "step": 155200 + }, + { + "epoch": 0.8009967960128159, + "grad_norm": 25821.794921875, + "learning_rate": 2.379786995483399e-05, + "loss": 0.4076, + "step": 155250 + }, + { + "epoch": 0.8012547659954288, + "grad_norm": 23864.154296875, + "learning_rate": 2.37667645147022e-05, + "loss": 0.4082, + "step": 155300 + }, + { + "epoch": 0.8015127359780416, + "grad_norm": 22892.451171875, + "learning_rate": 2.3735673077363534e-05, + "loss": 0.4116, + "step": 155350 + }, + { + "epoch": 0.8017707059606545, + "grad_norm": 24638.51953125, + "learning_rate": 2.3704595659413987e-05, + "loss": 0.4015, + "step": 155400 + }, + { + "epoch": 0.8020286759432672, + "grad_norm": 23007.734375, + "learning_rate": 2.3673532277442112e-05, + "loss": 0.4075, + "step": 155450 + }, + { + "epoch": 0.8022866459258801, + "grad_norm": 25629.17578125, + "learning_rate": 2.364248294802892e-05, + "loss": 0.4031, + "step": 155500 + }, + { + "epoch": 0.8025446159084929, + "grad_norm": 23949.939453125, + "learning_rate": 2.3611447687747955e-05, + "loss": 0.4091, + "step": 155550 + }, + { + "epoch": 0.8028025858911058, + "grad_norm": 23120.3515625, + "learning_rate": 2.3580426513165228e-05, + "loss": 0.4106, + "step": 155600 + }, + { + "epoch": 0.8030605558737185, + "grad_norm": 26965.955078125, + "learning_rate": 2.3549419440839236e-05, + "loss": 0.4054, + "step": 155650 + }, + { + "epoch": 0.8033185258563313, + "grad_norm": 23370.33984375, + "learning_rate": 2.3518426487320948e-05, + "loss": 0.407, + "step": 155700 + }, + { + "epoch": 0.8035764958389442, + "grad_norm": 22571.12890625, + "learning_rate": 2.3487447669153833e-05, + "loss": 0.4118, + "step": 155750 + }, + { + "epoch": 0.803834465821557, + "grad_norm": 24092.56640625, + "learning_rate": 2.3456483002873768e-05, + "loss": 0.4053, + "step": 155800 + }, + { + "epoch": 0.8040924358041698, + "grad_norm": 24549.140625, + "learning_rate": 2.3425532505009072e-05, + "loss": 0.405, + "step": 155850 + }, + { + "epoch": 0.8043504057867826, + "grad_norm": 23510.904296875, + "learning_rate": 2.3394596192080574e-05, + "loss": 0.4049, + "step": 155900 + }, + { + "epoch": 0.8046083757693955, + "grad_norm": 23147.369140625, + "learning_rate": 2.3363674080601416e-05, + "loss": 0.4032, + "step": 155950 + }, + { + "epoch": 0.8048663457520083, + "grad_norm": 21877.10546875, + "learning_rate": 2.3332766187077264e-05, + "loss": 0.4006, + "step": 156000 + }, + { + "epoch": 0.8051243157346211, + "grad_norm": 24041.384765625, + "learning_rate": 2.330187252800614e-05, + "loss": 0.4056, + "step": 156050 + }, + { + "epoch": 0.8053822857172339, + "grad_norm": 23452.453125, + "learning_rate": 2.327099311987848e-05, + "loss": 0.4071, + "step": 156100 + }, + { + "epoch": 0.8056402556998468, + "grad_norm": 23023.5859375, + "learning_rate": 2.3240127979177123e-05, + "loss": 0.4095, + "step": 156150 + }, + { + "epoch": 0.8058982256824596, + "grad_norm": 23684.615234375, + "learning_rate": 2.3209277122377255e-05, + "loss": 0.4023, + "step": 156200 + }, + { + "epoch": 0.8061561956650725, + "grad_norm": 22598.732421875, + "learning_rate": 2.31784405659465e-05, + "loss": 0.4013, + "step": 156250 + }, + { + "epoch": 0.8064141656476852, + "grad_norm": 21835.93359375, + "learning_rate": 2.3147618326344804e-05, + "loss": 0.4072, + "step": 156300 + }, + { + "epoch": 0.806672135630298, + "grad_norm": 26343.41015625, + "learning_rate": 2.311681042002448e-05, + "loss": 0.4154, + "step": 156350 + }, + { + "epoch": 0.8069301056129109, + "grad_norm": 24116.162109375, + "learning_rate": 2.3086016863430193e-05, + "loss": 0.4032, + "step": 156400 + }, + { + "epoch": 0.8071880755955237, + "grad_norm": 23874.53515625, + "learning_rate": 2.3055237672998946e-05, + "loss": 0.4063, + "step": 156450 + }, + { + "epoch": 0.8074460455781365, + "grad_norm": 25624.203125, + "learning_rate": 2.302447286516006e-05, + "loss": 0.4034, + "step": 156500 + }, + { + "epoch": 0.8077040155607493, + "grad_norm": 22652.2109375, + "learning_rate": 2.2993722456335236e-05, + "loss": 0.4049, + "step": 156550 + }, + { + "epoch": 0.8079619855433622, + "grad_norm": 26234.255859375, + "learning_rate": 2.2962986462938385e-05, + "loss": 0.4035, + "step": 156600 + }, + { + "epoch": 0.808219955525975, + "grad_norm": 24374.974609375, + "learning_rate": 2.293226490137584e-05, + "loss": 0.4052, + "step": 156650 + }, + { + "epoch": 0.8084779255085878, + "grad_norm": 24195.4296875, + "learning_rate": 2.2901557788046146e-05, + "loss": 0.4072, + "step": 156700 + }, + { + "epoch": 0.8087358954912006, + "grad_norm": 24590.525390625, + "learning_rate": 2.2870865139340165e-05, + "loss": 0.4092, + "step": 156750 + }, + { + "epoch": 0.8089938654738135, + "grad_norm": 20863.509765625, + "learning_rate": 2.2840186971641083e-05, + "loss": 0.4073, + "step": 156800 + }, + { + "epoch": 0.8092518354564263, + "grad_norm": 23662.16015625, + "learning_rate": 2.2809523301324238e-05, + "loss": 0.4101, + "step": 156850 + }, + { + "epoch": 0.8095098054390392, + "grad_norm": 21700.666015625, + "learning_rate": 2.2778874144757357e-05, + "loss": 0.4075, + "step": 156900 + }, + { + "epoch": 0.8097677754216519, + "grad_norm": 29026.71484375, + "learning_rate": 2.274823951830036e-05, + "loss": 0.4005, + "step": 156950 + }, + { + "epoch": 0.8100257454042648, + "grad_norm": 27310.48828125, + "learning_rate": 2.2717619438305397e-05, + "loss": 0.4058, + "step": 157000 + }, + { + "epoch": 0.8102837153868776, + "grad_norm": 25008.673828125, + "learning_rate": 2.2687013921116895e-05, + "loss": 0.404, + "step": 157050 + }, + { + "epoch": 0.8105416853694904, + "grad_norm": 22623.57421875, + "learning_rate": 2.2656422983071452e-05, + "loss": 0.4059, + "step": 157100 + }, + { + "epoch": 0.8107996553521032, + "grad_norm": 23960.427734375, + "learning_rate": 2.2625846640497965e-05, + "loss": 0.4096, + "step": 157150 + }, + { + "epoch": 0.811057625334716, + "grad_norm": 22415.021484375, + "learning_rate": 2.2595284909717475e-05, + "loss": 0.4061, + "step": 157200 + }, + { + "epoch": 0.8113155953173289, + "grad_norm": 23358.822265625, + "learning_rate": 2.2564737807043233e-05, + "loss": 0.4003, + "step": 157250 + }, + { + "epoch": 0.8115735652999417, + "grad_norm": 21686.9765625, + "learning_rate": 2.2534205348780702e-05, + "loss": 0.4063, + "step": 157300 + }, + { + "epoch": 0.8118315352825545, + "grad_norm": 22949.484375, + "learning_rate": 2.2503687551227504e-05, + "loss": 0.407, + "step": 157350 + }, + { + "epoch": 0.8120895052651673, + "grad_norm": 21776.201171875, + "learning_rate": 2.2473184430673444e-05, + "loss": 0.4073, + "step": 157400 + }, + { + "epoch": 0.8123474752477802, + "grad_norm": 25641.17578125, + "learning_rate": 2.244269600340055e-05, + "loss": 0.4074, + "step": 157450 + }, + { + "epoch": 0.812605445230393, + "grad_norm": 22723.42578125, + "learning_rate": 2.2412222285682867e-05, + "loss": 0.4119, + "step": 157500 + }, + { + "epoch": 0.8128634152130059, + "grad_norm": 24244.48046875, + "learning_rate": 2.2381763293786746e-05, + "loss": 0.4157, + "step": 157550 + }, + { + "epoch": 0.8131213851956186, + "grad_norm": 26826.337890625, + "learning_rate": 2.235131904397058e-05, + "loss": 0.4102, + "step": 157600 + }, + { + "epoch": 0.8133793551782315, + "grad_norm": 23157.0546875, + "learning_rate": 2.232088955248491e-05, + "loss": 0.4121, + "step": 157650 + }, + { + "epoch": 0.8136373251608443, + "grad_norm": 23352.009765625, + "learning_rate": 2.229047483557245e-05, + "loss": 0.4054, + "step": 157700 + }, + { + "epoch": 0.8138952951434572, + "grad_norm": 24417.2734375, + "learning_rate": 2.2260074909467925e-05, + "loss": 0.4092, + "step": 157750 + }, + { + "epoch": 0.8141532651260699, + "grad_norm": 22345.669921875, + "learning_rate": 2.2229689790398283e-05, + "loss": 0.402, + "step": 157800 + }, + { + "epoch": 0.8144112351086827, + "grad_norm": 22904.20703125, + "learning_rate": 2.2199319494582492e-05, + "loss": 0.4067, + "step": 157850 + }, + { + "epoch": 0.8146692050912956, + "grad_norm": 24132.306640625, + "learning_rate": 2.216896403823162e-05, + "loss": 0.4094, + "step": 157900 + }, + { + "epoch": 0.8149271750739084, + "grad_norm": 24649.001953125, + "learning_rate": 2.2138623437548833e-05, + "loss": 0.4048, + "step": 157950 + }, + { + "epoch": 0.8151851450565212, + "grad_norm": 24956.458984375, + "learning_rate": 2.210829770872933e-05, + "loss": 0.4038, + "step": 158000 + }, + { + "epoch": 0.815443115039134, + "grad_norm": 24047.3515625, + "learning_rate": 2.2077986867960437e-05, + "loss": 0.407, + "step": 158050 + }, + { + "epoch": 0.8157010850217469, + "grad_norm": 22895.953125, + "learning_rate": 2.2047690931421476e-05, + "loss": 0.4033, + "step": 158100 + }, + { + "epoch": 0.8159590550043597, + "grad_norm": 22524.640625, + "learning_rate": 2.201740991528383e-05, + "loss": 0.4136, + "step": 158150 + }, + { + "epoch": 0.8162170249869725, + "grad_norm": 22507.46875, + "learning_rate": 2.1987143835710928e-05, + "loss": 0.4043, + "step": 158200 + }, + { + "epoch": 0.8164749949695853, + "grad_norm": 24044.5390625, + "learning_rate": 2.1956892708858202e-05, + "loss": 0.4099, + "step": 158250 + }, + { + "epoch": 0.8167329649521982, + "grad_norm": 26112.05859375, + "learning_rate": 2.1926656550873103e-05, + "loss": 0.4087, + "step": 158300 + }, + { + "epoch": 0.816990934934811, + "grad_norm": 25168.59375, + "learning_rate": 2.189643537789517e-05, + "loss": 0.4059, + "step": 158350 + }, + { + "epoch": 0.8172489049174239, + "grad_norm": 31289.392578125, + "learning_rate": 2.1866229206055804e-05, + "loss": 0.4048, + "step": 158400 + }, + { + "epoch": 0.8175068749000366, + "grad_norm": 27301.970703125, + "learning_rate": 2.1836038051478508e-05, + "loss": 0.4111, + "step": 158450 + }, + { + "epoch": 0.8177648448826494, + "grad_norm": 22742.66015625, + "learning_rate": 2.180586193027877e-05, + "loss": 0.3998, + "step": 158500 + }, + { + "epoch": 0.8180228148652623, + "grad_norm": 26745.51171875, + "learning_rate": 2.177570085856395e-05, + "loss": 0.4069, + "step": 158550 + }, + { + "epoch": 0.8182807848478751, + "grad_norm": 24821.93359375, + "learning_rate": 2.1745554852433502e-05, + "loss": 0.4057, + "step": 158600 + }, + { + "epoch": 0.8185387548304879, + "grad_norm": 24082.908203125, + "learning_rate": 2.1715423927978755e-05, + "loss": 0.4042, + "step": 158650 + }, + { + "epoch": 0.8187967248131007, + "grad_norm": 23584.001953125, + "learning_rate": 2.168530810128302e-05, + "loss": 0.4062, + "step": 158700 + }, + { + "epoch": 0.8190546947957136, + "grad_norm": 25795.326171875, + "learning_rate": 2.1655207388421532e-05, + "loss": 0.4101, + "step": 158750 + }, + { + "epoch": 0.8193126647783264, + "grad_norm": 22298.908203125, + "learning_rate": 2.1625121805461483e-05, + "loss": 0.4004, + "step": 158800 + }, + { + "epoch": 0.8195706347609392, + "grad_norm": 24439.970703125, + "learning_rate": 2.1595051368461943e-05, + "loss": 0.4078, + "step": 158850 + }, + { + "epoch": 0.819828604743552, + "grad_norm": 24895.5546875, + "learning_rate": 2.1564996093473975e-05, + "loss": 0.4008, + "step": 158900 + }, + { + "epoch": 0.8200865747261649, + "grad_norm": 27615.1171875, + "learning_rate": 2.153495599654048e-05, + "loss": 0.4051, + "step": 158950 + }, + { + "epoch": 0.8203445447087777, + "grad_norm": 22537.25390625, + "learning_rate": 2.150493109369628e-05, + "loss": 0.4078, + "step": 159000 + }, + { + "epoch": 0.8206025146913906, + "grad_norm": 23422.39453125, + "learning_rate": 2.1474921400968085e-05, + "loss": 0.3999, + "step": 159050 + }, + { + "epoch": 0.8208604846740033, + "grad_norm": 24678.099609375, + "learning_rate": 2.1444926934374475e-05, + "loss": 0.4038, + "step": 159100 + }, + { + "epoch": 0.8211184546566161, + "grad_norm": 25680.623046875, + "learning_rate": 2.1414947709925963e-05, + "loss": 0.4082, + "step": 159150 + }, + { + "epoch": 0.821376424639229, + "grad_norm": 26526.724609375, + "learning_rate": 2.1384983743624813e-05, + "loss": 0.4076, + "step": 159200 + }, + { + "epoch": 0.8216343946218418, + "grad_norm": 21391.701171875, + "learning_rate": 2.1355035051465265e-05, + "loss": 0.4003, + "step": 159250 + }, + { + "epoch": 0.8218923646044546, + "grad_norm": 22676.607421875, + "learning_rate": 2.1325101649433327e-05, + "loss": 0.4087, + "step": 159300 + }, + { + "epoch": 0.8221503345870674, + "grad_norm": 23139.802734375, + "learning_rate": 2.1295183553506855e-05, + "loss": 0.4102, + "step": 159350 + }, + { + "epoch": 0.8224083045696803, + "grad_norm": 23598.369140625, + "learning_rate": 2.1265280779655593e-05, + "loss": 0.4027, + "step": 159400 + }, + { + "epoch": 0.8226662745522931, + "grad_norm": 24068.453125, + "learning_rate": 2.1235393343841008e-05, + "loss": 0.4097, + "step": 159450 + }, + { + "epoch": 0.8229242445349059, + "grad_norm": 26833.779296875, + "learning_rate": 2.1205521262016476e-05, + "loss": 0.4094, + "step": 159500 + }, + { + "epoch": 0.8231822145175187, + "grad_norm": 21122.98046875, + "learning_rate": 2.1175664550127123e-05, + "loss": 0.4074, + "step": 159550 + }, + { + "epoch": 0.8234401845001316, + "grad_norm": 24398.310546875, + "learning_rate": 2.1145823224109884e-05, + "loss": 0.4081, + "step": 159600 + }, + { + "epoch": 0.8236981544827444, + "grad_norm": 20830.05078125, + "learning_rate": 2.111599729989348e-05, + "loss": 0.4031, + "step": 159650 + }, + { + "epoch": 0.8239561244653573, + "grad_norm": 24353.29296875, + "learning_rate": 2.108618679339841e-05, + "loss": 0.4037, + "step": 159700 + }, + { + "epoch": 0.82421409444797, + "grad_norm": 22828.130859375, + "learning_rate": 2.1056391720536928e-05, + "loss": 0.4021, + "step": 159750 + }, + { + "epoch": 0.8244720644305829, + "grad_norm": 21661.53515625, + "learning_rate": 2.1026612097213106e-05, + "loss": 0.4117, + "step": 159800 + }, + { + "epoch": 0.8247300344131957, + "grad_norm": 20191.279296875, + "learning_rate": 2.0996847939322707e-05, + "loss": 0.4088, + "step": 159850 + }, + { + "epoch": 0.8249880043958085, + "grad_norm": 23767.8125, + "learning_rate": 2.0967099262753258e-05, + "loss": 0.4035, + "step": 159900 + }, + { + "epoch": 0.8252459743784213, + "grad_norm": 24693.4609375, + "learning_rate": 2.093736608338405e-05, + "loss": 0.4135, + "step": 159950 + }, + { + "epoch": 0.8255039443610341, + "grad_norm": 22759.341796875, + "learning_rate": 2.0907648417086027e-05, + "loss": 0.4048, + "step": 160000 + }, + { + "epoch": 0.8255039443610341, + "eval_loss": 0.3925068974494934, + "eval_runtime": 3187.046, + "eval_samples_per_second": 973.039, + "eval_steps_per_second": 1.901, + "step": 160000 + }, + { + "epoch": 0.825761914343647, + "grad_norm": 25066.45703125, + "learning_rate": 2.0877946279721983e-05, + "loss": 0.4017, + "step": 160050 + }, + { + "epoch": 0.8260198843262598, + "grad_norm": 24734.384765625, + "learning_rate": 2.084825968714626e-05, + "loss": 0.4091, + "step": 160100 + }, + { + "epoch": 0.8262778543088726, + "grad_norm": 26498.201171875, + "learning_rate": 2.0818588655205045e-05, + "loss": 0.4028, + "step": 160150 + }, + { + "epoch": 0.8265358242914854, + "grad_norm": 23436.36328125, + "learning_rate": 2.0788933199736143e-05, + "loss": 0.4019, + "step": 160200 + }, + { + "epoch": 0.8267937942740983, + "grad_norm": 23851.89453125, + "learning_rate": 2.075929333656904e-05, + "loss": 0.4055, + "step": 160250 + }, + { + "epoch": 0.8270517642567111, + "grad_norm": 23416.0625, + "learning_rate": 2.0729669081524977e-05, + "loss": 0.4075, + "step": 160300 + }, + { + "epoch": 0.8273097342393239, + "grad_norm": 22208.994140625, + "learning_rate": 2.070006045041673e-05, + "loss": 0.4047, + "step": 160350 + }, + { + "epoch": 0.8275677042219367, + "grad_norm": 21291.3515625, + "learning_rate": 2.067046745904888e-05, + "loss": 0.405, + "step": 160400 + }, + { + "epoch": 0.8278256742045496, + "grad_norm": 24646.279296875, + "learning_rate": 2.0640890123217565e-05, + "loss": 0.4076, + "step": 160450 + }, + { + "epoch": 0.8280836441871624, + "grad_norm": 22018.609375, + "learning_rate": 2.0611328458710595e-05, + "loss": 0.406, + "step": 160500 + }, + { + "epoch": 0.8283416141697753, + "grad_norm": 30070.40234375, + "learning_rate": 2.0581782481307415e-05, + "loss": 0.4099, + "step": 160550 + }, + { + "epoch": 0.828599584152388, + "grad_norm": 24574.34375, + "learning_rate": 2.0552252206779098e-05, + "loss": 0.4035, + "step": 160600 + }, + { + "epoch": 0.8288575541350008, + "grad_norm": 23137.224609375, + "learning_rate": 2.0522737650888313e-05, + "loss": 0.4006, + "step": 160650 + }, + { + "epoch": 0.8291155241176137, + "grad_norm": 22633.23828125, + "learning_rate": 2.0493238829389393e-05, + "loss": 0.4064, + "step": 160700 + }, + { + "epoch": 0.8293734941002265, + "grad_norm": 23670.525390625, + "learning_rate": 2.046375575802822e-05, + "loss": 0.4084, + "step": 160750 + }, + { + "epoch": 0.8296314640828393, + "grad_norm": 24236.7890625, + "learning_rate": 2.043428845254229e-05, + "loss": 0.413, + "step": 160800 + }, + { + "epoch": 0.8298894340654521, + "grad_norm": 25734.12890625, + "learning_rate": 2.0404836928660676e-05, + "loss": 0.3992, + "step": 160850 + }, + { + "epoch": 0.830147404048065, + "grad_norm": 23417.83203125, + "learning_rate": 2.037540120210401e-05, + "loss": 0.4069, + "step": 160900 + }, + { + "epoch": 0.8304053740306778, + "grad_norm": 24619.853515625, + "learning_rate": 2.0345981288584575e-05, + "loss": 0.4002, + "step": 160950 + }, + { + "epoch": 0.8306633440132906, + "grad_norm": 21862.111328125, + "learning_rate": 2.031657720380608e-05, + "loss": 0.4012, + "step": 161000 + }, + { + "epoch": 0.8309213139959034, + "grad_norm": 23347.91015625, + "learning_rate": 2.0287188963463906e-05, + "loss": 0.4061, + "step": 161050 + }, + { + "epoch": 0.8311792839785163, + "grad_norm": 25119.107421875, + "learning_rate": 2.02578165832449e-05, + "loss": 0.4061, + "step": 161100 + }, + { + "epoch": 0.8314372539611291, + "grad_norm": 22684.50390625, + "learning_rate": 2.0228460078827466e-05, + "loss": 0.4062, + "step": 161150 + }, + { + "epoch": 0.831695223943742, + "grad_norm": 39309.30859375, + "learning_rate": 2.0199119465881565e-05, + "loss": 0.4091, + "step": 161200 + }, + { + "epoch": 0.8319531939263547, + "grad_norm": 22076.8125, + "learning_rate": 2.0169794760068632e-05, + "loss": 0.4052, + "step": 161250 + }, + { + "epoch": 0.8322111639089675, + "grad_norm": 26682.44140625, + "learning_rate": 2.0140485977041636e-05, + "loss": 0.405, + "step": 161300 + }, + { + "epoch": 0.8324691338915804, + "grad_norm": 24586.09375, + "learning_rate": 2.011119313244502e-05, + "loss": 0.4066, + "step": 161350 + }, + { + "epoch": 0.8327271038741932, + "grad_norm": 26363.5703125, + "learning_rate": 2.008191624191475e-05, + "loss": 0.4027, + "step": 161400 + }, + { + "epoch": 0.832985073856806, + "grad_norm": 24361.9921875, + "learning_rate": 2.0052655321078246e-05, + "loss": 0.4041, + "step": 161450 + }, + { + "epoch": 0.8332430438394188, + "grad_norm": 22026.951171875, + "learning_rate": 2.0023410385554466e-05, + "loss": 0.4068, + "step": 161500 + }, + { + "epoch": 0.8335010138220317, + "grad_norm": 24540.068359375, + "learning_rate": 1.9994181450953725e-05, + "loss": 0.4036, + "step": 161550 + }, + { + "epoch": 0.8337589838046445, + "grad_norm": 25837.857421875, + "learning_rate": 1.9964968532877916e-05, + "loss": 0.4052, + "step": 161600 + }, + { + "epoch": 0.8340169537872573, + "grad_norm": 23252.900390625, + "learning_rate": 1.993577164692031e-05, + "loss": 0.4021, + "step": 161650 + }, + { + "epoch": 0.8342749237698701, + "grad_norm": 25305.177734375, + "learning_rate": 1.990659080866562e-05, + "loss": 0.4089, + "step": 161700 + }, + { + "epoch": 0.834532893752483, + "grad_norm": 25317.89453125, + "learning_rate": 1.9877426033690066e-05, + "loss": 0.4082, + "step": 161750 + }, + { + "epoch": 0.8347908637350958, + "grad_norm": 25872.2109375, + "learning_rate": 1.984827733756117e-05, + "loss": 0.4021, + "step": 161800 + }, + { + "epoch": 0.8350488337177087, + "grad_norm": 23915.955078125, + "learning_rate": 1.9819144735837998e-05, + "loss": 0.4054, + "step": 161850 + }, + { + "epoch": 0.8353068037003214, + "grad_norm": 25145.380859375, + "learning_rate": 1.9790028244070946e-05, + "loss": 0.4119, + "step": 161900 + }, + { + "epoch": 0.8355647736829342, + "grad_norm": 24318.28125, + "learning_rate": 1.976092787780184e-05, + "loss": 0.4015, + "step": 161950 + }, + { + "epoch": 0.8358227436655471, + "grad_norm": 22675.845703125, + "learning_rate": 1.973184365256388e-05, + "loss": 0.4107, + "step": 162000 + }, + { + "epoch": 0.83608071364816, + "grad_norm": 23785.451171875, + "learning_rate": 1.9702775583881656e-05, + "loss": 0.408, + "step": 162050 + }, + { + "epoch": 0.8363386836307727, + "grad_norm": 22790.47265625, + "learning_rate": 1.9673723687271174e-05, + "loss": 0.406, + "step": 162100 + }, + { + "epoch": 0.8365966536133855, + "grad_norm": 24380.498046875, + "learning_rate": 1.9644687978239746e-05, + "loss": 0.4105, + "step": 162150 + }, + { + "epoch": 0.8368546235959984, + "grad_norm": 23812.814453125, + "learning_rate": 1.9615668472286085e-05, + "loss": 0.4032, + "step": 162200 + }, + { + "epoch": 0.8371125935786112, + "grad_norm": 22820.734375, + "learning_rate": 1.9586665184900232e-05, + "loss": 0.4072, + "step": 162250 + }, + { + "epoch": 0.837370563561224, + "grad_norm": 22347.779296875, + "learning_rate": 1.955767813156359e-05, + "loss": 0.4045, + "step": 162300 + }, + { + "epoch": 0.8376285335438368, + "grad_norm": 24328.546875, + "learning_rate": 1.9528707327748852e-05, + "loss": 0.4097, + "step": 162350 + }, + { + "epoch": 0.8378865035264497, + "grad_norm": 23850.13671875, + "learning_rate": 1.9499752788920146e-05, + "loss": 0.4085, + "step": 162400 + }, + { + "epoch": 0.8381444735090625, + "grad_norm": 24967.3203125, + "learning_rate": 1.9470814530532756e-05, + "loss": 0.4056, + "step": 162450 + }, + { + "epoch": 0.8384024434916753, + "grad_norm": 23740.197265625, + "learning_rate": 1.9441892568033426e-05, + "loss": 0.4112, + "step": 162500 + }, + { + "epoch": 0.8386604134742881, + "grad_norm": 26039.447265625, + "learning_rate": 1.941298691686012e-05, + "loss": 0.405, + "step": 162550 + }, + { + "epoch": 0.838918383456901, + "grad_norm": 22781.23828125, + "learning_rate": 1.9384097592442102e-05, + "loss": 0.4043, + "step": 162600 + }, + { + "epoch": 0.8391763534395138, + "grad_norm": 25735.17578125, + "learning_rate": 1.935522461019998e-05, + "loss": 0.4021, + "step": 162650 + }, + { + "epoch": 0.8394343234221266, + "grad_norm": 26452.810546875, + "learning_rate": 1.932636798554552e-05, + "loss": 0.4093, + "step": 162700 + }, + { + "epoch": 0.8396922934047394, + "grad_norm": 24199.3515625, + "learning_rate": 1.929752773388189e-05, + "loss": 0.4003, + "step": 162750 + }, + { + "epoch": 0.8399502633873522, + "grad_norm": 27610.30859375, + "learning_rate": 1.9268703870603434e-05, + "loss": 0.4035, + "step": 162800 + }, + { + "epoch": 0.8402082333699651, + "grad_norm": 23799.3359375, + "learning_rate": 1.9239896411095777e-05, + "loss": 0.4072, + "step": 162850 + }, + { + "epoch": 0.8404662033525779, + "grad_norm": 24182.162109375, + "learning_rate": 1.9211105370735784e-05, + "loss": 0.4056, + "step": 162900 + }, + { + "epoch": 0.8407241733351907, + "grad_norm": 21251.0625, + "learning_rate": 1.918233076489153e-05, + "loss": 0.4073, + "step": 162950 + }, + { + "epoch": 0.8409821433178035, + "grad_norm": 22723.09765625, + "learning_rate": 1.9153572608922383e-05, + "loss": 0.4041, + "step": 163000 + }, + { + "epoch": 0.8412401133004164, + "grad_norm": 23557.125, + "learning_rate": 1.9124830918178876e-05, + "loss": 0.4064, + "step": 163050 + }, + { + "epoch": 0.8414980832830292, + "grad_norm": 24273.71484375, + "learning_rate": 1.9096105708002754e-05, + "loss": 0.4072, + "step": 163100 + }, + { + "epoch": 0.841756053265642, + "grad_norm": 24078.10546875, + "learning_rate": 1.9067396993726994e-05, + "loss": 0.409, + "step": 163150 + }, + { + "epoch": 0.8420140232482548, + "grad_norm": 23370.31640625, + "learning_rate": 1.9038704790675738e-05, + "loss": 0.4082, + "step": 163200 + }, + { + "epoch": 0.8422719932308677, + "grad_norm": 23478.564453125, + "learning_rate": 1.901002911416432e-05, + "loss": 0.4082, + "step": 163250 + }, + { + "epoch": 0.8425299632134805, + "grad_norm": 22697.802734375, + "learning_rate": 1.898136997949929e-05, + "loss": 0.4107, + "step": 163300 + }, + { + "epoch": 0.8427879331960934, + "grad_norm": 25571.9765625, + "learning_rate": 1.8952727401978326e-05, + "loss": 0.3996, + "step": 163350 + }, + { + "epoch": 0.8430459031787061, + "grad_norm": 24950.283203125, + "learning_rate": 1.8924101396890264e-05, + "loss": 0.403, + "step": 163400 + }, + { + "epoch": 0.8433038731613189, + "grad_norm": 22436.380859375, + "learning_rate": 1.8895491979515162e-05, + "loss": 0.4041, + "step": 163450 + }, + { + "epoch": 0.8435618431439318, + "grad_norm": 25954.529296875, + "learning_rate": 1.8866899165124097e-05, + "loss": 0.4003, + "step": 163500 + }, + { + "epoch": 0.8438198131265446, + "grad_norm": 21477.8828125, + "learning_rate": 1.883832296897944e-05, + "loss": 0.4063, + "step": 163550 + }, + { + "epoch": 0.8440777831091574, + "grad_norm": 24669.7890625, + "learning_rate": 1.8809763406334535e-05, + "loss": 0.4049, + "step": 163600 + }, + { + "epoch": 0.8443357530917702, + "grad_norm": 27181.50390625, + "learning_rate": 1.878122049243398e-05, + "loss": 0.4007, + "step": 163650 + }, + { + "epoch": 0.8445937230743831, + "grad_norm": 25191.591796875, + "learning_rate": 1.8752694242513408e-05, + "loss": 0.4072, + "step": 163700 + }, + { + "epoch": 0.8448516930569959, + "grad_norm": 24557.42578125, + "learning_rate": 1.872418467179956e-05, + "loss": 0.4043, + "step": 163750 + }, + { + "epoch": 0.8451096630396087, + "grad_norm": 25135.6328125, + "learning_rate": 1.8695691795510335e-05, + "loss": 0.4008, + "step": 163800 + }, + { + "epoch": 0.8453676330222215, + "grad_norm": 23372.181640625, + "learning_rate": 1.8667215628854656e-05, + "loss": 0.4073, + "step": 163850 + }, + { + "epoch": 0.8456256030048344, + "grad_norm": 23332.65625, + "learning_rate": 1.8638756187032554e-05, + "loss": 0.3987, + "step": 163900 + }, + { + "epoch": 0.8458835729874472, + "grad_norm": 23423.669921875, + "learning_rate": 1.861031348523512e-05, + "loss": 0.4066, + "step": 163950 + }, + { + "epoch": 0.8461415429700601, + "grad_norm": 25873.208984375, + "learning_rate": 1.858188753864452e-05, + "loss": 0.4015, + "step": 164000 + }, + { + "epoch": 0.8463995129526728, + "grad_norm": 24766.4140625, + "learning_rate": 1.8553478362433964e-05, + "loss": 0.4076, + "step": 164050 + }, + { + "epoch": 0.8466574829352856, + "grad_norm": 25044.45703125, + "learning_rate": 1.852508597176776e-05, + "loss": 0.3972, + "step": 164100 + }, + { + "epoch": 0.8469154529178985, + "grad_norm": 23699.478515625, + "learning_rate": 1.8496710381801157e-05, + "loss": 0.3953, + "step": 164150 + }, + { + "epoch": 0.8471734229005113, + "grad_norm": 22853.53125, + "learning_rate": 1.8468351607680546e-05, + "loss": 0.4095, + "step": 164200 + }, + { + "epoch": 0.8474313928831241, + "grad_norm": 21374.96875, + "learning_rate": 1.8440009664543267e-05, + "loss": 0.4092, + "step": 164250 + }, + { + "epoch": 0.8476893628657369, + "grad_norm": 22454.515625, + "learning_rate": 1.8411684567517694e-05, + "loss": 0.4005, + "step": 164300 + }, + { + "epoch": 0.8479473328483498, + "grad_norm": 23134.24609375, + "learning_rate": 1.8383376331723258e-05, + "loss": 0.4041, + "step": 164350 + }, + { + "epoch": 0.8482053028309626, + "grad_norm": 23000.69921875, + "learning_rate": 1.835508497227028e-05, + "loss": 0.4056, + "step": 164400 + }, + { + "epoch": 0.8484632728135754, + "grad_norm": 23213.333984375, + "learning_rate": 1.8326810504260194e-05, + "loss": 0.4076, + "step": 164450 + }, + { + "epoch": 0.8487212427961882, + "grad_norm": 24883.953125, + "learning_rate": 1.8298552942785353e-05, + "loss": 0.4023, + "step": 164500 + }, + { + "epoch": 0.8489792127788011, + "grad_norm": 23075.015625, + "learning_rate": 1.827031230292908e-05, + "loss": 0.4095, + "step": 164550 + }, + { + "epoch": 0.8492371827614139, + "grad_norm": 24055.23828125, + "learning_rate": 1.824208859976569e-05, + "loss": 0.4034, + "step": 164600 + }, + { + "epoch": 0.8494951527440268, + "grad_norm": 24572.919921875, + "learning_rate": 1.8213881848360438e-05, + "loss": 0.4106, + "step": 164650 + }, + { + "epoch": 0.8497531227266395, + "grad_norm": 26111.40234375, + "learning_rate": 1.8185692063769566e-05, + "loss": 0.4051, + "step": 164700 + }, + { + "epoch": 0.8500110927092523, + "grad_norm": 22763.25, + "learning_rate": 1.8157519261040222e-05, + "loss": 0.4019, + "step": 164750 + }, + { + "epoch": 0.8502690626918652, + "grad_norm": 22230.16796875, + "learning_rate": 1.8129363455210503e-05, + "loss": 0.4085, + "step": 164800 + }, + { + "epoch": 0.850527032674478, + "grad_norm": 24729.40234375, + "learning_rate": 1.8101224661309435e-05, + "loss": 0.4042, + "step": 164850 + }, + { + "epoch": 0.8507850026570908, + "grad_norm": 23329.431640625, + "learning_rate": 1.807310289435696e-05, + "loss": 0.405, + "step": 164900 + }, + { + "epoch": 0.8510429726397036, + "grad_norm": 24267.970703125, + "learning_rate": 1.8044998169363908e-05, + "loss": 0.406, + "step": 164950 + }, + { + "epoch": 0.8513009426223165, + "grad_norm": 23587.689453125, + "learning_rate": 1.80169105013321e-05, + "loss": 0.4069, + "step": 165000 + }, + { + "epoch": 0.8513009426223165, + "eval_loss": 0.3912332057952881, + "eval_runtime": 3189.1337, + "eval_samples_per_second": 972.402, + "eval_steps_per_second": 1.899, + "step": 165000 + }, + { + "epoch": 0.8515589126049293, + "grad_norm": 23356.634765625, + "learning_rate": 1.798883990525412e-05, + "loss": 0.4022, + "step": 165050 + }, + { + "epoch": 0.8518168825875421, + "grad_norm": 23850.75, + "learning_rate": 1.7960786396113542e-05, + "loss": 0.3984, + "step": 165100 + }, + { + "epoch": 0.8520748525701549, + "grad_norm": 23898.03125, + "learning_rate": 1.7932749988884795e-05, + "loss": 0.4035, + "step": 165150 + }, + { + "epoch": 0.8523328225527678, + "grad_norm": 23517.4453125, + "learning_rate": 1.790473069853314e-05, + "loss": 0.4061, + "step": 165200 + }, + { + "epoch": 0.8525907925353806, + "grad_norm": 24264.568359375, + "learning_rate": 1.787672854001478e-05, + "loss": 0.4076, + "step": 165250 + }, + { + "epoch": 0.8528487625179934, + "grad_norm": 23741.220703125, + "learning_rate": 1.7848743528276663e-05, + "loss": 0.4063, + "step": 165300 + }, + { + "epoch": 0.8531067325006062, + "grad_norm": 25368.697265625, + "learning_rate": 1.782077567825669e-05, + "loss": 0.4027, + "step": 165350 + }, + { + "epoch": 0.853364702483219, + "grad_norm": 21610.12890625, + "learning_rate": 1.779282500488355e-05, + "loss": 0.4067, + "step": 165400 + }, + { + "epoch": 0.8536226724658319, + "grad_norm": 26066.560546875, + "learning_rate": 1.7764891523076766e-05, + "loss": 0.4091, + "step": 165450 + }, + { + "epoch": 0.8538806424484447, + "grad_norm": 22909.5234375, + "learning_rate": 1.773697524774669e-05, + "loss": 0.4035, + "step": 165500 + }, + { + "epoch": 0.8541386124310575, + "grad_norm": 23672.54296875, + "learning_rate": 1.7709076193794478e-05, + "loss": 0.407, + "step": 165550 + }, + { + "epoch": 0.8543965824136703, + "grad_norm": 22466.203125, + "learning_rate": 1.7681194376112125e-05, + "loss": 0.4057, + "step": 165600 + }, + { + "epoch": 0.8546545523962832, + "grad_norm": 23236.4296875, + "learning_rate": 1.7653329809582404e-05, + "loss": 0.4058, + "step": 165650 + }, + { + "epoch": 0.854912522378896, + "grad_norm": 23181.5, + "learning_rate": 1.7625482509078873e-05, + "loss": 0.4007, + "step": 165700 + }, + { + "epoch": 0.8551704923615088, + "grad_norm": 20621.5, + "learning_rate": 1.7597652489465877e-05, + "loss": 0.4053, + "step": 165750 + }, + { + "epoch": 0.8554284623441216, + "grad_norm": 23911.7734375, + "learning_rate": 1.756983976559855e-05, + "loss": 0.4043, + "step": 165800 + }, + { + "epoch": 0.8556864323267345, + "grad_norm": 21440.978515625, + "learning_rate": 1.7542044352322768e-05, + "loss": 0.4076, + "step": 165850 + }, + { + "epoch": 0.8559444023093473, + "grad_norm": 22439.712890625, + "learning_rate": 1.7514266264475233e-05, + "loss": 0.3999, + "step": 165900 + }, + { + "epoch": 0.8562023722919601, + "grad_norm": 24814.876953125, + "learning_rate": 1.748650551688328e-05, + "loss": 0.405, + "step": 165950 + }, + { + "epoch": 0.8564603422745729, + "grad_norm": 21705.185546875, + "learning_rate": 1.7458762124365096e-05, + "loss": 0.4007, + "step": 166000 + }, + { + "epoch": 0.8567183122571858, + "grad_norm": 25317.05078125, + "learning_rate": 1.7431036101729604e-05, + "loss": 0.4036, + "step": 166050 + }, + { + "epoch": 0.8569762822397986, + "grad_norm": 23984.142578125, + "learning_rate": 1.7403327463776343e-05, + "loss": 0.4027, + "step": 166100 + }, + { + "epoch": 0.8572342522224115, + "grad_norm": 24149.794921875, + "learning_rate": 1.7375636225295716e-05, + "loss": 0.3986, + "step": 166150 + }, + { + "epoch": 0.8574922222050242, + "grad_norm": 20085.748046875, + "learning_rate": 1.73479624010687e-05, + "loss": 0.4032, + "step": 166200 + }, + { + "epoch": 0.857750192187637, + "grad_norm": 25550.01171875, + "learning_rate": 1.732030600586711e-05, + "loss": 0.4067, + "step": 166250 + }, + { + "epoch": 0.8580081621702499, + "grad_norm": 23439.69921875, + "learning_rate": 1.7292667054453364e-05, + "loss": 0.4058, + "step": 166300 + }, + { + "epoch": 0.8582661321528627, + "grad_norm": 24064.46484375, + "learning_rate": 1.7265045561580606e-05, + "loss": 0.406, + "step": 166350 + }, + { + "epoch": 0.8585241021354755, + "grad_norm": 27679.162109375, + "learning_rate": 1.723744154199264e-05, + "loss": 0.403, + "step": 166400 + }, + { + "epoch": 0.8587820721180883, + "grad_norm": 21371.59765625, + "learning_rate": 1.7209855010423977e-05, + "loss": 0.4103, + "step": 166450 + }, + { + "epoch": 0.8590400421007012, + "grad_norm": 24340.283203125, + "learning_rate": 1.7182285981599766e-05, + "loss": 0.4073, + "step": 166500 + }, + { + "epoch": 0.859298012083314, + "grad_norm": 22603.62109375, + "learning_rate": 1.7154734470235823e-05, + "loss": 0.4026, + "step": 166550 + }, + { + "epoch": 0.8595559820659268, + "grad_norm": 21442.248046875, + "learning_rate": 1.7127200491038607e-05, + "loss": 0.4089, + "step": 166600 + }, + { + "epoch": 0.8598139520485396, + "grad_norm": 22127.478515625, + "learning_rate": 1.7099684058705212e-05, + "loss": 0.4073, + "step": 166650 + }, + { + "epoch": 0.8600719220311525, + "grad_norm": 37660.0859375, + "learning_rate": 1.707218518792342e-05, + "loss": 0.404, + "step": 166700 + }, + { + "epoch": 0.8603298920137653, + "grad_norm": 23772.982421875, + "learning_rate": 1.704470389337153e-05, + "loss": 0.4004, + "step": 166750 + }, + { + "epoch": 0.8605878619963782, + "grad_norm": 24957.23828125, + "learning_rate": 1.7017240189718575e-05, + "loss": 0.4025, + "step": 166800 + }, + { + "epoch": 0.8608458319789909, + "grad_norm": 25014.044921875, + "learning_rate": 1.6989794091624138e-05, + "loss": 0.4037, + "step": 166850 + }, + { + "epoch": 0.8611038019616037, + "grad_norm": 23370.162109375, + "learning_rate": 1.696236561373839e-05, + "loss": 0.4043, + "step": 166900 + }, + { + "epoch": 0.8613617719442166, + "grad_norm": 25212.830078125, + "learning_rate": 1.693495477070217e-05, + "loss": 0.3997, + "step": 166950 + }, + { + "epoch": 0.8616197419268294, + "grad_norm": 22828.701171875, + "learning_rate": 1.69075615771468e-05, + "loss": 0.4063, + "step": 167000 + }, + { + "epoch": 0.8618777119094422, + "grad_norm": 23862.4375, + "learning_rate": 1.6880186047694274e-05, + "loss": 0.4044, + "step": 167050 + }, + { + "epoch": 0.862135681892055, + "grad_norm": 25248.44140625, + "learning_rate": 1.685282819695711e-05, + "loss": 0.4072, + "step": 167100 + }, + { + "epoch": 0.8623936518746679, + "grad_norm": 24765.2421875, + "learning_rate": 1.68254880395384e-05, + "loss": 0.4055, + "step": 167150 + }, + { + "epoch": 0.8626516218572807, + "grad_norm": 22687.32421875, + "learning_rate": 1.6798165590031783e-05, + "loss": 0.4076, + "step": 167200 + }, + { + "epoch": 0.8629095918398935, + "grad_norm": 28427.16015625, + "learning_rate": 1.677086086302146e-05, + "loss": 0.3985, + "step": 167250 + }, + { + "epoch": 0.8631675618225063, + "grad_norm": 24114.146484375, + "learning_rate": 1.6743573873082147e-05, + "loss": 0.3993, + "step": 167300 + }, + { + "epoch": 0.8634255318051192, + "grad_norm": 22007.857421875, + "learning_rate": 1.6716304634779144e-05, + "loss": 0.4054, + "step": 167350 + }, + { + "epoch": 0.863683501787732, + "grad_norm": 24888.619140625, + "learning_rate": 1.6689053162668226e-05, + "loss": 0.3983, + "step": 167400 + }, + { + "epoch": 0.8639414717703447, + "grad_norm": 23306.1640625, + "learning_rate": 1.6661819471295704e-05, + "loss": 0.3985, + "step": 167450 + }, + { + "epoch": 0.8641994417529576, + "grad_norm": 25983.62109375, + "learning_rate": 1.6634603575198387e-05, + "loss": 0.4033, + "step": 167500 + }, + { + "epoch": 0.8644574117355704, + "grad_norm": 21851.826171875, + "learning_rate": 1.6607405488903582e-05, + "loss": 0.4067, + "step": 167550 + }, + { + "epoch": 0.8647153817181833, + "grad_norm": 23041.548828125, + "learning_rate": 1.6580225226929152e-05, + "loss": 0.4054, + "step": 167600 + }, + { + "epoch": 0.8649733517007961, + "grad_norm": 24893.72265625, + "learning_rate": 1.655306280378333e-05, + "loss": 0.4081, + "step": 167650 + }, + { + "epoch": 0.8652313216834089, + "grad_norm": 24462.869140625, + "learning_rate": 1.6525918233964933e-05, + "loss": 0.4093, + "step": 167700 + }, + { + "epoch": 0.8654892916660217, + "grad_norm": 20188.037109375, + "learning_rate": 1.6498791531963197e-05, + "loss": 0.3986, + "step": 167750 + }, + { + "epoch": 0.8657472616486346, + "grad_norm": 24806.51171875, + "learning_rate": 1.6471682712257812e-05, + "loss": 0.3988, + "step": 167800 + }, + { + "epoch": 0.8660052316312474, + "grad_norm": 21647.11328125, + "learning_rate": 1.6444591789318992e-05, + "loss": 0.4083, + "step": 167850 + }, + { + "epoch": 0.8662632016138602, + "grad_norm": 22894.3515625, + "learning_rate": 1.6417518777607277e-05, + "loss": 0.4004, + "step": 167900 + }, + { + "epoch": 0.866521171596473, + "grad_norm": 23173.974609375, + "learning_rate": 1.6390463691573765e-05, + "loss": 0.409, + "step": 167950 + }, + { + "epoch": 0.8667791415790859, + "grad_norm": 24268.001953125, + "learning_rate": 1.6363426545659927e-05, + "loss": 0.4021, + "step": 168000 + }, + { + "epoch": 0.8670371115616987, + "grad_norm": 23466.482421875, + "learning_rate": 1.6336407354297667e-05, + "loss": 0.4067, + "step": 168050 + }, + { + "epoch": 0.8672950815443115, + "grad_norm": 22965.560546875, + "learning_rate": 1.6309406131909298e-05, + "loss": 0.4127, + "step": 168100 + }, + { + "epoch": 0.8675530515269243, + "grad_norm": 22818.5859375, + "learning_rate": 1.6282422892907563e-05, + "loss": 0.4107, + "step": 168150 + }, + { + "epoch": 0.8678110215095372, + "grad_norm": 23358.80859375, + "learning_rate": 1.6255457651695565e-05, + "loss": 0.3985, + "step": 168200 + }, + { + "epoch": 0.86806899149215, + "grad_norm": 24952.044921875, + "learning_rate": 1.6228510422666865e-05, + "loss": 0.4021, + "step": 168250 + }, + { + "epoch": 0.8683269614747628, + "grad_norm": 23554.359375, + "learning_rate": 1.6201581220205353e-05, + "loss": 0.4091, + "step": 168300 + }, + { + "epoch": 0.8685849314573756, + "grad_norm": 23862.92578125, + "learning_rate": 1.6174670058685316e-05, + "loss": 0.4009, + "step": 168350 + }, + { + "epoch": 0.8688429014399884, + "grad_norm": 23549.693359375, + "learning_rate": 1.6147776952471415e-05, + "loss": 0.4062, + "step": 168400 + }, + { + "epoch": 0.8691008714226013, + "grad_norm": 25237.26953125, + "learning_rate": 1.612090191591865e-05, + "loss": 0.4009, + "step": 168450 + }, + { + "epoch": 0.8693588414052141, + "grad_norm": 24368.298828125, + "learning_rate": 1.6094044963372444e-05, + "loss": 0.4052, + "step": 168500 + }, + { + "epoch": 0.8696168113878269, + "grad_norm": 24438.0, + "learning_rate": 1.6067206109168453e-05, + "loss": 0.4077, + "step": 168550 + }, + { + "epoch": 0.8698747813704397, + "grad_norm": 30002.744140625, + "learning_rate": 1.6040385367632786e-05, + "loss": 0.4029, + "step": 168600 + }, + { + "epoch": 0.8701327513530526, + "grad_norm": 24591.333984375, + "learning_rate": 1.6013582753081824e-05, + "loss": 0.4019, + "step": 168650 + }, + { + "epoch": 0.8703907213356654, + "grad_norm": 24005.166015625, + "learning_rate": 1.5986798279822263e-05, + "loss": 0.4046, + "step": 168700 + }, + { + "epoch": 0.8706486913182782, + "grad_norm": 22198.482421875, + "learning_rate": 1.5960031962151167e-05, + "loss": 0.4003, + "step": 168750 + }, + { + "epoch": 0.870906661300891, + "grad_norm": 23392.919921875, + "learning_rate": 1.5933283814355872e-05, + "loss": 0.4039, + "step": 168800 + }, + { + "epoch": 0.8711646312835039, + "grad_norm": 26185.88671875, + "learning_rate": 1.5906553850714003e-05, + "loss": 0.4044, + "step": 168850 + }, + { + "epoch": 0.8714226012661167, + "grad_norm": 34066.59765625, + "learning_rate": 1.5879842085493514e-05, + "loss": 0.4068, + "step": 168900 + }, + { + "epoch": 0.8716805712487296, + "grad_norm": 21913.802734375, + "learning_rate": 1.5853148532952616e-05, + "loss": 0.4083, + "step": 168950 + }, + { + "epoch": 0.8719385412313423, + "grad_norm": 22491.25390625, + "learning_rate": 1.5826473207339802e-05, + "loss": 0.4037, + "step": 169000 + }, + { + "epoch": 0.8721965112139551, + "grad_norm": 23891.447265625, + "learning_rate": 1.579981612289389e-05, + "loss": 0.4033, + "step": 169050 + }, + { + "epoch": 0.872454481196568, + "grad_norm": 24374.109375, + "learning_rate": 1.5773177293843855e-05, + "loss": 0.41, + "step": 169100 + }, + { + "epoch": 0.8727124511791808, + "grad_norm": 24323.197265625, + "learning_rate": 1.574655673440903e-05, + "loss": 0.3999, + "step": 169150 + }, + { + "epoch": 0.8729704211617936, + "grad_norm": 22040.76171875, + "learning_rate": 1.5719954458798943e-05, + "loss": 0.3997, + "step": 169200 + }, + { + "epoch": 0.8732283911444064, + "grad_norm": 32067.173828125, + "learning_rate": 1.5693370481213355e-05, + "loss": 0.4028, + "step": 169250 + }, + { + "epoch": 0.8734863611270193, + "grad_norm": 27840.97265625, + "learning_rate": 1.5666804815842322e-05, + "loss": 0.4082, + "step": 169300 + }, + { + "epoch": 0.8737443311096321, + "grad_norm": 23976.154296875, + "learning_rate": 1.5640257476866033e-05, + "loss": 0.4075, + "step": 169350 + }, + { + "epoch": 0.8740023010922449, + "grad_norm": 22856.724609375, + "learning_rate": 1.5613728478454976e-05, + "loss": 0.4033, + "step": 169400 + }, + { + "epoch": 0.8742602710748577, + "grad_norm": 22639.69140625, + "learning_rate": 1.5587217834769803e-05, + "loss": 0.4052, + "step": 169450 + }, + { + "epoch": 0.8745182410574706, + "grad_norm": 24272.626953125, + "learning_rate": 1.5560725559961386e-05, + "loss": 0.4029, + "step": 169500 + }, + { + "epoch": 0.8747762110400834, + "grad_norm": 23789.333984375, + "learning_rate": 1.553425166817079e-05, + "loss": 0.4078, + "step": 169550 + }, + { + "epoch": 0.8750341810226961, + "grad_norm": 23287.294921875, + "learning_rate": 1.5507796173529248e-05, + "loss": 0.408, + "step": 169600 + }, + { + "epoch": 0.875292151005309, + "grad_norm": 22272.13671875, + "learning_rate": 1.548135909015822e-05, + "loss": 0.4017, + "step": 169650 + }, + { + "epoch": 0.8755501209879218, + "grad_norm": 24645.40234375, + "learning_rate": 1.5454940432169297e-05, + "loss": 0.4001, + "step": 169700 + }, + { + "epoch": 0.8758080909705347, + "grad_norm": 26364.072265625, + "learning_rate": 1.5428540213664243e-05, + "loss": 0.411, + "step": 169750 + }, + { + "epoch": 0.8760660609531475, + "grad_norm": 24535.76171875, + "learning_rate": 1.5402158448734987e-05, + "loss": 0.4042, + "step": 169800 + }, + { + "epoch": 0.8763240309357603, + "grad_norm": 23294.94140625, + "learning_rate": 1.53757951514636e-05, + "loss": 0.4083, + "step": 169850 + }, + { + "epoch": 0.8765820009183731, + "grad_norm": 23390.046875, + "learning_rate": 1.5349450335922295e-05, + "loss": 0.399, + "step": 169900 + }, + { + "epoch": 0.876839970900986, + "grad_norm": 23079.41796875, + "learning_rate": 1.5323124016173455e-05, + "loss": 0.4078, + "step": 169950 + }, + { + "epoch": 0.8770979408835988, + "grad_norm": 24190.23046875, + "learning_rate": 1.529681620626951e-05, + "loss": 0.4013, + "step": 170000 + }, + { + "epoch": 0.8770979408835988, + "eval_loss": 0.39030978083610535, + "eval_runtime": 3197.421, + "eval_samples_per_second": 969.882, + "eval_steps_per_second": 1.894, + "step": 170000 + }, + { + "epoch": 0.8773559108662116, + "grad_norm": 24830.658203125, + "learning_rate": 1.5270526920253098e-05, + "loss": 0.4053, + "step": 170050 + }, + { + "epoch": 0.8776138808488244, + "grad_norm": 21314.533203125, + "learning_rate": 1.5244256172156923e-05, + "loss": 0.4067, + "step": 170100 + }, + { + "epoch": 0.8778718508314373, + "grad_norm": 23271.314453125, + "learning_rate": 1.521800397600378e-05, + "loss": 0.4024, + "step": 170150 + }, + { + "epoch": 0.8781298208140501, + "grad_norm": 20112.265625, + "learning_rate": 1.5191770345806632e-05, + "loss": 0.4001, + "step": 170200 + }, + { + "epoch": 0.8783877907966628, + "grad_norm": 23957.087890625, + "learning_rate": 1.5165555295568418e-05, + "loss": 0.406, + "step": 170250 + }, + { + "epoch": 0.8786457607792757, + "grad_norm": 23699.181640625, + "learning_rate": 1.5139358839282275e-05, + "loss": 0.4005, + "step": 170300 + }, + { + "epoch": 0.8789037307618885, + "grad_norm": 23276.4453125, + "learning_rate": 1.5113180990931353e-05, + "loss": 0.4057, + "step": 170350 + }, + { + "epoch": 0.8791617007445014, + "grad_norm": 27051.26171875, + "learning_rate": 1.5087021764488867e-05, + "loss": 0.4037, + "step": 170400 + }, + { + "epoch": 0.8794196707271142, + "grad_norm": 24315.11328125, + "learning_rate": 1.5060881173918112e-05, + "loss": 0.4004, + "step": 170450 + }, + { + "epoch": 0.879677640709727, + "grad_norm": 22589.85546875, + "learning_rate": 1.5034759233172419e-05, + "loss": 0.402, + "step": 170500 + }, + { + "epoch": 0.8799356106923398, + "grad_norm": 24601.666015625, + "learning_rate": 1.5008655956195195e-05, + "loss": 0.4083, + "step": 170550 + }, + { + "epoch": 0.8801935806749527, + "grad_norm": 23203.884765625, + "learning_rate": 1.4982571356919862e-05, + "loss": 0.3971, + "step": 170600 + }, + { + "epoch": 0.8804515506575655, + "grad_norm": 28701.162109375, + "learning_rate": 1.4956505449269858e-05, + "loss": 0.3989, + "step": 170650 + }, + { + "epoch": 0.8807095206401783, + "grad_norm": 23548.541015625, + "learning_rate": 1.4930458247158668e-05, + "loss": 0.4014, + "step": 170700 + }, + { + "epoch": 0.8809674906227911, + "grad_norm": 26836.626953125, + "learning_rate": 1.4904429764489792e-05, + "loss": 0.3964, + "step": 170750 + }, + { + "epoch": 0.881225460605404, + "grad_norm": 23989.537109375, + "learning_rate": 1.4878420015156697e-05, + "loss": 0.4062, + "step": 170800 + }, + { + "epoch": 0.8814834305880168, + "grad_norm": 22008.498046875, + "learning_rate": 1.4852429013042945e-05, + "loss": 0.4034, + "step": 170850 + }, + { + "epoch": 0.8817414005706296, + "grad_norm": 22564.548828125, + "learning_rate": 1.4826456772021957e-05, + "loss": 0.3953, + "step": 170900 + }, + { + "epoch": 0.8819993705532424, + "grad_norm": 20611.005859375, + "learning_rate": 1.4800503305957264e-05, + "loss": 0.3993, + "step": 170950 + }, + { + "epoch": 0.8822573405358553, + "grad_norm": 23731.072265625, + "learning_rate": 1.4774568628702312e-05, + "loss": 0.4008, + "step": 171000 + }, + { + "epoch": 0.8825153105184681, + "grad_norm": 23515.265625, + "learning_rate": 1.4748652754100506e-05, + "loss": 0.4093, + "step": 171050 + }, + { + "epoch": 0.882773280501081, + "grad_norm": 20889.193359375, + "learning_rate": 1.4722755695985291e-05, + "loss": 0.4036, + "step": 171100 + }, + { + "epoch": 0.8830312504836937, + "grad_norm": 23561.208984375, + "learning_rate": 1.4696877468179954e-05, + "loss": 0.4009, + "step": 171150 + }, + { + "epoch": 0.8832892204663065, + "grad_norm": 29216.3046875, + "learning_rate": 1.4671018084497828e-05, + "loss": 0.4087, + "step": 171200 + }, + { + "epoch": 0.8835471904489194, + "grad_norm": 24697.615234375, + "learning_rate": 1.4645177558742147e-05, + "loss": 0.3976, + "step": 171250 + }, + { + "epoch": 0.8838051604315322, + "grad_norm": 30338.123046875, + "learning_rate": 1.4619355904706062e-05, + "loss": 0.4046, + "step": 171300 + }, + { + "epoch": 0.884063130414145, + "grad_norm": 22565.310546875, + "learning_rate": 1.4593553136172705e-05, + "loss": 0.4011, + "step": 171350 + }, + { + "epoch": 0.8843211003967578, + "grad_norm": 23498.0, + "learning_rate": 1.4567769266915077e-05, + "loss": 0.4071, + "step": 171400 + }, + { + "epoch": 0.8845790703793707, + "grad_norm": 23772.279296875, + "learning_rate": 1.4542004310696112e-05, + "loss": 0.4048, + "step": 171450 + }, + { + "epoch": 0.8848370403619835, + "grad_norm": 22418.015625, + "learning_rate": 1.4516258281268636e-05, + "loss": 0.4009, + "step": 171500 + }, + { + "epoch": 0.8850950103445963, + "grad_norm": 25706.166015625, + "learning_rate": 1.4490531192375395e-05, + "loss": 0.4017, + "step": 171550 + }, + { + "epoch": 0.8853529803272091, + "grad_norm": 23563.37890625, + "learning_rate": 1.4464823057748982e-05, + "loss": 0.4056, + "step": 171600 + }, + { + "epoch": 0.885610950309822, + "grad_norm": 23104.65234375, + "learning_rate": 1.4439133891111956e-05, + "loss": 0.4014, + "step": 171650 + }, + { + "epoch": 0.8858689202924348, + "grad_norm": 22858.935546875, + "learning_rate": 1.4413463706176627e-05, + "loss": 0.4047, + "step": 171700 + }, + { + "epoch": 0.8861268902750475, + "grad_norm": 23197.859375, + "learning_rate": 1.4387812516645299e-05, + "loss": 0.4032, + "step": 171750 + }, + { + "epoch": 0.8863848602576604, + "grad_norm": 22323.4609375, + "learning_rate": 1.4362180336210057e-05, + "loss": 0.4018, + "step": 171800 + }, + { + "epoch": 0.8866428302402732, + "grad_norm": 23677.431640625, + "learning_rate": 1.433656717855285e-05, + "loss": 0.4057, + "step": 171850 + }, + { + "epoch": 0.8869008002228861, + "grad_norm": 22975.283203125, + "learning_rate": 1.4310973057345538e-05, + "loss": 0.4077, + "step": 171900 + }, + { + "epoch": 0.8871587702054989, + "grad_norm": 23338.005859375, + "learning_rate": 1.4285397986249694e-05, + "loss": 0.4037, + "step": 171950 + }, + { + "epoch": 0.8874167401881117, + "grad_norm": 22469.08203125, + "learning_rate": 1.4259841978916849e-05, + "loss": 0.4025, + "step": 172000 + }, + { + "epoch": 0.8876747101707245, + "grad_norm": 23508.064453125, + "learning_rate": 1.4234305048988288e-05, + "loss": 0.3979, + "step": 172050 + }, + { + "epoch": 0.8879326801533374, + "grad_norm": 25113.62890625, + "learning_rate": 1.4208787210095126e-05, + "loss": 0.3988, + "step": 172100 + }, + { + "epoch": 0.8881906501359502, + "grad_norm": 23230.75, + "learning_rate": 1.4183288475858298e-05, + "loss": 0.4029, + "step": 172150 + }, + { + "epoch": 0.888448620118563, + "grad_norm": 22058.306640625, + "learning_rate": 1.4157808859888516e-05, + "loss": 0.4082, + "step": 172200 + }, + { + "epoch": 0.8887065901011758, + "grad_norm": 23375.91015625, + "learning_rate": 1.4132348375786336e-05, + "loss": 0.407, + "step": 172250 + }, + { + "epoch": 0.8889645600837887, + "grad_norm": 21199.943359375, + "learning_rate": 1.4106907037142059e-05, + "loss": 0.4039, + "step": 172300 + }, + { + "epoch": 0.8892225300664015, + "grad_norm": 22754.287109375, + "learning_rate": 1.4081484857535777e-05, + "loss": 0.4, + "step": 172350 + }, + { + "epoch": 0.8894805000490142, + "grad_norm": 23116.21484375, + "learning_rate": 1.405608185053735e-05, + "loss": 0.4026, + "step": 172400 + }, + { + "epoch": 0.8897384700316271, + "grad_norm": 22281.65625, + "learning_rate": 1.4030698029706423e-05, + "loss": 0.3992, + "step": 172450 + }, + { + "epoch": 0.8899964400142399, + "grad_norm": 22979.447265625, + "learning_rate": 1.400533340859237e-05, + "loss": 0.4027, + "step": 172500 + }, + { + "epoch": 0.8902544099968528, + "grad_norm": 25733.873046875, + "learning_rate": 1.3979988000734373e-05, + "loss": 0.4092, + "step": 172550 + }, + { + "epoch": 0.8905123799794656, + "grad_norm": 23825.38671875, + "learning_rate": 1.395466181966127e-05, + "loss": 0.3997, + "step": 172600 + }, + { + "epoch": 0.8907703499620784, + "grad_norm": 27504.0703125, + "learning_rate": 1.3929354878891715e-05, + "loss": 0.403, + "step": 172650 + }, + { + "epoch": 0.8910283199446912, + "grad_norm": 28201.208984375, + "learning_rate": 1.3904067191934067e-05, + "loss": 0.4029, + "step": 172700 + }, + { + "epoch": 0.8912862899273041, + "grad_norm": 24115.69140625, + "learning_rate": 1.3878798772286377e-05, + "loss": 0.3979, + "step": 172750 + }, + { + "epoch": 0.8915442599099169, + "grad_norm": 20489.552734375, + "learning_rate": 1.3853549633436491e-05, + "loss": 0.4001, + "step": 172800 + }, + { + "epoch": 0.8918022298925297, + "grad_norm": 23580.583984375, + "learning_rate": 1.3828319788861838e-05, + "loss": 0.3983, + "step": 172850 + }, + { + "epoch": 0.8920601998751425, + "grad_norm": 24172.771484375, + "learning_rate": 1.3803109252029678e-05, + "loss": 0.4081, + "step": 172900 + }, + { + "epoch": 0.8923181698577554, + "grad_norm": 26543.375, + "learning_rate": 1.3777918036396887e-05, + "loss": 0.4015, + "step": 172950 + }, + { + "epoch": 0.8925761398403682, + "grad_norm": 27849.654296875, + "learning_rate": 1.3752746155410046e-05, + "loss": 0.4045, + "step": 173000 + }, + { + "epoch": 0.892834109822981, + "grad_norm": 25752.724609375, + "learning_rate": 1.3727593622505424e-05, + "loss": 0.4022, + "step": 173050 + }, + { + "epoch": 0.8930920798055938, + "grad_norm": 22836.892578125, + "learning_rate": 1.3702460451108935e-05, + "loss": 0.4015, + "step": 173100 + }, + { + "epoch": 0.8933500497882066, + "grad_norm": 26556.62890625, + "learning_rate": 1.3677346654636208e-05, + "loss": 0.4017, + "step": 173150 + }, + { + "epoch": 0.8936080197708195, + "grad_norm": 24310.390625, + "learning_rate": 1.3652252246492492e-05, + "loss": 0.4015, + "step": 173200 + }, + { + "epoch": 0.8938659897534323, + "grad_norm": 23713.0859375, + "learning_rate": 1.3627177240072698e-05, + "loss": 0.4024, + "step": 173250 + }, + { + "epoch": 0.8941239597360451, + "grad_norm": 21189.57421875, + "learning_rate": 1.3602121648761373e-05, + "loss": 0.4012, + "step": 173300 + }, + { + "epoch": 0.8943819297186579, + "grad_norm": 24229.1484375, + "learning_rate": 1.3577085485932705e-05, + "loss": 0.4105, + "step": 173350 + }, + { + "epoch": 0.8946398997012708, + "grad_norm": 23998.22265625, + "learning_rate": 1.3552068764950504e-05, + "loss": 0.4004, + "step": 173400 + }, + { + "epoch": 0.8948978696838836, + "grad_norm": 24751.1171875, + "learning_rate": 1.3527071499168253e-05, + "loss": 0.4024, + "step": 173450 + }, + { + "epoch": 0.8951558396664964, + "grad_norm": 24872.029296875, + "learning_rate": 1.3502093701928948e-05, + "loss": 0.406, + "step": 173500 + }, + { + "epoch": 0.8954138096491092, + "grad_norm": 23180.771484375, + "learning_rate": 1.3477135386565297e-05, + "loss": 0.4041, + "step": 173550 + }, + { + "epoch": 0.8956717796317221, + "grad_norm": 23679.1484375, + "learning_rate": 1.3452196566399555e-05, + "loss": 0.4095, + "step": 173600 + }, + { + "epoch": 0.8959297496143349, + "grad_norm": 26730.537109375, + "learning_rate": 1.3427277254743565e-05, + "loss": 0.4058, + "step": 173650 + }, + { + "epoch": 0.8961877195969477, + "grad_norm": 23320.666015625, + "learning_rate": 1.3402377464898813e-05, + "loss": 0.4038, + "step": 173700 + }, + { + "epoch": 0.8964456895795605, + "grad_norm": 22802.87890625, + "learning_rate": 1.3377497210156276e-05, + "loss": 0.3977, + "step": 173750 + }, + { + "epoch": 0.8967036595621733, + "grad_norm": 21257.22265625, + "learning_rate": 1.3352636503796584e-05, + "loss": 0.4074, + "step": 173800 + }, + { + "epoch": 0.8969616295447862, + "grad_norm": 23935.412109375, + "learning_rate": 1.332779535908989e-05, + "loss": 0.4021, + "step": 173850 + }, + { + "epoch": 0.8972195995273989, + "grad_norm": 21819.267578125, + "learning_rate": 1.3302973789295925e-05, + "loss": 0.3992, + "step": 173900 + }, + { + "epoch": 0.8974775695100118, + "grad_norm": 23360.71875, + "learning_rate": 1.327817180766393e-05, + "loss": 0.4051, + "step": 173950 + }, + { + "epoch": 0.8977355394926246, + "grad_norm": 24474.685546875, + "learning_rate": 1.3253389427432772e-05, + "loss": 0.4046, + "step": 174000 + }, + { + "epoch": 0.8979935094752375, + "grad_norm": 29715.3359375, + "learning_rate": 1.3228626661830779e-05, + "loss": 0.4037, + "step": 174050 + }, + { + "epoch": 0.8982514794578503, + "grad_norm": 23241.20703125, + "learning_rate": 1.3203883524075833e-05, + "loss": 0.4003, + "step": 174100 + }, + { + "epoch": 0.8985094494404631, + "grad_norm": 26005.23828125, + "learning_rate": 1.3179160027375347e-05, + "loss": 0.3992, + "step": 174150 + }, + { + "epoch": 0.8987674194230759, + "grad_norm": 23322.212890625, + "learning_rate": 1.3154456184926234e-05, + "loss": 0.4037, + "step": 174200 + }, + { + "epoch": 0.8990253894056888, + "grad_norm": 22434.90234375, + "learning_rate": 1.3129772009914964e-05, + "loss": 0.4044, + "step": 174250 + }, + { + "epoch": 0.8992833593883016, + "grad_norm": 24753.904296875, + "learning_rate": 1.3105107515517418e-05, + "loss": 0.4034, + "step": 174300 + }, + { + "epoch": 0.8995413293709144, + "grad_norm": 23271.814453125, + "learning_rate": 1.3080462714899066e-05, + "loss": 0.3992, + "step": 174350 + }, + { + "epoch": 0.8997992993535272, + "grad_norm": 23929.7578125, + "learning_rate": 1.3055837621214811e-05, + "loss": 0.4018, + "step": 174400 + }, + { + "epoch": 0.90005726933614, + "grad_norm": 25211.7265625, + "learning_rate": 1.3031232247609037e-05, + "loss": 0.4052, + "step": 174450 + }, + { + "epoch": 0.9003152393187529, + "grad_norm": 24554.791015625, + "learning_rate": 1.300664660721566e-05, + "loss": 0.3987, + "step": 174500 + }, + { + "epoch": 0.9005732093013656, + "grad_norm": 26028.396484375, + "learning_rate": 1.2982080713157963e-05, + "loss": 0.4032, + "step": 174550 + }, + { + "epoch": 0.9008311792839785, + "grad_norm": 24228.72265625, + "learning_rate": 1.295753457854878e-05, + "loss": 0.4001, + "step": 174600 + }, + { + "epoch": 0.9010891492665913, + "grad_norm": 24043.064453125, + "learning_rate": 1.293300821649036e-05, + "loss": 0.4009, + "step": 174650 + }, + { + "epoch": 0.9013471192492042, + "grad_norm": 25628.208984375, + "learning_rate": 1.2908501640074388e-05, + "loss": 0.4058, + "step": 174700 + }, + { + "epoch": 0.901605089231817, + "grad_norm": 23927.81640625, + "learning_rate": 1.288401486238201e-05, + "loss": 0.4044, + "step": 174750 + }, + { + "epoch": 0.9018630592144298, + "grad_norm": 23615.923828125, + "learning_rate": 1.2859547896483793e-05, + "loss": 0.4042, + "step": 174800 + }, + { + "epoch": 0.9021210291970426, + "grad_norm": 24990.158203125, + "learning_rate": 1.2835100755439705e-05, + "loss": 0.4033, + "step": 174850 + }, + { + "epoch": 0.9023789991796555, + "grad_norm": 23908.240234375, + "learning_rate": 1.2810673452299194e-05, + "loss": 0.404, + "step": 174900 + }, + { + "epoch": 0.9026369691622683, + "grad_norm": 24776.828125, + "learning_rate": 1.278626600010106e-05, + "loss": 0.4017, + "step": 174950 + }, + { + "epoch": 0.9028949391448811, + "grad_norm": 23400.912109375, + "learning_rate": 1.276187841187354e-05, + "loss": 0.4007, + "step": 175000 + }, + { + "epoch": 0.9028949391448811, + "eval_loss": 0.389443963766098, + "eval_runtime": 3184.6844, + "eval_samples_per_second": 973.761, + "eval_steps_per_second": 1.902, + "step": 175000 + }, + { + "epoch": 0.9031529091274939, + "grad_norm": 23482.337890625, + "learning_rate": 1.2737510700634248e-05, + "loss": 0.4033, + "step": 175050 + }, + { + "epoch": 0.9034108791101068, + "grad_norm": 24351.23828125, + "learning_rate": 1.2713162879390183e-05, + "loss": 0.4031, + "step": 175100 + }, + { + "epoch": 0.9036688490927196, + "grad_norm": 28495.6796875, + "learning_rate": 1.2688834961137785e-05, + "loss": 0.4057, + "step": 175150 + }, + { + "epoch": 0.9039268190753323, + "grad_norm": 23276.583984375, + "learning_rate": 1.2664526958862765e-05, + "loss": 0.4036, + "step": 175200 + }, + { + "epoch": 0.9041847890579452, + "grad_norm": 22784.033203125, + "learning_rate": 1.2640238885540312e-05, + "loss": 0.4054, + "step": 175250 + }, + { + "epoch": 0.904442759040558, + "grad_norm": 22389.21484375, + "learning_rate": 1.2615970754134914e-05, + "loss": 0.4036, + "step": 175300 + }, + { + "epoch": 0.9047007290231709, + "grad_norm": 24767.59375, + "learning_rate": 1.2591722577600412e-05, + "loss": 0.4055, + "step": 175350 + }, + { + "epoch": 0.9049586990057837, + "grad_norm": 24981.552734375, + "learning_rate": 1.2567494368880056e-05, + "loss": 0.3997, + "step": 175400 + }, + { + "epoch": 0.9052166689883965, + "grad_norm": 24523.580078125, + "learning_rate": 1.254328614090634e-05, + "loss": 0.4009, + "step": 175450 + }, + { + "epoch": 0.9054746389710093, + "grad_norm": 29571.404296875, + "learning_rate": 1.251909790660119e-05, + "loss": 0.4013, + "step": 175500 + }, + { + "epoch": 0.9057326089536222, + "grad_norm": 23286.564453125, + "learning_rate": 1.24949296788758e-05, + "loss": 0.3997, + "step": 175550 + }, + { + "epoch": 0.905990578936235, + "grad_norm": 23124.205078125, + "learning_rate": 1.247078147063071e-05, + "loss": 0.4056, + "step": 175600 + }, + { + "epoch": 0.9062485489188478, + "grad_norm": 23467.775390625, + "learning_rate": 1.2446653294755755e-05, + "loss": 0.3976, + "step": 175650 + }, + { + "epoch": 0.9065065189014606, + "grad_norm": 23793.609375, + "learning_rate": 1.2422545164130096e-05, + "loss": 0.4018, + "step": 175700 + }, + { + "epoch": 0.9067644888840735, + "grad_norm": 24439.974609375, + "learning_rate": 1.2398457091622167e-05, + "loss": 0.4063, + "step": 175750 + }, + { + "epoch": 0.9070224588666863, + "grad_norm": 23925.22265625, + "learning_rate": 1.2374389090089744e-05, + "loss": 0.4039, + "step": 175800 + }, + { + "epoch": 0.907280428849299, + "grad_norm": 23174.416015625, + "learning_rate": 1.2350341172379853e-05, + "loss": 0.4031, + "step": 175850 + }, + { + "epoch": 0.9075383988319119, + "grad_norm": 26669.806640625, + "learning_rate": 1.2326313351328794e-05, + "loss": 0.4031, + "step": 175900 + }, + { + "epoch": 0.9077963688145247, + "grad_norm": 21128.041015625, + "learning_rate": 1.2302305639762168e-05, + "loss": 0.407, + "step": 175950 + }, + { + "epoch": 0.9080543387971376, + "grad_norm": 22798.111328125, + "learning_rate": 1.2278318050494797e-05, + "loss": 0.4035, + "step": 176000 + }, + { + "epoch": 0.9083123087797504, + "grad_norm": 23327.587890625, + "learning_rate": 1.2254350596330843e-05, + "loss": 0.3958, + "step": 176050 + }, + { + "epoch": 0.9085702787623632, + "grad_norm": 22225.3125, + "learning_rate": 1.2230403290063613e-05, + "loss": 0.4074, + "step": 176100 + }, + { + "epoch": 0.908828248744976, + "grad_norm": 22727.791015625, + "learning_rate": 1.2206476144475754e-05, + "loss": 0.4063, + "step": 176150 + }, + { + "epoch": 0.9090862187275889, + "grad_norm": 26138.931640625, + "learning_rate": 1.2182569172339098e-05, + "loss": 0.408, + "step": 176200 + }, + { + "epoch": 0.9093441887102017, + "grad_norm": 23436.91796875, + "learning_rate": 1.2158682386414716e-05, + "loss": 0.4038, + "step": 176250 + }, + { + "epoch": 0.9096021586928145, + "grad_norm": 23695.244140625, + "learning_rate": 1.2134815799452947e-05, + "loss": 0.4074, + "step": 176300 + }, + { + "epoch": 0.9098601286754273, + "grad_norm": 25616.240234375, + "learning_rate": 1.2110969424193263e-05, + "loss": 0.3971, + "step": 176350 + }, + { + "epoch": 0.9101180986580402, + "grad_norm": 27326.634765625, + "learning_rate": 1.2087143273364431e-05, + "loss": 0.4045, + "step": 176400 + }, + { + "epoch": 0.910376068640653, + "grad_norm": 23704.775390625, + "learning_rate": 1.2063337359684384e-05, + "loss": 0.4071, + "step": 176450 + }, + { + "epoch": 0.9106340386232658, + "grad_norm": 25532.234375, + "learning_rate": 1.2039551695860251e-05, + "loss": 0.4021, + "step": 176500 + }, + { + "epoch": 0.9108920086058786, + "grad_norm": 25247.884765625, + "learning_rate": 1.201578629458835e-05, + "loss": 0.4074, + "step": 176550 + }, + { + "epoch": 0.9111499785884914, + "grad_norm": 29377.486328125, + "learning_rate": 1.1992041168554236e-05, + "loss": 0.4064, + "step": 176600 + }, + { + "epoch": 0.9114079485711043, + "grad_norm": 22188.34375, + "learning_rate": 1.1968316330432527e-05, + "loss": 0.404, + "step": 176650 + }, + { + "epoch": 0.911665918553717, + "grad_norm": 23766.0546875, + "learning_rate": 1.194461179288714e-05, + "loss": 0.4016, + "step": 176700 + }, + { + "epoch": 0.9119238885363299, + "grad_norm": 21386.623046875, + "learning_rate": 1.1920927568571078e-05, + "loss": 0.4055, + "step": 176750 + }, + { + "epoch": 0.9121818585189427, + "grad_norm": 25873.052734375, + "learning_rate": 1.1897263670126507e-05, + "loss": 0.3978, + "step": 176800 + }, + { + "epoch": 0.9124398285015556, + "grad_norm": 25235.5390625, + "learning_rate": 1.1873620110184803e-05, + "loss": 0.3975, + "step": 176850 + }, + { + "epoch": 0.9126977984841684, + "grad_norm": 22841.5, + "learning_rate": 1.1849996901366383e-05, + "loss": 0.4031, + "step": 176900 + }, + { + "epoch": 0.9129557684667812, + "grad_norm": 21522.388671875, + "learning_rate": 1.1826394056280893e-05, + "loss": 0.4048, + "step": 176950 + }, + { + "epoch": 0.913213738449394, + "grad_norm": 27600.689453125, + "learning_rate": 1.1802811587527074e-05, + "loss": 0.3984, + "step": 177000 + }, + { + "epoch": 0.9134717084320069, + "grad_norm": 24698.60546875, + "learning_rate": 1.177924950769278e-05, + "loss": 0.406, + "step": 177050 + }, + { + "epoch": 0.9137296784146197, + "grad_norm": 27378.033203125, + "learning_rate": 1.1755707829355001e-05, + "loss": 0.3993, + "step": 177100 + }, + { + "epoch": 0.9139876483972325, + "grad_norm": 27578.4296875, + "learning_rate": 1.1732186565079805e-05, + "loss": 0.3984, + "step": 177150 + }, + { + "epoch": 0.9142456183798453, + "grad_norm": 24650.6953125, + "learning_rate": 1.1708685727422424e-05, + "loss": 0.401, + "step": 177200 + }, + { + "epoch": 0.9145035883624582, + "grad_norm": 25550.0859375, + "learning_rate": 1.1685205328927135e-05, + "loss": 0.399, + "step": 177250 + }, + { + "epoch": 0.914761558345071, + "grad_norm": 22760.77734375, + "learning_rate": 1.166174538212732e-05, + "loss": 0.403, + "step": 177300 + }, + { + "epoch": 0.9150195283276837, + "grad_norm": 22038.26171875, + "learning_rate": 1.1638305899545443e-05, + "loss": 0.4066, + "step": 177350 + }, + { + "epoch": 0.9152774983102966, + "grad_norm": 23857.66015625, + "learning_rate": 1.1614886893693044e-05, + "loss": 0.4038, + "step": 177400 + }, + { + "epoch": 0.9155354682929094, + "grad_norm": 24813.55859375, + "learning_rate": 1.1591488377070724e-05, + "loss": 0.3992, + "step": 177450 + }, + { + "epoch": 0.9157934382755223, + "grad_norm": 24467.5859375, + "learning_rate": 1.1568110362168199e-05, + "loss": 0.4, + "step": 177500 + }, + { + "epoch": 0.9160514082581351, + "grad_norm": 22464.98046875, + "learning_rate": 1.1544752861464143e-05, + "loss": 0.4069, + "step": 177550 + }, + { + "epoch": 0.9163093782407479, + "grad_norm": 26591.51171875, + "learning_rate": 1.1521415887426379e-05, + "loss": 0.4008, + "step": 177600 + }, + { + "epoch": 0.9165673482233607, + "grad_norm": 21086.318359375, + "learning_rate": 1.1498099452511724e-05, + "loss": 0.4036, + "step": 177650 + }, + { + "epoch": 0.9168253182059736, + "grad_norm": 24243.072265625, + "learning_rate": 1.147480356916602e-05, + "loss": 0.4019, + "step": 177700 + }, + { + "epoch": 0.9170832881885864, + "grad_norm": 26714.83984375, + "learning_rate": 1.1451528249824206e-05, + "loss": 0.3978, + "step": 177750 + }, + { + "epoch": 0.9173412581711992, + "grad_norm": 24799.712890625, + "learning_rate": 1.1428273506910132e-05, + "loss": 0.4078, + "step": 177800 + }, + { + "epoch": 0.917599228153812, + "grad_norm": 25010.435546875, + "learning_rate": 1.1405039352836777e-05, + "loss": 0.4054, + "step": 177850 + }, + { + "epoch": 0.9178571981364249, + "grad_norm": 23657.78125, + "learning_rate": 1.1381825800006068e-05, + "loss": 0.4001, + "step": 177900 + }, + { + "epoch": 0.9181151681190377, + "grad_norm": 23865.349609375, + "learning_rate": 1.1358632860808955e-05, + "loss": 0.4012, + "step": 177950 + }, + { + "epoch": 0.9183731381016504, + "grad_norm": 26476.04296875, + "learning_rate": 1.1335460547625365e-05, + "loss": 0.3998, + "step": 178000 + }, + { + "epoch": 0.9186311080842633, + "grad_norm": 24907.89453125, + "learning_rate": 1.1312308872824235e-05, + "loss": 0.401, + "step": 178050 + }, + { + "epoch": 0.9188890780668761, + "grad_norm": 24008.54296875, + "learning_rate": 1.1289177848763494e-05, + "loss": 0.3991, + "step": 178100 + }, + { + "epoch": 0.919147048049489, + "grad_norm": 23814.396484375, + "learning_rate": 1.1266067487790027e-05, + "loss": 0.4039, + "step": 178150 + }, + { + "epoch": 0.9194050180321018, + "grad_norm": 25892.994140625, + "learning_rate": 1.1242977802239696e-05, + "loss": 0.4015, + "step": 178200 + }, + { + "epoch": 0.9196629880147146, + "grad_norm": 24185.7265625, + "learning_rate": 1.1219908804437328e-05, + "loss": 0.3992, + "step": 178250 + }, + { + "epoch": 0.9199209579973274, + "grad_norm": 23890.54296875, + "learning_rate": 1.1196860506696705e-05, + "loss": 0.4087, + "step": 178300 + }, + { + "epoch": 0.9201789279799403, + "grad_norm": 25288.83203125, + "learning_rate": 1.1173832921320554e-05, + "loss": 0.4038, + "step": 178350 + }, + { + "epoch": 0.9204368979625531, + "grad_norm": 27609.994140625, + "learning_rate": 1.1150826060600594e-05, + "loss": 0.4047, + "step": 178400 + }, + { + "epoch": 0.9206948679451659, + "grad_norm": 25010.259765625, + "learning_rate": 1.112783993681738e-05, + "loss": 0.4037, + "step": 178450 + }, + { + "epoch": 0.9209528379277787, + "grad_norm": 23663.78515625, + "learning_rate": 1.1104874562240514e-05, + "loss": 0.396, + "step": 178500 + }, + { + "epoch": 0.9212108079103916, + "grad_norm": 24960.072265625, + "learning_rate": 1.108192994912844e-05, + "loss": 0.4024, + "step": 178550 + }, + { + "epoch": 0.9214687778930044, + "grad_norm": 22778.66796875, + "learning_rate": 1.1059006109728543e-05, + "loss": 0.4039, + "step": 178600 + }, + { + "epoch": 0.9217267478756171, + "grad_norm": 20177.640625, + "learning_rate": 1.1036103056277165e-05, + "loss": 0.4008, + "step": 178650 + }, + { + "epoch": 0.92198471785823, + "grad_norm": 25084.703125, + "learning_rate": 1.1013220800999452e-05, + "loss": 0.4082, + "step": 178700 + }, + { + "epoch": 0.9222426878408428, + "grad_norm": 23697.529296875, + "learning_rate": 1.0990359356109558e-05, + "loss": 0.4083, + "step": 178750 + }, + { + "epoch": 0.9225006578234557, + "grad_norm": 26252.25, + "learning_rate": 1.0967518733810462e-05, + "loss": 0.4114, + "step": 178800 + }, + { + "epoch": 0.9227586278060684, + "grad_norm": 25295.103515625, + "learning_rate": 1.094469894629403e-05, + "loss": 0.4062, + "step": 178850 + }, + { + "epoch": 0.9230165977886813, + "grad_norm": 24484.203125, + "learning_rate": 1.0921900005741053e-05, + "loss": 0.4008, + "step": 178900 + }, + { + "epoch": 0.9232745677712941, + "grad_norm": 23360.701171875, + "learning_rate": 1.0899121924321154e-05, + "loss": 0.405, + "step": 178950 + }, + { + "epoch": 0.923532537753907, + "grad_norm": 22507.24609375, + "learning_rate": 1.0876364714192822e-05, + "loss": 0.3968, + "step": 179000 + }, + { + "epoch": 0.9237905077365198, + "grad_norm": 26761.66015625, + "learning_rate": 1.0853628387503423e-05, + "loss": 0.4021, + "step": 179050 + }, + { + "epoch": 0.9240484777191326, + "grad_norm": 26596.376953125, + "learning_rate": 1.0830912956389166e-05, + "loss": 0.3984, + "step": 179100 + }, + { + "epoch": 0.9243064477017454, + "grad_norm": 23996.490234375, + "learning_rate": 1.0808218432975093e-05, + "loss": 0.3996, + "step": 179150 + }, + { + "epoch": 0.9245644176843583, + "grad_norm": 22681.4609375, + "learning_rate": 1.0785544829375143e-05, + "loss": 0.4021, + "step": 179200 + }, + { + "epoch": 0.9248223876669711, + "grad_norm": 25675.728515625, + "learning_rate": 1.0762892157691995e-05, + "loss": 0.3942, + "step": 179250 + }, + { + "epoch": 0.9250803576495839, + "grad_norm": 26039.25, + "learning_rate": 1.0740260430017247e-05, + "loss": 0.4014, + "step": 179300 + }, + { + "epoch": 0.9253383276321967, + "grad_norm": 21596.50390625, + "learning_rate": 1.0717649658431256e-05, + "loss": 0.4017, + "step": 179350 + }, + { + "epoch": 0.9255962976148095, + "grad_norm": 25318.3125, + "learning_rate": 1.0695059855003204e-05, + "loss": 0.3968, + "step": 179400 + }, + { + "epoch": 0.9258542675974224, + "grad_norm": 20999.10546875, + "learning_rate": 1.0672491031791137e-05, + "loss": 0.4032, + "step": 179450 + }, + { + "epoch": 0.9261122375800351, + "grad_norm": 25034.404296875, + "learning_rate": 1.0649943200841794e-05, + "loss": 0.3987, + "step": 179500 + }, + { + "epoch": 0.926370207562648, + "grad_norm": 23470.205078125, + "learning_rate": 1.0627416374190819e-05, + "loss": 0.4009, + "step": 179550 + }, + { + "epoch": 0.9266281775452608, + "grad_norm": 23667.298828125, + "learning_rate": 1.0604910563862575e-05, + "loss": 0.4022, + "step": 179600 + }, + { + "epoch": 0.9268861475278737, + "grad_norm": 25315.5390625, + "learning_rate": 1.058242578187023e-05, + "loss": 0.4023, + "step": 179650 + }, + { + "epoch": 0.9271441175104865, + "grad_norm": 23639.34375, + "learning_rate": 1.0559962040215727e-05, + "loss": 0.407, + "step": 179700 + }, + { + "epoch": 0.9274020874930993, + "grad_norm": 29350.244140625, + "learning_rate": 1.0537519350889764e-05, + "loss": 0.4063, + "step": 179750 + }, + { + "epoch": 0.9276600574757121, + "grad_norm": 26077.30859375, + "learning_rate": 1.051509772587183e-05, + "loss": 0.4011, + "step": 179800 + }, + { + "epoch": 0.927918027458325, + "grad_norm": 22387.8046875, + "learning_rate": 1.0492697177130157e-05, + "loss": 0.398, + "step": 179850 + }, + { + "epoch": 0.9281759974409378, + "grad_norm": 24023.2734375, + "learning_rate": 1.0470317716621719e-05, + "loss": 0.4026, + "step": 179900 + }, + { + "epoch": 0.9284339674235506, + "grad_norm": 24288.666015625, + "learning_rate": 1.044795935629223e-05, + "loss": 0.403, + "step": 179950 + }, + { + "epoch": 0.9286919374061634, + "grad_norm": 26163.923828125, + "learning_rate": 1.042562210807616e-05, + "loss": 0.4001, + "step": 180000 + }, + { + "epoch": 0.9286919374061634, + "eval_loss": 0.3886363208293915, + "eval_runtime": 3188.2841, + "eval_samples_per_second": 972.661, + "eval_steps_per_second": 1.9, + "step": 180000 + }, + { + "epoch": 0.9289499073887763, + "grad_norm": 24379.322265625, + "learning_rate": 1.0403305983896683e-05, + "loss": 0.3978, + "step": 180050 + }, + { + "epoch": 0.9292078773713891, + "grad_norm": 23249.939453125, + "learning_rate": 1.0381010995665752e-05, + "loss": 0.4055, + "step": 180100 + }, + { + "epoch": 0.9294658473540018, + "grad_norm": 25460.6875, + "learning_rate": 1.0358737155283942e-05, + "loss": 0.4059, + "step": 180150 + }, + { + "epoch": 0.9297238173366147, + "grad_norm": 23166.548828125, + "learning_rate": 1.0336484474640651e-05, + "loss": 0.4051, + "step": 180200 + }, + { + "epoch": 0.9299817873192275, + "grad_norm": 23631.94921875, + "learning_rate": 1.0314252965613908e-05, + "loss": 0.3974, + "step": 180250 + }, + { + "epoch": 0.9302397573018404, + "grad_norm": 26213.556640625, + "learning_rate": 1.0292042640070449e-05, + "loss": 0.3983, + "step": 180300 + }, + { + "epoch": 0.9304977272844532, + "grad_norm": 24056.875, + "learning_rate": 1.0269853509865751e-05, + "loss": 0.3979, + "step": 180350 + }, + { + "epoch": 0.930755697267066, + "grad_norm": 24793.658203125, + "learning_rate": 1.0247685586843897e-05, + "loss": 0.3993, + "step": 180400 + }, + { + "epoch": 0.9310136672496788, + "grad_norm": 25296.04296875, + "learning_rate": 1.0225538882837733e-05, + "loss": 0.4047, + "step": 180450 + }, + { + "epoch": 0.9312716372322917, + "grad_norm": 21486.990234375, + "learning_rate": 1.0203413409668722e-05, + "loss": 0.3995, + "step": 180500 + }, + { + "epoch": 0.9315296072149045, + "grad_norm": 24168.083984375, + "learning_rate": 1.018130917914702e-05, + "loss": 0.4081, + "step": 180550 + }, + { + "epoch": 0.9317875771975173, + "grad_norm": 25313.568359375, + "learning_rate": 1.0159226203071431e-05, + "loss": 0.4024, + "step": 180600 + }, + { + "epoch": 0.9320455471801301, + "grad_norm": 22535.845703125, + "learning_rate": 1.0137164493229411e-05, + "loss": 0.3974, + "step": 180650 + }, + { + "epoch": 0.932303517162743, + "grad_norm": 24480.0703125, + "learning_rate": 1.0115124061397102e-05, + "loss": 0.4031, + "step": 180700 + }, + { + "epoch": 0.9325614871453558, + "grad_norm": 29667.470703125, + "learning_rate": 1.0093104919339241e-05, + "loss": 0.3991, + "step": 180750 + }, + { + "epoch": 0.9328194571279685, + "grad_norm": 22311.767578125, + "learning_rate": 1.0071107078809228e-05, + "loss": 0.402, + "step": 180800 + }, + { + "epoch": 0.9330774271105814, + "grad_norm": 22752.642578125, + "learning_rate": 1.0049130551549068e-05, + "loss": 0.4022, + "step": 180850 + }, + { + "epoch": 0.9333353970931942, + "grad_norm": 26333.43359375, + "learning_rate": 1.0027175349289424e-05, + "loss": 0.4006, + "step": 180900 + }, + { + "epoch": 0.9335933670758071, + "grad_norm": 22951.927734375, + "learning_rate": 1.0005241483749533e-05, + "loss": 0.4022, + "step": 180950 + }, + { + "epoch": 0.9338513370584198, + "grad_norm": 24532.15625, + "learning_rate": 9.983328966637318e-06, + "loss": 0.398, + "step": 181000 + }, + { + "epoch": 0.9341093070410327, + "grad_norm": 24624.205078125, + "learning_rate": 9.961437809649188e-06, + "loss": 0.4021, + "step": 181050 + }, + { + "epoch": 0.9343672770236455, + "grad_norm": 23679.087890625, + "learning_rate": 9.93956802447027e-06, + "loss": 0.4038, + "step": 181100 + }, + { + "epoch": 0.9346252470062584, + "grad_norm": 22279.52734375, + "learning_rate": 9.917719622774219e-06, + "loss": 0.3987, + "step": 181150 + }, + { + "epoch": 0.9348832169888712, + "grad_norm": 25709.376953125, + "learning_rate": 9.895892616223268e-06, + "loss": 0.4062, + "step": 181200 + }, + { + "epoch": 0.935141186971484, + "grad_norm": 24607.25, + "learning_rate": 9.874087016468298e-06, + "loss": 0.3973, + "step": 181250 + }, + { + "epoch": 0.9353991569540968, + "grad_norm": 25458.861328125, + "learning_rate": 9.852302835148652e-06, + "loss": 0.3993, + "step": 181300 + }, + { + "epoch": 0.9356571269367097, + "grad_norm": 24070.654296875, + "learning_rate": 9.830540083892358e-06, + "loss": 0.4057, + "step": 181350 + }, + { + "epoch": 0.9359150969193225, + "grad_norm": 25323.736328125, + "learning_rate": 9.80879877431593e-06, + "loss": 0.407, + "step": 181400 + }, + { + "epoch": 0.9361730669019352, + "grad_norm": 27513.087890625, + "learning_rate": 9.787078918024455e-06, + "loss": 0.3979, + "step": 181450 + }, + { + "epoch": 0.9364310368845481, + "grad_norm": 22324.669921875, + "learning_rate": 9.765380526611568e-06, + "loss": 0.3984, + "step": 181500 + }, + { + "epoch": 0.936689006867161, + "grad_norm": 23778.37890625, + "learning_rate": 9.743703611659465e-06, + "loss": 0.4055, + "step": 181550 + }, + { + "epoch": 0.9369469768497738, + "grad_norm": 26777.255859375, + "learning_rate": 9.722048184738864e-06, + "loss": 0.4047, + "step": 181600 + }, + { + "epoch": 0.9372049468323865, + "grad_norm": 23210.876953125, + "learning_rate": 9.700414257409002e-06, + "loss": 0.393, + "step": 181650 + }, + { + "epoch": 0.9374629168149994, + "grad_norm": 22539.84765625, + "learning_rate": 9.67880184121765e-06, + "loss": 0.4069, + "step": 181700 + }, + { + "epoch": 0.9377208867976122, + "grad_norm": 25191.609375, + "learning_rate": 9.65721094770109e-06, + "loss": 0.4069, + "step": 181750 + }, + { + "epoch": 0.9379788567802251, + "grad_norm": 23813.578125, + "learning_rate": 9.63564158838416e-06, + "loss": 0.3954, + "step": 181800 + }, + { + "epoch": 0.9382368267628379, + "grad_norm": 23869.703125, + "learning_rate": 9.614093774780114e-06, + "loss": 0.3998, + "step": 181850 + }, + { + "epoch": 0.9384947967454507, + "grad_norm": 23316.384765625, + "learning_rate": 9.5925675183908e-06, + "loss": 0.3989, + "step": 181900 + }, + { + "epoch": 0.9387527667280635, + "grad_norm": 23641.65625, + "learning_rate": 9.571062830706496e-06, + "loss": 0.4017, + "step": 181950 + }, + { + "epoch": 0.9390107367106764, + "grad_norm": 23724.431640625, + "learning_rate": 9.549579723205982e-06, + "loss": 0.4042, + "step": 182000 + }, + { + "epoch": 0.9392687066932892, + "grad_norm": 24013.849609375, + "learning_rate": 9.528118207356556e-06, + "loss": 0.3966, + "step": 182050 + }, + { + "epoch": 0.939526676675902, + "grad_norm": 21843.55859375, + "learning_rate": 9.506678294613919e-06, + "loss": 0.4051, + "step": 182100 + }, + { + "epoch": 0.9397846466585148, + "grad_norm": 22000.7734375, + "learning_rate": 9.485259996422313e-06, + "loss": 0.4042, + "step": 182150 + }, + { + "epoch": 0.9400426166411276, + "grad_norm": 23307.556640625, + "learning_rate": 9.463863324214395e-06, + "loss": 0.4018, + "step": 182200 + }, + { + "epoch": 0.9403005866237405, + "grad_norm": 22961.353515625, + "learning_rate": 9.4424882894113e-06, + "loss": 0.3991, + "step": 182250 + }, + { + "epoch": 0.9405585566063532, + "grad_norm": 24167.134765625, + "learning_rate": 9.421134903422607e-06, + "loss": 0.4033, + "step": 182300 + }, + { + "epoch": 0.9408165265889661, + "grad_norm": 24116.75, + "learning_rate": 9.399803177646339e-06, + "loss": 0.3979, + "step": 182350 + }, + { + "epoch": 0.9410744965715789, + "grad_norm": 25658.6640625, + "learning_rate": 9.378493123468946e-06, + "loss": 0.4093, + "step": 182400 + }, + { + "epoch": 0.9413324665541918, + "grad_norm": 27761.8828125, + "learning_rate": 9.357204752265341e-06, + "loss": 0.3974, + "step": 182450 + }, + { + "epoch": 0.9415904365368046, + "grad_norm": 23456.90234375, + "learning_rate": 9.335938075398842e-06, + "loss": 0.4072, + "step": 182500 + }, + { + "epoch": 0.9418484065194174, + "grad_norm": 21258.984375, + "learning_rate": 9.314693104221184e-06, + "loss": 0.3952, + "step": 182550 + }, + { + "epoch": 0.9421063765020302, + "grad_norm": 22634.01953125, + "learning_rate": 9.293469850072522e-06, + "loss": 0.402, + "step": 182600 + }, + { + "epoch": 0.9423643464846431, + "grad_norm": 22349.267578125, + "learning_rate": 9.272268324281407e-06, + "loss": 0.3974, + "step": 182650 + }, + { + "epoch": 0.9426223164672559, + "grad_norm": 23658.505859375, + "learning_rate": 9.251088538164837e-06, + "loss": 0.3979, + "step": 182700 + }, + { + "epoch": 0.9428802864498687, + "grad_norm": 26879.39453125, + "learning_rate": 9.229930503028129e-06, + "loss": 0.3965, + "step": 182750 + }, + { + "epoch": 0.9431382564324815, + "grad_norm": 25313.255859375, + "learning_rate": 9.208794230165058e-06, + "loss": 0.4049, + "step": 182800 + }, + { + "epoch": 0.9433962264150944, + "grad_norm": 26135.587890625, + "learning_rate": 9.187679730857756e-06, + "loss": 0.408, + "step": 182850 + }, + { + "epoch": 0.9436541963977072, + "grad_norm": 24064.087890625, + "learning_rate": 9.166587016376715e-06, + "loss": 0.4025, + "step": 182900 + }, + { + "epoch": 0.9439121663803199, + "grad_norm": 24475.30859375, + "learning_rate": 9.145516097980856e-06, + "loss": 0.4019, + "step": 182950 + }, + { + "epoch": 0.9441701363629328, + "grad_norm": 23691.06640625, + "learning_rate": 9.12446698691738e-06, + "loss": 0.4031, + "step": 183000 + }, + { + "epoch": 0.9444281063455456, + "grad_norm": 25653.37109375, + "learning_rate": 9.103439694421928e-06, + "loss": 0.4007, + "step": 183050 + }, + { + "epoch": 0.9446860763281585, + "grad_norm": 22718.71875, + "learning_rate": 9.08243423171845e-06, + "loss": 0.3996, + "step": 183100 + }, + { + "epoch": 0.9449440463107712, + "grad_norm": 23337.986328125, + "learning_rate": 9.061450610019262e-06, + "loss": 0.4043, + "step": 183150 + }, + { + "epoch": 0.9452020162933841, + "grad_norm": 27628.021484375, + "learning_rate": 9.040488840525001e-06, + "loss": 0.409, + "step": 183200 + }, + { + "epoch": 0.9454599862759969, + "grad_norm": 22894.26953125, + "learning_rate": 9.01954893442467e-06, + "loss": 0.4026, + "step": 183250 + }, + { + "epoch": 0.9457179562586098, + "grad_norm": 27624.564453125, + "learning_rate": 8.998630902895566e-06, + "loss": 0.4011, + "step": 183300 + }, + { + "epoch": 0.9459759262412226, + "grad_norm": 25944.05859375, + "learning_rate": 8.977734757103351e-06, + "loss": 0.3995, + "step": 183350 + }, + { + "epoch": 0.9462338962238354, + "grad_norm": 27243.31640625, + "learning_rate": 8.95686050820197e-06, + "loss": 0.3983, + "step": 183400 + }, + { + "epoch": 0.9464918662064482, + "grad_norm": 24556.611328125, + "learning_rate": 8.936008167333699e-06, + "loss": 0.4041, + "step": 183450 + }, + { + "epoch": 0.9467498361890611, + "grad_norm": 22205.880859375, + "learning_rate": 8.915177745629112e-06, + "loss": 0.3973, + "step": 183500 + }, + { + "epoch": 0.9470078061716739, + "grad_norm": 26829.6328125, + "learning_rate": 8.894369254207069e-06, + "loss": 0.4023, + "step": 183550 + }, + { + "epoch": 0.9472657761542866, + "grad_norm": 24388.59765625, + "learning_rate": 8.873582704174776e-06, + "loss": 0.397, + "step": 183600 + }, + { + "epoch": 0.9475237461368995, + "grad_norm": 25665.98828125, + "learning_rate": 8.852818106627647e-06, + "loss": 0.4055, + "step": 183650 + }, + { + "epoch": 0.9477817161195123, + "grad_norm": 24880.47265625, + "learning_rate": 8.83207547264946e-06, + "loss": 0.4016, + "step": 183700 + }, + { + "epoch": 0.9480396861021252, + "grad_norm": 26516.6953125, + "learning_rate": 8.81135481331221e-06, + "loss": 0.3992, + "step": 183750 + }, + { + "epoch": 0.9482976560847379, + "grad_norm": 22604.123046875, + "learning_rate": 8.790656139676179e-06, + "loss": 0.401, + "step": 183800 + }, + { + "epoch": 0.9485556260673508, + "grad_norm": 24668.94921875, + "learning_rate": 8.769979462789957e-06, + "loss": 0.3974, + "step": 183850 + }, + { + "epoch": 0.9488135960499636, + "grad_norm": 26522.896484375, + "learning_rate": 8.749324793690295e-06, + "loss": 0.4048, + "step": 183900 + }, + { + "epoch": 0.9490715660325765, + "grad_norm": 26786.48046875, + "learning_rate": 8.728692143402295e-06, + "loss": 0.4075, + "step": 183950 + }, + { + "epoch": 0.9493295360151893, + "grad_norm": 23683.54296875, + "learning_rate": 8.708081522939265e-06, + "loss": 0.3996, + "step": 184000 + }, + { + "epoch": 0.9495875059978021, + "grad_norm": 23064.400390625, + "learning_rate": 8.687492943302739e-06, + "loss": 0.4036, + "step": 184050 + }, + { + "epoch": 0.9498454759804149, + "grad_norm": 24142.4921875, + "learning_rate": 8.666926415482501e-06, + "loss": 0.4023, + "step": 184100 + }, + { + "epoch": 0.9501034459630278, + "grad_norm": 24012.076171875, + "learning_rate": 8.6463819504566e-06, + "loss": 0.4024, + "step": 184150 + }, + { + "epoch": 0.9503614159456406, + "grad_norm": 22214.41015625, + "learning_rate": 8.625859559191224e-06, + "loss": 0.4002, + "step": 184200 + }, + { + "epoch": 0.9506193859282533, + "grad_norm": 24664.162109375, + "learning_rate": 8.60535925264086e-06, + "loss": 0.4027, + "step": 184250 + }, + { + "epoch": 0.9508773559108662, + "grad_norm": 21136.900390625, + "learning_rate": 8.584881041748171e-06, + "loss": 0.3957, + "step": 184300 + }, + { + "epoch": 0.951135325893479, + "grad_norm": 22411.33984375, + "learning_rate": 8.56442493744401e-06, + "loss": 0.3977, + "step": 184350 + }, + { + "epoch": 0.9513932958760919, + "grad_norm": 23004.173828125, + "learning_rate": 8.54399095064749e-06, + "loss": 0.4014, + "step": 184400 + }, + { + "epoch": 0.9516512658587046, + "grad_norm": 23692.26171875, + "learning_rate": 8.523579092265827e-06, + "loss": 0.4013, + "step": 184450 + }, + { + "epoch": 0.9519092358413175, + "grad_norm": 25310.919921875, + "learning_rate": 8.503189373194509e-06, + "loss": 0.3961, + "step": 184500 + }, + { + "epoch": 0.9521672058239303, + "grad_norm": 25963.943359375, + "learning_rate": 8.482821804317171e-06, + "loss": 0.4049, + "step": 184550 + }, + { + "epoch": 0.9524251758065432, + "grad_norm": 24282.115234375, + "learning_rate": 8.46247639650562e-06, + "loss": 0.4008, + "step": 184600 + }, + { + "epoch": 0.952683145789156, + "grad_norm": 24703.26953125, + "learning_rate": 8.442153160619837e-06, + "loss": 0.4063, + "step": 184650 + }, + { + "epoch": 0.9529411157717688, + "grad_norm": 23616.09375, + "learning_rate": 8.421852107507966e-06, + "loss": 0.3974, + "step": 184700 + }, + { + "epoch": 0.9531990857543816, + "grad_norm": 25447.408203125, + "learning_rate": 8.40157324800634e-06, + "loss": 0.4066, + "step": 184750 + }, + { + "epoch": 0.9534570557369945, + "grad_norm": 25534.3984375, + "learning_rate": 8.381316592939403e-06, + "loss": 0.4027, + "step": 184800 + }, + { + "epoch": 0.9537150257196073, + "grad_norm": 24251.138671875, + "learning_rate": 8.361082153119777e-06, + "loss": 0.3958, + "step": 184850 + }, + { + "epoch": 0.95397299570222, + "grad_norm": 26980.046875, + "learning_rate": 8.3408699393482e-06, + "loss": 0.4058, + "step": 184900 + }, + { + "epoch": 0.9542309656848329, + "grad_norm": 26143.732421875, + "learning_rate": 8.320679962413574e-06, + "loss": 0.4006, + "step": 184950 + }, + { + "epoch": 0.9544889356674457, + "grad_norm": 24566.15234375, + "learning_rate": 8.300512233092893e-06, + "loss": 0.405, + "step": 185000 + }, + { + "epoch": 0.9544889356674457, + "eval_loss": 0.3880694806575775, + "eval_runtime": 3197.8794, + "eval_samples_per_second": 969.743, + "eval_steps_per_second": 1.894, + "step": 185000 + }, + { + "epoch": 0.9547469056500586, + "grad_norm": 22463.359375, + "learning_rate": 8.280366762151349e-06, + "loss": 0.4035, + "step": 185050 + }, + { + "epoch": 0.9550048756326713, + "grad_norm": 23964.845703125, + "learning_rate": 8.260243560342146e-06, + "loss": 0.399, + "step": 185100 + }, + { + "epoch": 0.9552628456152842, + "grad_norm": 22267.978515625, + "learning_rate": 8.2401426384067e-06, + "loss": 0.4065, + "step": 185150 + }, + { + "epoch": 0.955520815597897, + "grad_norm": 23959.732421875, + "learning_rate": 8.220064007074485e-06, + "loss": 0.3988, + "step": 185200 + }, + { + "epoch": 0.9557787855805099, + "grad_norm": 22042.95703125, + "learning_rate": 8.200007677063066e-06, + "loss": 0.4005, + "step": 185250 + }, + { + "epoch": 0.9560367555631226, + "grad_norm": 23760.798828125, + "learning_rate": 8.17997365907816e-06, + "loss": 0.4043, + "step": 185300 + }, + { + "epoch": 0.9562947255457355, + "grad_norm": 23235.8828125, + "learning_rate": 8.1599619638135e-06, + "loss": 0.3999, + "step": 185350 + }, + { + "epoch": 0.9565526955283483, + "grad_norm": 22637.701171875, + "learning_rate": 8.139972601950967e-06, + "loss": 0.4004, + "step": 185400 + }, + { + "epoch": 0.9568106655109612, + "grad_norm": 28806.810546875, + "learning_rate": 8.120005584160489e-06, + "loss": 0.4022, + "step": 185450 + }, + { + "epoch": 0.957068635493574, + "grad_norm": 22143.8203125, + "learning_rate": 8.100060921100067e-06, + "loss": 0.3977, + "step": 185500 + }, + { + "epoch": 0.9573266054761868, + "grad_norm": 22921.810546875, + "learning_rate": 8.080138623415783e-06, + "loss": 0.4, + "step": 185550 + }, + { + "epoch": 0.9575845754587996, + "grad_norm": 25425.640625, + "learning_rate": 8.060238701741762e-06, + "loss": 0.4021, + "step": 185600 + }, + { + "epoch": 0.9578425454414125, + "grad_norm": 27279.6796875, + "learning_rate": 8.040361166700216e-06, + "loss": 0.4064, + "step": 185650 + }, + { + "epoch": 0.9581005154240253, + "grad_norm": 25144.322265625, + "learning_rate": 8.020506028901376e-06, + "loss": 0.4031, + "step": 185700 + }, + { + "epoch": 0.958358485406638, + "grad_norm": 21046.607421875, + "learning_rate": 8.000673298943534e-06, + "loss": 0.4041, + "step": 185750 + }, + { + "epoch": 0.9586164553892509, + "grad_norm": 23166.087890625, + "learning_rate": 7.980862987413018e-06, + "loss": 0.3996, + "step": 185800 + }, + { + "epoch": 0.9588744253718637, + "grad_norm": 23506.693359375, + "learning_rate": 7.961075104884186e-06, + "loss": 0.3973, + "step": 185850 + }, + { + "epoch": 0.9591323953544766, + "grad_norm": 25975.408203125, + "learning_rate": 7.94130966191941e-06, + "loss": 0.4048, + "step": 185900 + }, + { + "epoch": 0.9593903653370893, + "grad_norm": 23704.638671875, + "learning_rate": 7.921566669069147e-06, + "loss": 0.4045, + "step": 185950 + }, + { + "epoch": 0.9596483353197022, + "grad_norm": 27402.2421875, + "learning_rate": 7.901846136871766e-06, + "loss": 0.4007, + "step": 186000 + }, + { + "epoch": 0.959906305302315, + "grad_norm": 23186.658203125, + "learning_rate": 7.882148075853752e-06, + "loss": 0.4072, + "step": 186050 + }, + { + "epoch": 0.9601642752849279, + "grad_norm": 24789.619140625, + "learning_rate": 7.862472496529528e-06, + "loss": 0.4056, + "step": 186100 + }, + { + "epoch": 0.9604222452675407, + "grad_norm": 23849.71875, + "learning_rate": 7.842819409401524e-06, + "loss": 0.4067, + "step": 186150 + }, + { + "epoch": 0.9606802152501535, + "grad_norm": 24820.765625, + "learning_rate": 7.823188824960221e-06, + "loss": 0.4071, + "step": 186200 + }, + { + "epoch": 0.9609381852327663, + "grad_norm": 23276.568359375, + "learning_rate": 7.803580753683992e-06, + "loss": 0.3989, + "step": 186250 + }, + { + "epoch": 0.9611961552153792, + "grad_norm": 21064.8984375, + "learning_rate": 7.783995206039279e-06, + "loss": 0.3994, + "step": 186300 + }, + { + "epoch": 0.961454125197992, + "grad_norm": 27310.30078125, + "learning_rate": 7.764432192480464e-06, + "loss": 0.4015, + "step": 186350 + }, + { + "epoch": 0.9617120951806047, + "grad_norm": 24786.1796875, + "learning_rate": 7.744891723449888e-06, + "loss": 0.4042, + "step": 186400 + }, + { + "epoch": 0.9619700651632176, + "grad_norm": 22362.47265625, + "learning_rate": 7.725373809377911e-06, + "loss": 0.3991, + "step": 186450 + }, + { + "epoch": 0.9622280351458304, + "grad_norm": 23751.4296875, + "learning_rate": 7.705878460682775e-06, + "loss": 0.3988, + "step": 186500 + }, + { + "epoch": 0.9624860051284433, + "grad_norm": 22956.935546875, + "learning_rate": 7.686405687770748e-06, + "loss": 0.4049, + "step": 186550 + }, + { + "epoch": 0.962743975111056, + "grad_norm": 25276.861328125, + "learning_rate": 7.666955501036006e-06, + "loss": 0.4005, + "step": 186600 + }, + { + "epoch": 0.9630019450936689, + "grad_norm": 22390.625, + "learning_rate": 7.647527910860691e-06, + "loss": 0.4008, + "step": 186650 + }, + { + "epoch": 0.9632599150762817, + "grad_norm": 28946.125, + "learning_rate": 7.628122927614856e-06, + "loss": 0.3987, + "step": 186700 + }, + { + "epoch": 0.9635178850588946, + "grad_norm": 23663.3125, + "learning_rate": 7.608740561656541e-06, + "loss": 0.4006, + "step": 186750 + }, + { + "epoch": 0.9637758550415074, + "grad_norm": 21705.16015625, + "learning_rate": 7.589380823331632e-06, + "loss": 0.4023, + "step": 186800 + }, + { + "epoch": 0.9640338250241202, + "grad_norm": 25353.228515625, + "learning_rate": 7.570043722974019e-06, + "loss": 0.4006, + "step": 186850 + }, + { + "epoch": 0.964291795006733, + "grad_norm": 26046.412109375, + "learning_rate": 7.55072927090546e-06, + "loss": 0.3931, + "step": 186900 + }, + { + "epoch": 0.9645497649893459, + "grad_norm": 25989.2578125, + "learning_rate": 7.531437477435621e-06, + "loss": 0.3989, + "step": 186950 + }, + { + "epoch": 0.9648077349719587, + "grad_norm": 22714.423828125, + "learning_rate": 7.51216835286212e-06, + "loss": 0.4018, + "step": 187000 + }, + { + "epoch": 0.9650657049545714, + "grad_norm": 26353.42578125, + "learning_rate": 7.492921907470407e-06, + "loss": 0.4056, + "step": 187050 + }, + { + "epoch": 0.9653236749371843, + "grad_norm": 23085.212890625, + "learning_rate": 7.4736981515338864e-06, + "loss": 0.3995, + "step": 187100 + }, + { + "epoch": 0.9655816449197971, + "grad_norm": 23125.970703125, + "learning_rate": 7.454497095313817e-06, + "loss": 0.4069, + "step": 187150 + }, + { + "epoch": 0.96583961490241, + "grad_norm": 23488.2265625, + "learning_rate": 7.435318749059356e-06, + "loss": 0.4039, + "step": 187200 + }, + { + "epoch": 0.9660975848850227, + "grad_norm": 22577.46875, + "learning_rate": 7.4161631230075305e-06, + "loss": 0.4051, + "step": 187250 + }, + { + "epoch": 0.9663555548676356, + "grad_norm": 22637.890625, + "learning_rate": 7.397030227383228e-06, + "loss": 0.3986, + "step": 187300 + }, + { + "epoch": 0.9666135248502484, + "grad_norm": 26084.412109375, + "learning_rate": 7.377920072399247e-06, + "loss": 0.398, + "step": 187350 + }, + { + "epoch": 0.9668714948328613, + "grad_norm": 25263.6328125, + "learning_rate": 7.3588326682562e-06, + "loss": 0.4035, + "step": 187400 + }, + { + "epoch": 0.9671294648154741, + "grad_norm": 22348.236328125, + "learning_rate": 7.339768025142573e-06, + "loss": 0.4003, + "step": 187450 + }, + { + "epoch": 0.9673874347980869, + "grad_norm": 23006.091796875, + "learning_rate": 7.320726153234714e-06, + "loss": 0.399, + "step": 187500 + }, + { + "epoch": 0.9676454047806997, + "grad_norm": 24137.44921875, + "learning_rate": 7.301707062696794e-06, + "loss": 0.3999, + "step": 187550 + }, + { + "epoch": 0.9679033747633126, + "grad_norm": 26101.837890625, + "learning_rate": 7.282710763680828e-06, + "loss": 0.4007, + "step": 187600 + }, + { + "epoch": 0.9681613447459254, + "grad_norm": 21417.814453125, + "learning_rate": 7.263737266326709e-06, + "loss": 0.3994, + "step": 187650 + }, + { + "epoch": 0.9684193147285381, + "grad_norm": 25831.45703125, + "learning_rate": 7.244786580762075e-06, + "loss": 0.3925, + "step": 187700 + }, + { + "epoch": 0.968677284711151, + "grad_norm": 24546.84765625, + "learning_rate": 7.225858717102474e-06, + "loss": 0.4004, + "step": 187750 + }, + { + "epoch": 0.9689352546937638, + "grad_norm": 23773.09765625, + "learning_rate": 7.206953685451212e-06, + "loss": 0.4041, + "step": 187800 + }, + { + "epoch": 0.9691932246763767, + "grad_norm": 23538.923828125, + "learning_rate": 7.188071495899423e-06, + "loss": 0.3971, + "step": 187850 + }, + { + "epoch": 0.9694511946589894, + "grad_norm": 24968.310546875, + "learning_rate": 7.169212158526084e-06, + "loss": 0.4047, + "step": 187900 + }, + { + "epoch": 0.9697091646416023, + "grad_norm": 24379.23828125, + "learning_rate": 7.150375683397908e-06, + "loss": 0.3983, + "step": 187950 + }, + { + "epoch": 0.9699671346242151, + "grad_norm": 25501.638671875, + "learning_rate": 7.131562080569465e-06, + "loss": 0.4024, + "step": 188000 + }, + { + "epoch": 0.970225104606828, + "grad_norm": 24917.73046875, + "learning_rate": 7.112771360083087e-06, + "loss": 0.3998, + "step": 188050 + }, + { + "epoch": 0.9704830745894407, + "grad_norm": 24725.638671875, + "learning_rate": 7.094003531968896e-06, + "loss": 0.3964, + "step": 188100 + }, + { + "epoch": 0.9707410445720536, + "grad_norm": 23913.5703125, + "learning_rate": 7.075258606244789e-06, + "loss": 0.3987, + "step": 188150 + }, + { + "epoch": 0.9709990145546664, + "grad_norm": 25010.09375, + "learning_rate": 7.05653659291644e-06, + "loss": 0.4021, + "step": 188200 + }, + { + "epoch": 0.9712569845372793, + "grad_norm": 25357.556640625, + "learning_rate": 7.037837501977318e-06, + "loss": 0.4007, + "step": 188250 + }, + { + "epoch": 0.9715149545198921, + "grad_norm": 24599.890625, + "learning_rate": 7.019161343408625e-06, + "loss": 0.3962, + "step": 188300 + }, + { + "epoch": 0.9717729245025049, + "grad_norm": 25866.2734375, + "learning_rate": 7.000508127179328e-06, + "loss": 0.3983, + "step": 188350 + }, + { + "epoch": 0.9720308944851177, + "grad_norm": 22591.40625, + "learning_rate": 6.981877863246161e-06, + "loss": 0.3971, + "step": 188400 + }, + { + "epoch": 0.9722888644677306, + "grad_norm": 20752.091796875, + "learning_rate": 6.963270561553586e-06, + "loss": 0.3946, + "step": 188450 + }, + { + "epoch": 0.9725468344503434, + "grad_norm": 22927.109375, + "learning_rate": 6.94468623203382e-06, + "loss": 0.4036, + "step": 188500 + }, + { + "epoch": 0.9728048044329561, + "grad_norm": 27096.041015625, + "learning_rate": 6.92612488460685e-06, + "loss": 0.3982, + "step": 188550 + }, + { + "epoch": 0.973062774415569, + "grad_norm": 24426.93359375, + "learning_rate": 6.907586529180321e-06, + "loss": 0.4054, + "step": 188600 + }, + { + "epoch": 0.9733207443981818, + "grad_norm": 25097.658203125, + "learning_rate": 6.889071175649669e-06, + "loss": 0.4015, + "step": 188650 + }, + { + "epoch": 0.9735787143807947, + "grad_norm": 24646.548828125, + "learning_rate": 6.870578833898033e-06, + "loss": 0.3977, + "step": 188700 + }, + { + "epoch": 0.9738366843634074, + "grad_norm": 23465.357421875, + "learning_rate": 6.852109513796257e-06, + "loss": 0.396, + "step": 188750 + }, + { + "epoch": 0.9740946543460203, + "grad_norm": 22382.603515625, + "learning_rate": 6.83366322520293e-06, + "loss": 0.4018, + "step": 188800 + }, + { + "epoch": 0.9743526243286331, + "grad_norm": 24666.61328125, + "learning_rate": 6.815239977964283e-06, + "loss": 0.4046, + "step": 188850 + }, + { + "epoch": 0.974610594311246, + "grad_norm": 25308.685546875, + "learning_rate": 6.796839781914321e-06, + "loss": 0.3998, + "step": 188900 + }, + { + "epoch": 0.9748685642938588, + "grad_norm": 24856.64453125, + "learning_rate": 6.778462646874706e-06, + "loss": 0.4014, + "step": 188950 + }, + { + "epoch": 0.9751265342764716, + "grad_norm": 27452.50390625, + "learning_rate": 6.760108582654795e-06, + "loss": 0.4008, + "step": 189000 + }, + { + "epoch": 0.9753845042590844, + "grad_norm": 25027.416015625, + "learning_rate": 6.741777599051629e-06, + "loss": 0.4006, + "step": 189050 + }, + { + "epoch": 0.9756424742416973, + "grad_norm": 24687.740234375, + "learning_rate": 6.723469705849927e-06, + "loss": 0.4056, + "step": 189100 + }, + { + "epoch": 0.9759004442243101, + "grad_norm": 24812.55078125, + "learning_rate": 6.705184912822105e-06, + "loss": 0.4043, + "step": 189150 + }, + { + "epoch": 0.9761584142069228, + "grad_norm": 25776.005859375, + "learning_rate": 6.686923229728214e-06, + "loss": 0.4052, + "step": 189200 + }, + { + "epoch": 0.9764163841895357, + "grad_norm": 24319.34765625, + "learning_rate": 6.668684666316005e-06, + "loss": 0.4014, + "step": 189250 + }, + { + "epoch": 0.9766743541721485, + "grad_norm": 28024.419921875, + "learning_rate": 6.650469232320839e-06, + "loss": 0.3991, + "step": 189300 + }, + { + "epoch": 0.9769323241547614, + "grad_norm": 25074.068359375, + "learning_rate": 6.6322769374658085e-06, + "loss": 0.4034, + "step": 189350 + }, + { + "epoch": 0.9771902941373741, + "grad_norm": 21126.572265625, + "learning_rate": 6.61410779146156e-06, + "loss": 0.3998, + "step": 189400 + }, + { + "epoch": 0.977448264119987, + "grad_norm": 25041.337890625, + "learning_rate": 6.595961804006467e-06, + "loss": 0.4012, + "step": 189450 + }, + { + "epoch": 0.9777062341025998, + "grad_norm": 25474.263671875, + "learning_rate": 6.577838984786489e-06, + "loss": 0.3991, + "step": 189500 + }, + { + "epoch": 0.9779642040852127, + "grad_norm": 22192.98828125, + "learning_rate": 6.55973934347523e-06, + "loss": 0.3965, + "step": 189550 + }, + { + "epoch": 0.9782221740678255, + "grad_norm": 24587.9453125, + "learning_rate": 6.5416628897339625e-06, + "loss": 0.4008, + "step": 189600 + }, + { + "epoch": 0.9784801440504383, + "grad_norm": 23246.314453125, + "learning_rate": 6.523609633211497e-06, + "loss": 0.4036, + "step": 189650 + }, + { + "epoch": 0.9787381140330511, + "grad_norm": 24233.033203125, + "learning_rate": 6.505579583544353e-06, + "loss": 0.4002, + "step": 189700 + }, + { + "epoch": 0.978996084015664, + "grad_norm": 24149.6953125, + "learning_rate": 6.487572750356602e-06, + "loss": 0.4043, + "step": 189750 + }, + { + "epoch": 0.9792540539982768, + "grad_norm": 25376.3046875, + "learning_rate": 6.469589143259952e-06, + "loss": 0.3997, + "step": 189800 + }, + { + "epoch": 0.9795120239808895, + "grad_norm": 25878.90625, + "learning_rate": 6.451628771853696e-06, + "loss": 0.3936, + "step": 189850 + }, + { + "epoch": 0.9797699939635024, + "grad_norm": 24123.169921875, + "learning_rate": 6.433691645724743e-06, + "loss": 0.3976, + "step": 189900 + }, + { + "epoch": 0.9800279639461152, + "grad_norm": 23894.5625, + "learning_rate": 6.4157777744475626e-06, + "loss": 0.4025, + "step": 189950 + }, + { + "epoch": 0.9802859339287281, + "grad_norm": 27271.9609375, + "learning_rate": 6.3978871675842544e-06, + "loss": 0.4007, + "step": 190000 + }, + { + "epoch": 0.9802859339287281, + "eval_loss": 0.3872862458229065, + "eval_runtime": 3184.1416, + "eval_samples_per_second": 973.927, + "eval_steps_per_second": 1.902, + "step": 190000 + }, + { + "epoch": 0.9805439039113408, + "grad_norm": 25592.9296875, + "learning_rate": 6.380019834684475e-06, + "loss": 0.4041, + "step": 190050 + }, + { + "epoch": 0.9808018738939537, + "grad_norm": 22425.51953125, + "learning_rate": 6.362175785285457e-06, + "loss": 0.4028, + "step": 190100 + }, + { + "epoch": 0.9810598438765665, + "grad_norm": 25178.28125, + "learning_rate": 6.344355028912008e-06, + "loss": 0.3972, + "step": 190150 + }, + { + "epoch": 0.9813178138591794, + "grad_norm": 25157.537109375, + "learning_rate": 6.326557575076486e-06, + "loss": 0.3989, + "step": 190200 + }, + { + "epoch": 0.9815757838417921, + "grad_norm": 23774.67578125, + "learning_rate": 6.3087834332788695e-06, + "loss": 0.4057, + "step": 190250 + }, + { + "epoch": 0.981833753824405, + "grad_norm": 25307.736328125, + "learning_rate": 6.2910326130066035e-06, + "loss": 0.3946, + "step": 190300 + }, + { + "epoch": 0.9820917238070178, + "grad_norm": 28657.8125, + "learning_rate": 6.273305123734769e-06, + "loss": 0.4006, + "step": 190350 + }, + { + "epoch": 0.9823496937896307, + "grad_norm": 24404.603515625, + "learning_rate": 6.255600974925935e-06, + "loss": 0.3998, + "step": 190400 + }, + { + "epoch": 0.9826076637722435, + "grad_norm": 22460.1640625, + "learning_rate": 6.237920176030232e-06, + "loss": 0.4039, + "step": 190450 + }, + { + "epoch": 0.9828656337548562, + "grad_norm": 27335.625, + "learning_rate": 6.220262736485355e-06, + "loss": 0.3937, + "step": 190500 + }, + { + "epoch": 0.9831236037374691, + "grad_norm": 27996.9765625, + "learning_rate": 6.202628665716464e-06, + "loss": 0.4025, + "step": 190550 + }, + { + "epoch": 0.983381573720082, + "grad_norm": 23532.66796875, + "learning_rate": 6.18501797313632e-06, + "loss": 0.4007, + "step": 190600 + }, + { + "epoch": 0.9836395437026948, + "grad_norm": 27360.333984375, + "learning_rate": 6.167430668145146e-06, + "loss": 0.3994, + "step": 190650 + }, + { + "epoch": 0.9838975136853075, + "grad_norm": 23754.23828125, + "learning_rate": 6.149866760130718e-06, + "loss": 0.4043, + "step": 190700 + }, + { + "epoch": 0.9841554836679204, + "grad_norm": 24313.943359375, + "learning_rate": 6.1323262584683075e-06, + "loss": 0.4039, + "step": 190750 + }, + { + "epoch": 0.9844134536505332, + "grad_norm": 22932.11328125, + "learning_rate": 6.114809172520686e-06, + "loss": 0.3977, + "step": 190800 + }, + { + "epoch": 0.9846714236331461, + "grad_norm": 27614.103515625, + "learning_rate": 6.097315511638135e-06, + "loss": 0.405, + "step": 190850 + }, + { + "epoch": 0.9849293936157588, + "grad_norm": 21648.470703125, + "learning_rate": 6.079845285158447e-06, + "loss": 0.403, + "step": 190900 + }, + { + "epoch": 0.9851873635983717, + "grad_norm": 25720.76953125, + "learning_rate": 6.0623985024068854e-06, + "loss": 0.4069, + "step": 190950 + }, + { + "epoch": 0.9854453335809845, + "grad_norm": 22051.30078125, + "learning_rate": 6.044975172696199e-06, + "loss": 0.4062, + "step": 191000 + }, + { + "epoch": 0.9857033035635974, + "grad_norm": 27862.138671875, + "learning_rate": 6.027575305326621e-06, + "loss": 0.4029, + "step": 191050 + }, + { + "epoch": 0.9859612735462102, + "grad_norm": 24624.951171875, + "learning_rate": 6.010198909585862e-06, + "loss": 0.3995, + "step": 191100 + }, + { + "epoch": 0.986219243528823, + "grad_norm": 23278.45703125, + "learning_rate": 5.992845994749136e-06, + "loss": 0.3981, + "step": 191150 + }, + { + "epoch": 0.9864772135114358, + "grad_norm": 27549.26953125, + "learning_rate": 5.975516570079048e-06, + "loss": 0.3999, + "step": 191200 + }, + { + "epoch": 0.9867351834940487, + "grad_norm": 24570.40625, + "learning_rate": 5.95821064482574e-06, + "loss": 0.4052, + "step": 191250 + }, + { + "epoch": 0.9869931534766615, + "grad_norm": 23672.029296875, + "learning_rate": 5.9409282282267665e-06, + "loss": 0.4045, + "step": 191300 + }, + { + "epoch": 0.9872511234592742, + "grad_norm": 22627.697265625, + "learning_rate": 5.923669329507148e-06, + "loss": 0.4017, + "step": 191350 + }, + { + "epoch": 0.9875090934418871, + "grad_norm": 22583.0390625, + "learning_rate": 5.906433957879365e-06, + "loss": 0.399, + "step": 191400 + }, + { + "epoch": 0.9877670634244999, + "grad_norm": 22665.984375, + "learning_rate": 5.889222122543298e-06, + "loss": 0.3989, + "step": 191450 + }, + { + "epoch": 0.9880250334071128, + "grad_norm": 25125.6640625, + "learning_rate": 5.872033832686319e-06, + "loss": 0.4001, + "step": 191500 + }, + { + "epoch": 0.9882830033897255, + "grad_norm": 24863.34375, + "learning_rate": 5.8548690974831845e-06, + "loss": 0.3991, + "step": 191550 + }, + { + "epoch": 0.9885409733723384, + "grad_norm": 23538.44921875, + "learning_rate": 5.837727926096109e-06, + "loss": 0.3979, + "step": 191600 + }, + { + "epoch": 0.9887989433549512, + "grad_norm": 23396.3203125, + "learning_rate": 5.820610327674708e-06, + "loss": 0.4049, + "step": 191650 + }, + { + "epoch": 0.9890569133375641, + "grad_norm": 22553.01171875, + "learning_rate": 5.803516311356044e-06, + "loss": 0.3983, + "step": 191700 + }, + { + "epoch": 0.9893148833201769, + "grad_norm": 25163.04296875, + "learning_rate": 5.786445886264541e-06, + "loss": 0.3969, + "step": 191750 + }, + { + "epoch": 0.9895728533027897, + "grad_norm": 22826.181640625, + "learning_rate": 5.769399061512093e-06, + "loss": 0.4016, + "step": 191800 + }, + { + "epoch": 0.9898308232854025, + "grad_norm": 22302.7265625, + "learning_rate": 5.752375846197944e-06, + "loss": 0.3988, + "step": 191850 + }, + { + "epoch": 0.9900887932680154, + "grad_norm": 20985.990234375, + "learning_rate": 5.735376249408753e-06, + "loss": 0.3952, + "step": 191900 + }, + { + "epoch": 0.9903467632506282, + "grad_norm": 23513.19921875, + "learning_rate": 5.718400280218611e-06, + "loss": 0.4052, + "step": 191950 + }, + { + "epoch": 0.9906047332332409, + "grad_norm": 23184.818359375, + "learning_rate": 5.7014479476889145e-06, + "loss": 0.399, + "step": 192000 + }, + { + "epoch": 0.9908627032158538, + "grad_norm": 23472.9453125, + "learning_rate": 5.684519260868521e-06, + "loss": 0.3946, + "step": 192050 + }, + { + "epoch": 0.9911206731984666, + "grad_norm": 26255.388671875, + "learning_rate": 5.667614228793622e-06, + "loss": 0.3964, + "step": 192100 + }, + { + "epoch": 0.9913786431810795, + "grad_norm": 23894.54296875, + "learning_rate": 5.650732860487806e-06, + "loss": 0.3928, + "step": 192150 + }, + { + "epoch": 0.9916366131636922, + "grad_norm": 24135.478515625, + "learning_rate": 5.633875164962016e-06, + "loss": 0.4019, + "step": 192200 + }, + { + "epoch": 0.9918945831463051, + "grad_norm": 26928.08984375, + "learning_rate": 5.617041151214553e-06, + "loss": 0.3958, + "step": 192250 + }, + { + "epoch": 0.9921525531289179, + "grad_norm": 22469.884765625, + "learning_rate": 5.600230828231107e-06, + "loss": 0.4031, + "step": 192300 + }, + { + "epoch": 0.9924105231115308, + "grad_norm": 23694.59765625, + "learning_rate": 5.583444204984695e-06, + "loss": 0.3926, + "step": 192350 + }, + { + "epoch": 0.9926684930941435, + "grad_norm": 23482.986328125, + "learning_rate": 5.566681290435688e-06, + "loss": 0.4112, + "step": 192400 + }, + { + "epoch": 0.9929264630767564, + "grad_norm": 22524.994140625, + "learning_rate": 5.549942093531812e-06, + "loss": 0.3981, + "step": 192450 + }, + { + "epoch": 0.9931844330593692, + "grad_norm": 27258.35546875, + "learning_rate": 5.5332266232081155e-06, + "loss": 0.4024, + "step": 192500 + }, + { + "epoch": 0.9934424030419821, + "grad_norm": 19928.40625, + "learning_rate": 5.516534888386992e-06, + "loss": 0.4028, + "step": 192550 + }, + { + "epoch": 0.9937003730245949, + "grad_norm": 21809.205078125, + "learning_rate": 5.499866897978189e-06, + "loss": 0.3996, + "step": 192600 + }, + { + "epoch": 0.9939583430072076, + "grad_norm": 22132.6171875, + "learning_rate": 5.483222660878729e-06, + "loss": 0.4012, + "step": 192650 + }, + { + "epoch": 0.9942163129898205, + "grad_norm": 25306.728515625, + "learning_rate": 5.466602185973002e-06, + "loss": 0.3987, + "step": 192700 + }, + { + "epoch": 0.9944742829724333, + "grad_norm": 29266.78515625, + "learning_rate": 5.4500054821326865e-06, + "loss": 0.4028, + "step": 192750 + }, + { + "epoch": 0.9947322529550462, + "grad_norm": 23506.931640625, + "learning_rate": 5.433432558216778e-06, + "loss": 0.3948, + "step": 192800 + }, + { + "epoch": 0.9949902229376589, + "grad_norm": 22564.177734375, + "learning_rate": 5.416883423071606e-06, + "loss": 0.4015, + "step": 192850 + }, + { + "epoch": 0.9952481929202718, + "grad_norm": 24564.380859375, + "learning_rate": 5.400358085530738e-06, + "loss": 0.4046, + "step": 192900 + }, + { + "epoch": 0.9955061629028846, + "grad_norm": 24793.91796875, + "learning_rate": 5.383856554415117e-06, + "loss": 0.4003, + "step": 192950 + }, + { + "epoch": 0.9957641328854975, + "grad_norm": 23798.228515625, + "learning_rate": 5.367378838532927e-06, + "loss": 0.3982, + "step": 193000 + }, + { + "epoch": 0.9960221028681102, + "grad_norm": 23164.642578125, + "learning_rate": 5.350924946679653e-06, + "loss": 0.3977, + "step": 193050 + }, + { + "epoch": 0.9962800728507231, + "grad_norm": 25646.29296875, + "learning_rate": 5.334494887638058e-06, + "loss": 0.3992, + "step": 193100 + }, + { + "epoch": 0.9965380428333359, + "grad_norm": 24146.2421875, + "learning_rate": 5.318088670178189e-06, + "loss": 0.4037, + "step": 193150 + }, + { + "epoch": 0.9967960128159488, + "grad_norm": 22594.72265625, + "learning_rate": 5.301706303057386e-06, + "loss": 0.4004, + "step": 193200 + }, + { + "epoch": 0.9970539827985616, + "grad_norm": 23395.515625, + "learning_rate": 5.285347795020224e-06, + "loss": 0.3958, + "step": 193250 + }, + { + "epoch": 0.9973119527811743, + "grad_norm": 23383.431640625, + "learning_rate": 5.269013154798558e-06, + "loss": 0.3998, + "step": 193300 + }, + { + "epoch": 0.9975699227637872, + "grad_norm": 20586.341796875, + "learning_rate": 5.252702391111508e-06, + "loss": 0.3979, + "step": 193350 + }, + { + "epoch": 0.9978278927464, + "grad_norm": 26526.83203125, + "learning_rate": 5.236415512665438e-06, + "loss": 0.4036, + "step": 193400 + }, + { + "epoch": 0.9980858627290129, + "grad_norm": 25045.224609375, + "learning_rate": 5.220152528153965e-06, + "loss": 0.4028, + "step": 193450 + }, + { + "epoch": 0.9983438327116256, + "grad_norm": 23480.755859375, + "learning_rate": 5.20391344625798e-06, + "loss": 0.4053, + "step": 193500 + }, + { + "epoch": 0.9986018026942385, + "grad_norm": 25235.927734375, + "learning_rate": 5.187698275645553e-06, + "loss": 0.3964, + "step": 193550 + }, + { + "epoch": 0.9988597726768513, + "grad_norm": 24883.29296875, + "learning_rate": 5.1715070249720555e-06, + "loss": 0.3978, + "step": 193600 + }, + { + "epoch": 0.9991177426594642, + "grad_norm": 25161.71484375, + "learning_rate": 5.155339702880052e-06, + "loss": 0.3998, + "step": 193650 + }, + { + "epoch": 0.9993757126420769, + "grad_norm": 21524.724609375, + "learning_rate": 5.13919631799934e-06, + "loss": 0.3955, + "step": 193700 + }, + { + "epoch": 0.9996336826246898, + "grad_norm": 23394.1015625, + "learning_rate": 5.123076878946981e-06, + "loss": 0.3962, + "step": 193750 + }, + { + "epoch": 0.9998916526073026, + "grad_norm": 24562.419921875, + "learning_rate": 5.106981394327165e-06, + "loss": 0.4, + "step": 193800 + }, + { + "epoch": 1.0001496225899154, + "grad_norm": 23818.201171875, + "learning_rate": 5.090909872731392e-06, + "loss": 0.4065, + "step": 193850 + }, + { + "epoch": 1.0004075925725282, + "grad_norm": 25973.83984375, + "learning_rate": 5.074862322738316e-06, + "loss": 0.4015, + "step": 193900 + }, + { + "epoch": 1.000665562555141, + "grad_norm": 26476.041015625, + "learning_rate": 5.0588387529138085e-06, + "loss": 0.401, + "step": 193950 + }, + { + "epoch": 1.000923532537754, + "grad_norm": 22776.267578125, + "learning_rate": 5.042839171810937e-06, + "loss": 0.4021, + "step": 194000 + }, + { + "epoch": 1.0011815025203668, + "grad_norm": 22484.884765625, + "learning_rate": 5.026863587969966e-06, + "loss": 0.4013, + "step": 194050 + }, + { + "epoch": 1.0014394725029796, + "grad_norm": 21445.009765625, + "learning_rate": 5.010912009918361e-06, + "loss": 0.4001, + "step": 194100 + }, + { + "epoch": 1.0016974424855924, + "grad_norm": 23748.365234375, + "learning_rate": 4.994984446170764e-06, + "loss": 0.3985, + "step": 194150 + }, + { + "epoch": 1.0019554124682053, + "grad_norm": 25007.73828125, + "learning_rate": 4.9790809052289996e-06, + "loss": 0.403, + "step": 194200 + }, + { + "epoch": 1.002213382450818, + "grad_norm": 26824.900390625, + "learning_rate": 4.963201395582062e-06, + "loss": 0.3966, + "step": 194250 + }, + { + "epoch": 1.0024713524334308, + "grad_norm": 21838.662109375, + "learning_rate": 4.947345925706148e-06, + "loss": 0.3955, + "step": 194300 + }, + { + "epoch": 1.0027293224160436, + "grad_norm": 20830.59375, + "learning_rate": 4.931514504064566e-06, + "loss": 0.3976, + "step": 194350 + }, + { + "epoch": 1.0029872923986565, + "grad_norm": 24187.484375, + "learning_rate": 4.915707139107856e-06, + "loss": 0.4009, + "step": 194400 + }, + { + "epoch": 1.0032452623812693, + "grad_norm": 23026.99609375, + "learning_rate": 4.899923839273662e-06, + "loss": 0.4017, + "step": 194450 + }, + { + "epoch": 1.0035032323638822, + "grad_norm": 25855.919921875, + "learning_rate": 4.884164612986808e-06, + "loss": 0.3966, + "step": 194500 + }, + { + "epoch": 1.003761202346495, + "grad_norm": 23424.58984375, + "learning_rate": 4.86842946865928e-06, + "loss": 0.4007, + "step": 194550 + }, + { + "epoch": 1.0040191723291079, + "grad_norm": 20644.318359375, + "learning_rate": 4.852718414690166e-06, + "loss": 0.405, + "step": 194600 + }, + { + "epoch": 1.0042771423117207, + "grad_norm": 24923.30078125, + "learning_rate": 4.8370314594657405e-06, + "loss": 0.3961, + "step": 194650 + }, + { + "epoch": 1.0045351122943333, + "grad_norm": 23334.19921875, + "learning_rate": 4.821368611359395e-06, + "loss": 0.3981, + "step": 194700 + }, + { + "epoch": 1.0047930822769462, + "grad_norm": 24258.54296875, + "learning_rate": 4.8057298787316516e-06, + "loss": 0.3998, + "step": 194750 + }, + { + "epoch": 1.005051052259559, + "grad_norm": 23366.234375, + "learning_rate": 4.790115269930162e-06, + "loss": 0.3998, + "step": 194800 + }, + { + "epoch": 1.005309022242172, + "grad_norm": 22389.498046875, + "learning_rate": 4.774524793289692e-06, + "loss": 0.4025, + "step": 194850 + }, + { + "epoch": 1.0055669922247847, + "grad_norm": 25497.361328125, + "learning_rate": 4.758958457132157e-06, + "loss": 0.3979, + "step": 194900 + }, + { + "epoch": 1.0058249622073976, + "grad_norm": 24179.626953125, + "learning_rate": 4.7434162697665595e-06, + "loss": 0.3984, + "step": 194950 + }, + { + "epoch": 1.0060829321900104, + "grad_norm": 24002.955078125, + "learning_rate": 4.727898239489015e-06, + "loss": 0.398, + "step": 195000 + }, + { + "epoch": 1.0060829321900104, + "eval_loss": 0.3868441879749298, + "eval_runtime": 3205.6792, + "eval_samples_per_second": 967.383, + "eval_steps_per_second": 1.889, + "step": 195000 + }, + { + "epoch": 1.0063409021726233, + "grad_norm": 26567.27734375, + "learning_rate": 4.712404374582741e-06, + "loss": 0.399, + "step": 195050 + }, + { + "epoch": 1.006598872155236, + "grad_norm": 25244.615234375, + "learning_rate": 4.696934683318077e-06, + "loss": 0.3998, + "step": 195100 + }, + { + "epoch": 1.0068568421378488, + "grad_norm": 23278.265625, + "learning_rate": 4.6814891739524195e-06, + "loss": 0.4002, + "step": 195150 + }, + { + "epoch": 1.0071148121204616, + "grad_norm": 23141.138671875, + "learning_rate": 4.666067854730322e-06, + "loss": 0.3965, + "step": 195200 + }, + { + "epoch": 1.0073727821030745, + "grad_norm": 23506.640625, + "learning_rate": 4.650670733883344e-06, + "loss": 0.3962, + "step": 195250 + }, + { + "epoch": 1.0076307520856873, + "grad_norm": 26591.212890625, + "learning_rate": 4.635297819630202e-06, + "loss": 0.3992, + "step": 195300 + }, + { + "epoch": 1.0078887220683002, + "grad_norm": 22111.640625, + "learning_rate": 4.619949120176642e-06, + "loss": 0.401, + "step": 195350 + }, + { + "epoch": 1.008146692050913, + "grad_norm": 25048.17578125, + "learning_rate": 4.604624643715505e-06, + "loss": 0.4016, + "step": 195400 + }, + { + "epoch": 1.0084046620335259, + "grad_norm": 23263.23828125, + "learning_rate": 4.589324398426714e-06, + "loss": 0.3942, + "step": 195450 + }, + { + "epoch": 1.0086626320161387, + "grad_norm": 23640.9296875, + "learning_rate": 4.57404839247722e-06, + "loss": 0.4039, + "step": 195500 + }, + { + "epoch": 1.0089206019987513, + "grad_norm": 25680.390625, + "learning_rate": 4.558796634021079e-06, + "loss": 0.3986, + "step": 195550 + }, + { + "epoch": 1.0091785719813642, + "grad_norm": 23321.78125, + "learning_rate": 4.543569131199382e-06, + "loss": 0.4039, + "step": 195600 + }, + { + "epoch": 1.009436541963977, + "grad_norm": 24123.205078125, + "learning_rate": 4.528365892140263e-06, + "loss": 0.397, + "step": 195650 + }, + { + "epoch": 1.0096945119465899, + "grad_norm": 23332.673828125, + "learning_rate": 4.513186924958928e-06, + "loss": 0.3941, + "step": 195700 + }, + { + "epoch": 1.0099524819292027, + "grad_norm": 25583.609375, + "learning_rate": 4.498032237757605e-06, + "loss": 0.4046, + "step": 195750 + }, + { + "epoch": 1.0102104519118156, + "grad_norm": 25230.3515625, + "learning_rate": 4.482901838625586e-06, + "loss": 0.4012, + "step": 195800 + }, + { + "epoch": 1.0104684218944284, + "grad_norm": 24376.5859375, + "learning_rate": 4.46779573563918e-06, + "loss": 0.3911, + "step": 195850 + }, + { + "epoch": 1.0107263918770413, + "grad_norm": 23978.17578125, + "learning_rate": 4.452713936861724e-06, + "loss": 0.4031, + "step": 195900 + }, + { + "epoch": 1.010984361859654, + "grad_norm": 23535.03515625, + "learning_rate": 4.437656450343602e-06, + "loss": 0.3933, + "step": 195950 + }, + { + "epoch": 1.0112423318422668, + "grad_norm": 24465.794921875, + "learning_rate": 4.422623284122207e-06, + "loss": 0.4027, + "step": 196000 + }, + { + "epoch": 1.0115003018248796, + "grad_norm": 23942.03125, + "learning_rate": 4.407614446221936e-06, + "loss": 0.4024, + "step": 196050 + }, + { + "epoch": 1.0117582718074924, + "grad_norm": 23610.720703125, + "learning_rate": 4.392629944654248e-06, + "loss": 0.3982, + "step": 196100 + }, + { + "epoch": 1.0120162417901053, + "grad_norm": 25937.53125, + "learning_rate": 4.3776697874175375e-06, + "loss": 0.3991, + "step": 196150 + }, + { + "epoch": 1.0122742117727181, + "grad_norm": 24008.5234375, + "learning_rate": 4.362733982497286e-06, + "loss": 0.3968, + "step": 196200 + }, + { + "epoch": 1.012532181755331, + "grad_norm": 23377.744140625, + "learning_rate": 4.347822537865914e-06, + "loss": 0.3958, + "step": 196250 + }, + { + "epoch": 1.0127901517379438, + "grad_norm": 23768.7421875, + "learning_rate": 4.332935461482862e-06, + "loss": 0.4004, + "step": 196300 + }, + { + "epoch": 1.0130481217205567, + "grad_norm": 25974.603515625, + "learning_rate": 4.3180727612945896e-06, + "loss": 0.4038, + "step": 196350 + }, + { + "epoch": 1.0133060917031693, + "grad_norm": 22376.34765625, + "learning_rate": 4.303234445234477e-06, + "loss": 0.3991, + "step": 196400 + }, + { + "epoch": 1.0135640616857822, + "grad_norm": 22145.03515625, + "learning_rate": 4.288420521222963e-06, + "loss": 0.3971, + "step": 196450 + }, + { + "epoch": 1.013822031668395, + "grad_norm": 21512.77734375, + "learning_rate": 4.273630997167422e-06, + "loss": 0.399, + "step": 196500 + }, + { + "epoch": 1.0140800016510079, + "grad_norm": 22957.626953125, + "learning_rate": 4.258865880962215e-06, + "loss": 0.3995, + "step": 196550 + }, + { + "epoch": 1.0143379716336207, + "grad_norm": 21951.89453125, + "learning_rate": 4.244125180488673e-06, + "loss": 0.3961, + "step": 196600 + }, + { + "epoch": 1.0145959416162336, + "grad_norm": 23440.005859375, + "learning_rate": 4.229408903615095e-06, + "loss": 0.4057, + "step": 196650 + }, + { + "epoch": 1.0148539115988464, + "grad_norm": 23987.21484375, + "learning_rate": 4.214717058196754e-06, + "loss": 0.3999, + "step": 196700 + }, + { + "epoch": 1.0151118815814593, + "grad_norm": 24526.482421875, + "learning_rate": 4.200049652075866e-06, + "loss": 0.3964, + "step": 196750 + }, + { + "epoch": 1.0153698515640721, + "grad_norm": 23351.193359375, + "learning_rate": 4.185406693081612e-06, + "loss": 0.3978, + "step": 196800 + }, + { + "epoch": 1.0156278215466847, + "grad_norm": 25014.873046875, + "learning_rate": 4.170788189030106e-06, + "loss": 0.3963, + "step": 196850 + }, + { + "epoch": 1.0158857915292976, + "grad_norm": 21085.181640625, + "learning_rate": 4.156194147724451e-06, + "loss": 0.4015, + "step": 196900 + }, + { + "epoch": 1.0161437615119104, + "grad_norm": 20203.427734375, + "learning_rate": 4.141624576954634e-06, + "loss": 0.4037, + "step": 196950 + }, + { + "epoch": 1.0164017314945233, + "grad_norm": 23869.416015625, + "learning_rate": 4.1270794844976255e-06, + "loss": 0.4038, + "step": 197000 + }, + { + "epoch": 1.0166597014771361, + "grad_norm": 24936.158203125, + "learning_rate": 4.112558878117318e-06, + "loss": 0.4073, + "step": 197050 + }, + { + "epoch": 1.016917671459749, + "grad_norm": 23021.921875, + "learning_rate": 4.098062765564509e-06, + "loss": 0.4056, + "step": 197100 + }, + { + "epoch": 1.0171756414423618, + "grad_norm": 21626.19921875, + "learning_rate": 4.083591154576971e-06, + "loss": 0.3989, + "step": 197150 + }, + { + "epoch": 1.0174336114249747, + "grad_norm": 25556.169921875, + "learning_rate": 4.069144052879342e-06, + "loss": 0.3975, + "step": 197200 + }, + { + "epoch": 1.0176915814075873, + "grad_norm": 23286.365234375, + "learning_rate": 4.054721468183226e-06, + "loss": 0.3974, + "step": 197250 + }, + { + "epoch": 1.0179495513902002, + "grad_norm": 24497.57421875, + "learning_rate": 4.040323408187113e-06, + "loss": 0.4028, + "step": 197300 + }, + { + "epoch": 1.018207521372813, + "grad_norm": 26279.40625, + "learning_rate": 4.025949880576407e-06, + "loss": 0.4034, + "step": 197350 + }, + { + "epoch": 1.0184654913554259, + "grad_norm": 22679.267578125, + "learning_rate": 4.011600893023421e-06, + "loss": 0.3991, + "step": 197400 + }, + { + "epoch": 1.0187234613380387, + "grad_norm": 25421.83984375, + "learning_rate": 3.997276453187365e-06, + "loss": 0.4023, + "step": 197450 + }, + { + "epoch": 1.0189814313206516, + "grad_norm": 25313.75, + "learning_rate": 3.982976568714336e-06, + "loss": 0.4018, + "step": 197500 + }, + { + "epoch": 1.0192394013032644, + "grad_norm": 24318.505859375, + "learning_rate": 3.96870124723736e-06, + "loss": 0.4027, + "step": 197550 + }, + { + "epoch": 1.0194973712858773, + "grad_norm": 22409.70703125, + "learning_rate": 3.9544504963763105e-06, + "loss": 0.3982, + "step": 197600 + }, + { + "epoch": 1.01975534126849, + "grad_norm": 25028.7265625, + "learning_rate": 3.9402243237379675e-06, + "loss": 0.4037, + "step": 197650 + }, + { + "epoch": 1.0200133112511027, + "grad_norm": 21235.19140625, + "learning_rate": 3.926022736915985e-06, + "loss": 0.3972, + "step": 197700 + }, + { + "epoch": 1.0202712812337156, + "grad_norm": 24214.41015625, + "learning_rate": 3.911845743490889e-06, + "loss": 0.3984, + "step": 197750 + }, + { + "epoch": 1.0205292512163284, + "grad_norm": 24445.375, + "learning_rate": 3.897693351030102e-06, + "loss": 0.4025, + "step": 197800 + }, + { + "epoch": 1.0207872211989413, + "grad_norm": 25233.3515625, + "learning_rate": 3.883565567087871e-06, + "loss": 0.3993, + "step": 197850 + }, + { + "epoch": 1.0210451911815541, + "grad_norm": 23982.43359375, + "learning_rate": 3.8694623992053534e-06, + "loss": 0.4023, + "step": 197900 + }, + { + "epoch": 1.021303161164167, + "grad_norm": 28533.689453125, + "learning_rate": 3.855383854910549e-06, + "loss": 0.3917, + "step": 197950 + }, + { + "epoch": 1.0215611311467798, + "grad_norm": 26334.77734375, + "learning_rate": 3.841329941718286e-06, + "loss": 0.3989, + "step": 198000 + }, + { + "epoch": 1.0218191011293927, + "grad_norm": 24765.802734375, + "learning_rate": 3.827300667130312e-06, + "loss": 0.398, + "step": 198050 + }, + { + "epoch": 1.0220770711120055, + "grad_norm": 25089.34765625, + "learning_rate": 3.8132960386351445e-06, + "loss": 0.4049, + "step": 198100 + }, + { + "epoch": 1.0223350410946181, + "grad_norm": 23840.72265625, + "learning_rate": 3.7993160637082027e-06, + "loss": 0.3998, + "step": 198150 + }, + { + "epoch": 1.022593011077231, + "grad_norm": 21590.1328125, + "learning_rate": 3.7853607498117282e-06, + "loss": 0.404, + "step": 198200 + }, + { + "epoch": 1.0228509810598438, + "grad_norm": 24620.478515625, + "learning_rate": 3.7714301043947855e-06, + "loss": 0.3958, + "step": 198250 + }, + { + "epoch": 1.0231089510424567, + "grad_norm": 22476.82421875, + "learning_rate": 3.757524134893292e-06, + "loss": 0.3993, + "step": 198300 + }, + { + "epoch": 1.0233669210250695, + "grad_norm": 22550.45703125, + "learning_rate": 3.7436428487299836e-06, + "loss": 0.3983, + "step": 198350 + }, + { + "epoch": 1.0236248910076824, + "grad_norm": 23764.958984375, + "learning_rate": 3.7297862533144045e-06, + "loss": 0.4005, + "step": 198400 + }, + { + "epoch": 1.0238828609902952, + "grad_norm": 23600.103515625, + "learning_rate": 3.7159543560429667e-06, + "loss": 0.3976, + "step": 198450 + }, + { + "epoch": 1.024140830972908, + "grad_norm": 24258.537109375, + "learning_rate": 3.7021471642988583e-06, + "loss": 0.4015, + "step": 198500 + }, + { + "epoch": 1.0243988009555207, + "grad_norm": 22559.609375, + "learning_rate": 3.6883646854520837e-06, + "loss": 0.4028, + "step": 198550 + }, + { + "epoch": 1.0246567709381336, + "grad_norm": 20827.234375, + "learning_rate": 3.67460692685947e-06, + "loss": 0.3954, + "step": 198600 + }, + { + "epoch": 1.0249147409207464, + "grad_norm": 24864.171875, + "learning_rate": 3.6608738958646303e-06, + "loss": 0.3919, + "step": 198650 + }, + { + "epoch": 1.0251727109033593, + "grad_norm": 25603.6796875, + "learning_rate": 3.647165599798019e-06, + "loss": 0.3984, + "step": 198700 + }, + { + "epoch": 1.0254306808859721, + "grad_norm": 21448.0234375, + "learning_rate": 3.6334820459768217e-06, + "loss": 0.4031, + "step": 198750 + }, + { + "epoch": 1.025688650868585, + "grad_norm": 24923.51953125, + "learning_rate": 3.6198232417050782e-06, + "loss": 0.4023, + "step": 198800 + }, + { + "epoch": 1.0259466208511978, + "grad_norm": 21672.09765625, + "learning_rate": 3.6061891942735957e-06, + "loss": 0.4027, + "step": 198850 + }, + { + "epoch": 1.0262045908338107, + "grad_norm": 24733.31640625, + "learning_rate": 3.5925799109599423e-06, + "loss": 0.401, + "step": 198900 + }, + { + "epoch": 1.0264625608164235, + "grad_norm": 25941.05859375, + "learning_rate": 3.5789953990285284e-06, + "loss": 0.3944, + "step": 198950 + }, + { + "epoch": 1.0267205307990361, + "grad_norm": 25462.96875, + "learning_rate": 3.56543566573046e-06, + "loss": 0.4021, + "step": 199000 + }, + { + "epoch": 1.026978500781649, + "grad_norm": 24243.462890625, + "learning_rate": 3.5519007183036856e-06, + "loss": 0.4009, + "step": 199050 + }, + { + "epoch": 1.0272364707642618, + "grad_norm": 22507.208984375, + "learning_rate": 3.5383905639728987e-06, + "loss": 0.3968, + "step": 199100 + }, + { + "epoch": 1.0274944407468747, + "grad_norm": 22496.060546875, + "learning_rate": 3.524905209949553e-06, + "loss": 0.3988, + "step": 199150 + }, + { + "epoch": 1.0277524107294875, + "grad_norm": 22755.974609375, + "learning_rate": 3.511444663431862e-06, + "loss": 0.3944, + "step": 199200 + }, + { + "epoch": 1.0280103807121004, + "grad_norm": 24945.93359375, + "learning_rate": 3.498008931604818e-06, + "loss": 0.4015, + "step": 199250 + }, + { + "epoch": 1.0282683506947132, + "grad_norm": 23216.15625, + "learning_rate": 3.484598021640134e-06, + "loss": 0.3982, + "step": 199300 + }, + { + "epoch": 1.028526320677326, + "grad_norm": 24690.8203125, + "learning_rate": 3.4712119406963174e-06, + "loss": 0.4, + "step": 199350 + }, + { + "epoch": 1.0287842906599387, + "grad_norm": 23324.27734375, + "learning_rate": 3.4578506959185907e-06, + "loss": 0.4005, + "step": 199400 + }, + { + "epoch": 1.0290422606425516, + "grad_norm": 22831.544921875, + "learning_rate": 3.444514294438922e-06, + "loss": 0.3987, + "step": 199450 + }, + { + "epoch": 1.0293002306251644, + "grad_norm": 22126.681640625, + "learning_rate": 3.4312027433760383e-06, + "loss": 0.4044, + "step": 199500 + }, + { + "epoch": 1.0295582006077773, + "grad_norm": 22105.94140625, + "learning_rate": 3.417916049835368e-06, + "loss": 0.4023, + "step": 199550 + }, + { + "epoch": 1.02981617059039, + "grad_norm": 24164.646484375, + "learning_rate": 3.4046542209091037e-06, + "loss": 0.3968, + "step": 199600 + }, + { + "epoch": 1.030074140573003, + "grad_norm": 23752.33203125, + "learning_rate": 3.3914172636761554e-06, + "loss": 0.3974, + "step": 199650 + }, + { + "epoch": 1.0303321105556158, + "grad_norm": 21793.787109375, + "learning_rate": 3.3782051852021433e-06, + "loss": 0.3981, + "step": 199700 + }, + { + "epoch": 1.0305900805382286, + "grad_norm": 26727.91796875, + "learning_rate": 3.365017992539432e-06, + "loss": 0.4025, + "step": 199750 + }, + { + "epoch": 1.0308480505208415, + "grad_norm": 21089.958984375, + "learning_rate": 3.3518556927270683e-06, + "loss": 0.4001, + "step": 199800 + }, + { + "epoch": 1.0311060205034541, + "grad_norm": 23690.0390625, + "learning_rate": 3.33871829279086e-06, + "loss": 0.3956, + "step": 199850 + }, + { + "epoch": 1.031363990486067, + "grad_norm": 24266.84375, + "learning_rate": 3.325605799743281e-06, + "loss": 0.3966, + "step": 199900 + }, + { + "epoch": 1.0316219604686798, + "grad_norm": 22199.455078125, + "learning_rate": 3.312518220583527e-06, + "loss": 0.4058, + "step": 199950 + }, + { + "epoch": 1.0318799304512927, + "grad_norm": 21272.033203125, + "learning_rate": 3.299455562297504e-06, + "loss": 0.3969, + "step": 200000 + }, + { + "epoch": 1.0318799304512927, + "eval_loss": 0.38684460520744324, + "eval_runtime": 3230.0057, + "eval_samples_per_second": 960.097, + "eval_steps_per_second": 1.875, + "step": 200000 + }, + { + "epoch": 1.0321379004339055, + "grad_norm": 23089.7578125, + "learning_rate": 3.286417831857791e-06, + "loss": 0.4011, + "step": 200050 + }, + { + "epoch": 1.0323958704165184, + "grad_norm": 27875.5859375, + "learning_rate": 3.2734050362236814e-06, + "loss": 0.4014, + "step": 200100 + }, + { + "epoch": 1.0326538403991312, + "grad_norm": 22023.40234375, + "learning_rate": 3.260417182341169e-06, + "loss": 0.398, + "step": 200150 + }, + { + "epoch": 1.032911810381744, + "grad_norm": 23899.208984375, + "learning_rate": 3.247454277142892e-06, + "loss": 0.3976, + "step": 200200 + }, + { + "epoch": 1.0331697803643567, + "grad_norm": 22874.44921875, + "learning_rate": 3.2345163275482147e-06, + "loss": 0.4014, + "step": 200250 + }, + { + "epoch": 1.0334277503469695, + "grad_norm": 21650.296875, + "learning_rate": 3.221603340463164e-06, + "loss": 0.4012, + "step": 200300 + }, + { + "epoch": 1.0336857203295824, + "grad_norm": 24189.89453125, + "learning_rate": 3.2087153227804314e-06, + "loss": 0.401, + "step": 200350 + }, + { + "epoch": 1.0339436903121952, + "grad_norm": 21525.12109375, + "learning_rate": 3.1958522813794134e-06, + "loss": 0.4016, + "step": 200400 + }, + { + "epoch": 1.034201660294808, + "grad_norm": 23732.640625, + "learning_rate": 3.1830142231261294e-06, + "loss": 0.4021, + "step": 200450 + }, + { + "epoch": 1.034459630277421, + "grad_norm": 24911.607421875, + "learning_rate": 3.170201154873298e-06, + "loss": 0.3943, + "step": 200500 + }, + { + "epoch": 1.0347176002600338, + "grad_norm": 25295.861328125, + "learning_rate": 3.1574130834602813e-06, + "loss": 0.401, + "step": 200550 + }, + { + "epoch": 1.0349755702426466, + "grad_norm": 23536.498046875, + "learning_rate": 3.1446500157131075e-06, + "loss": 0.3964, + "step": 200600 + }, + { + "epoch": 1.0352335402252595, + "grad_norm": 26484.287109375, + "learning_rate": 3.131911958444461e-06, + "loss": 0.4068, + "step": 200650 + }, + { + "epoch": 1.0354915102078721, + "grad_norm": 24330.001953125, + "learning_rate": 3.1191989184536474e-06, + "loss": 0.3911, + "step": 200700 + }, + { + "epoch": 1.035749480190485, + "grad_norm": 21095.994140625, + "learning_rate": 3.1065109025266713e-06, + "loss": 0.4, + "step": 200750 + }, + { + "epoch": 1.0360074501730978, + "grad_norm": 21829.64453125, + "learning_rate": 3.093847917436132e-06, + "loss": 0.4016, + "step": 200800 + }, + { + "epoch": 1.0362654201557107, + "grad_norm": 25772.79296875, + "learning_rate": 3.0812099699412953e-06, + "loss": 0.4032, + "step": 200850 + }, + { + "epoch": 1.0365233901383235, + "grad_norm": 25614.240234375, + "learning_rate": 3.0685970667880425e-06, + "loss": 0.3976, + "step": 200900 + }, + { + "epoch": 1.0367813601209364, + "grad_norm": 26170.455078125, + "learning_rate": 3.056009214708905e-06, + "loss": 0.4001, + "step": 200950 + }, + { + "epoch": 1.0370393301035492, + "grad_norm": 24801.76171875, + "learning_rate": 3.0434464204230186e-06, + "loss": 0.3924, + "step": 201000 + }, + { + "epoch": 1.037297300086162, + "grad_norm": 28940.640625, + "learning_rate": 3.0309086906361917e-06, + "loss": 0.3998, + "step": 201050 + }, + { + "epoch": 1.037555270068775, + "grad_norm": 23856.90625, + "learning_rate": 3.018396032040788e-06, + "loss": 0.397, + "step": 201100 + }, + { + "epoch": 1.0378132400513875, + "grad_norm": 23309.861328125, + "learning_rate": 3.005908451315842e-06, + "loss": 0.4026, + "step": 201150 + }, + { + "epoch": 1.0380712100340004, + "grad_norm": 23592.7265625, + "learning_rate": 2.993445955126978e-06, + "loss": 0.3971, + "step": 201200 + }, + { + "epoch": 1.0383291800166132, + "grad_norm": 23301.861328125, + "learning_rate": 2.9810085501264296e-06, + "loss": 0.403, + "step": 201250 + }, + { + "epoch": 1.038587149999226, + "grad_norm": 23200.0859375, + "learning_rate": 2.968596242953059e-06, + "loss": 0.4001, + "step": 201300 + }, + { + "epoch": 1.038845119981839, + "grad_norm": 26894.70703125, + "learning_rate": 2.956209040232294e-06, + "loss": 0.3988, + "step": 201350 + }, + { + "epoch": 1.0391030899644518, + "grad_norm": 22423.931640625, + "learning_rate": 2.9438469485761956e-06, + "loss": 0.3981, + "step": 201400 + }, + { + "epoch": 1.0393610599470646, + "grad_norm": 24167.068359375, + "learning_rate": 2.9315099745834073e-06, + "loss": 0.4024, + "step": 201450 + }, + { + "epoch": 1.0396190299296775, + "grad_norm": 25832.712890625, + "learning_rate": 2.9191981248391677e-06, + "loss": 0.3937, + "step": 201500 + }, + { + "epoch": 1.03987699991229, + "grad_norm": 26923.005859375, + "learning_rate": 2.9069114059153024e-06, + "loss": 0.3922, + "step": 201550 + }, + { + "epoch": 1.040134969894903, + "grad_norm": 23295.380859375, + "learning_rate": 2.8946498243702158e-06, + "loss": 0.4011, + "step": 201600 + }, + { + "epoch": 1.0403929398775158, + "grad_norm": 23378.5234375, + "learning_rate": 2.882413386748922e-06, + "loss": 0.4033, + "step": 201650 + }, + { + "epoch": 1.0406509098601286, + "grad_norm": 24349.9140625, + "learning_rate": 2.8702020995829803e-06, + "loss": 0.3964, + "step": 201700 + }, + { + "epoch": 1.0409088798427415, + "grad_norm": 24178.61328125, + "learning_rate": 2.8580159693905485e-06, + "loss": 0.3978, + "step": 201750 + }, + { + "epoch": 1.0411668498253543, + "grad_norm": 24998.189453125, + "learning_rate": 2.8458550026763344e-06, + "loss": 0.3943, + "step": 201800 + }, + { + "epoch": 1.0414248198079672, + "grad_norm": 28928.828125, + "learning_rate": 2.8337192059316344e-06, + "loss": 0.3998, + "step": 201850 + }, + { + "epoch": 1.04168278979058, + "grad_norm": 24329.37890625, + "learning_rate": 2.8216085856342946e-06, + "loss": 0.3976, + "step": 201900 + }, + { + "epoch": 1.041940759773193, + "grad_norm": 24121.482421875, + "learning_rate": 2.809523148248744e-06, + "loss": 0.3952, + "step": 201950 + }, + { + "epoch": 1.0421987297558055, + "grad_norm": 23812.671875, + "learning_rate": 2.7974629002259443e-06, + "loss": 0.4052, + "step": 202000 + }, + { + "epoch": 1.0424566997384184, + "grad_norm": 25162.40234375, + "learning_rate": 2.785427848003419e-06, + "loss": 0.3948, + "step": 202050 + }, + { + "epoch": 1.0427146697210312, + "grad_norm": 23631.462890625, + "learning_rate": 2.773417998005262e-06, + "loss": 0.3982, + "step": 202100 + }, + { + "epoch": 1.042972639703644, + "grad_norm": 24178.177734375, + "learning_rate": 2.761433356642079e-06, + "loss": 0.4012, + "step": 202150 + }, + { + "epoch": 1.043230609686257, + "grad_norm": 24726.37890625, + "learning_rate": 2.7494739303110527e-06, + "loss": 0.3926, + "step": 202200 + }, + { + "epoch": 1.0434885796688698, + "grad_norm": 23798.73828125, + "learning_rate": 2.7375397253958935e-06, + "loss": 0.3998, + "step": 202250 + }, + { + "epoch": 1.0437465496514826, + "grad_norm": 25162.677734375, + "learning_rate": 2.725630748266844e-06, + "loss": 0.4038, + "step": 202300 + }, + { + "epoch": 1.0440045196340955, + "grad_norm": 28668.78515625, + "learning_rate": 2.7137470052806814e-06, + "loss": 0.3989, + "step": 202350 + }, + { + "epoch": 1.0442624896167083, + "grad_norm": 22550.810546875, + "learning_rate": 2.7018885027807195e-06, + "loss": 0.3994, + "step": 202400 + }, + { + "epoch": 1.044520459599321, + "grad_norm": 26758.71484375, + "learning_rate": 2.6900552470968064e-06, + "loss": 0.4063, + "step": 202450 + }, + { + "epoch": 1.0447784295819338, + "grad_norm": 24895.77734375, + "learning_rate": 2.678247244545301e-06, + "loss": 0.3968, + "step": 202500 + }, + { + "epoch": 1.0450363995645466, + "grad_norm": 22442.416015625, + "learning_rate": 2.6664645014290833e-06, + "loss": 0.4009, + "step": 202550 + }, + { + "epoch": 1.0452943695471595, + "grad_norm": 24647.232421875, + "learning_rate": 2.654707024037556e-06, + "loss": 0.3984, + "step": 202600 + }, + { + "epoch": 1.0455523395297723, + "grad_norm": 24156.189453125, + "learning_rate": 2.6429748186466265e-06, + "loss": 0.3983, + "step": 202650 + }, + { + "epoch": 1.0458103095123852, + "grad_norm": 24131.658203125, + "learning_rate": 2.6312678915187185e-06, + "loss": 0.3941, + "step": 202700 + }, + { + "epoch": 1.046068279494998, + "grad_norm": 24890.5625, + "learning_rate": 2.6195862489027833e-06, + "loss": 0.3936, + "step": 202750 + }, + { + "epoch": 1.0463262494776109, + "grad_norm": 26486.58203125, + "learning_rate": 2.607929897034228e-06, + "loss": 0.4073, + "step": 202800 + }, + { + "epoch": 1.0465842194602235, + "grad_norm": 24554.09375, + "learning_rate": 2.5962988421350033e-06, + "loss": 0.3985, + "step": 202850 + }, + { + "epoch": 1.0468421894428364, + "grad_norm": 24964.349609375, + "learning_rate": 2.584693090413537e-06, + "loss": 0.3974, + "step": 202900 + }, + { + "epoch": 1.0471001594254492, + "grad_norm": 21256.87890625, + "learning_rate": 2.5731126480647516e-06, + "loss": 0.3969, + "step": 202950 + }, + { + "epoch": 1.047358129408062, + "grad_norm": 23721.197265625, + "learning_rate": 2.5615575212700804e-06, + "loss": 0.4039, + "step": 203000 + }, + { + "epoch": 1.047616099390675, + "grad_norm": 25096.4609375, + "learning_rate": 2.550027716197395e-06, + "loss": 0.3953, + "step": 203050 + }, + { + "epoch": 1.0478740693732878, + "grad_norm": 22199.11328125, + "learning_rate": 2.5385232390011114e-06, + "loss": 0.3979, + "step": 203100 + }, + { + "epoch": 1.0481320393559006, + "grad_norm": 24967.4609375, + "learning_rate": 2.527044095822084e-06, + "loss": 0.4023, + "step": 203150 + }, + { + "epoch": 1.0483900093385135, + "grad_norm": 28301.302734375, + "learning_rate": 2.5155902927876564e-06, + "loss": 0.4047, + "step": 203200 + }, + { + "epoch": 1.0486479793211263, + "grad_norm": 22268.037109375, + "learning_rate": 2.504161836011648e-06, + "loss": 0.4032, + "step": 203250 + }, + { + "epoch": 1.048905949303739, + "grad_norm": 28254.658203125, + "learning_rate": 2.4927587315943414e-06, + "loss": 0.3915, + "step": 203300 + }, + { + "epoch": 1.0491639192863518, + "grad_norm": 24471.462890625, + "learning_rate": 2.4813809856225112e-06, + "loss": 0.3986, + "step": 203350 + }, + { + "epoch": 1.0494218892689646, + "grad_norm": 24208.7578125, + "learning_rate": 2.470028604169361e-06, + "loss": 0.3969, + "step": 203400 + }, + { + "epoch": 1.0496798592515775, + "grad_norm": 23962.025390625, + "learning_rate": 2.4587015932945824e-06, + "loss": 0.3992, + "step": 203450 + }, + { + "epoch": 1.0499378292341903, + "grad_norm": 24777.421875, + "learning_rate": 2.4473999590443054e-06, + "loss": 0.4042, + "step": 203500 + }, + { + "epoch": 1.0501957992168032, + "grad_norm": 26705.40234375, + "learning_rate": 2.4361237074511323e-06, + "loss": 0.3985, + "step": 203550 + }, + { + "epoch": 1.050453769199416, + "grad_norm": 22508.51171875, + "learning_rate": 2.424872844534093e-06, + "loss": 0.3967, + "step": 203600 + }, + { + "epoch": 1.0507117391820289, + "grad_norm": 24678.62109375, + "learning_rate": 2.4136473762987057e-06, + "loss": 0.4002, + "step": 203650 + }, + { + "epoch": 1.0509697091646415, + "grad_norm": 24190.259765625, + "learning_rate": 2.402447308736883e-06, + "loss": 0.4002, + "step": 203700 + }, + { + "epoch": 1.0512276791472543, + "grad_norm": 27986.912109375, + "learning_rate": 2.391272647827014e-06, + "loss": 0.406, + "step": 203750 + }, + { + "epoch": 1.0514856491298672, + "grad_norm": 23664.740234375, + "learning_rate": 2.3801233995339236e-06, + "loss": 0.3988, + "step": 203800 + }, + { + "epoch": 1.05174361911248, + "grad_norm": 32503.17578125, + "learning_rate": 2.368999569808844e-06, + "loss": 0.3996, + "step": 203850 + }, + { + "epoch": 1.052001589095093, + "grad_norm": 24140.591796875, + "learning_rate": 2.3579011645894933e-06, + "loss": 0.4021, + "step": 203900 + }, + { + "epoch": 1.0522595590777057, + "grad_norm": 24920.033203125, + "learning_rate": 2.3468281897999487e-06, + "loss": 0.4038, + "step": 203950 + }, + { + "epoch": 1.0525175290603186, + "grad_norm": 20836.1796875, + "learning_rate": 2.335780651350772e-06, + "loss": 0.3929, + "step": 204000 + }, + { + "epoch": 1.0527754990429314, + "grad_norm": 22305.021484375, + "learning_rate": 2.324758555138923e-06, + "loss": 0.3963, + "step": 204050 + }, + { + "epoch": 1.0530334690255443, + "grad_norm": 22536.13671875, + "learning_rate": 2.3137619070477788e-06, + "loss": 0.3923, + "step": 204100 + }, + { + "epoch": 1.053291439008157, + "grad_norm": 23319.326171875, + "learning_rate": 2.3027907129471395e-06, + "loss": 0.4034, + "step": 204150 + }, + { + "epoch": 1.0535494089907698, + "grad_norm": 25774.677734375, + "learning_rate": 2.2918449786932085e-06, + "loss": 0.4015, + "step": 204200 + }, + { + "epoch": 1.0538073789733826, + "grad_norm": 23130.119140625, + "learning_rate": 2.280924710128618e-06, + "loss": 0.3971, + "step": 204250 + }, + { + "epoch": 1.0540653489559955, + "grad_norm": 23122.1875, + "learning_rate": 2.270029913082394e-06, + "loss": 0.3969, + "step": 204300 + }, + { + "epoch": 1.0543233189386083, + "grad_norm": 21518.763671875, + "learning_rate": 2.2591605933699632e-06, + "loss": 0.3992, + "step": 204350 + }, + { + "epoch": 1.0545812889212212, + "grad_norm": 25077.322265625, + "learning_rate": 2.248316756793156e-06, + "loss": 0.405, + "step": 204400 + }, + { + "epoch": 1.054839258903834, + "grad_norm": 23907.869140625, + "learning_rate": 2.237498409140215e-06, + "loss": 0.4009, + "step": 204450 + }, + { + "epoch": 1.0550972288864469, + "grad_norm": 22796.865234375, + "learning_rate": 2.2267055561857484e-06, + "loss": 0.4044, + "step": 204500 + }, + { + "epoch": 1.0553551988690595, + "grad_norm": 33471.05859375, + "learning_rate": 2.2159382036907927e-06, + "loss": 0.4021, + "step": 204550 + }, + { + "epoch": 1.0556131688516723, + "grad_norm": 23975.6640625, + "learning_rate": 2.2051963574027225e-06, + "loss": 0.3922, + "step": 204600 + }, + { + "epoch": 1.0558711388342852, + "grad_norm": 24563.220703125, + "learning_rate": 2.194480023055351e-06, + "loss": 0.3952, + "step": 204650 + }, + { + "epoch": 1.056129108816898, + "grad_norm": 24479.20703125, + "learning_rate": 2.1837892063688525e-06, + "loss": 0.4005, + "step": 204700 + }, + { + "epoch": 1.0563870787995109, + "grad_norm": 24895.6640625, + "learning_rate": 2.173123913049757e-06, + "loss": 0.3985, + "step": 204750 + }, + { + "epoch": 1.0566450487821237, + "grad_norm": 25606.34765625, + "learning_rate": 2.1624841487910052e-06, + "loss": 0.4019, + "step": 204800 + }, + { + "epoch": 1.0569030187647366, + "grad_norm": 23026.8828125, + "learning_rate": 2.151869919271904e-06, + "loss": 0.4023, + "step": 204850 + }, + { + "epoch": 1.0571609887473494, + "grad_norm": 24365.9609375, + "learning_rate": 2.1412812301581097e-06, + "loss": 0.3992, + "step": 204900 + }, + { + "epoch": 1.0574189587299623, + "grad_norm": 25374.990234375, + "learning_rate": 2.130718087101663e-06, + "loss": 0.4009, + "step": 204950 + }, + { + "epoch": 1.057676928712575, + "grad_norm": 23697.388671875, + "learning_rate": 2.1201804957409697e-06, + "loss": 0.4042, + "step": 205000 + }, + { + "epoch": 1.057676928712575, + "eval_loss": 0.386392205953598, + "eval_runtime": 3213.2768, + "eval_samples_per_second": 965.096, + "eval_steps_per_second": 1.885, + "step": 205000 + } + ], + "logging_steps": 50, + "max_steps": 225000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 5000, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 5, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.734735392877381e+17, + "train_batch_size": 128, + "trial_name": null, + "trial_params": null +} diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/training_args.bin b/pretrain_glome_nano_model_tiny/checkpoint-205000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..afd49ab13e1adc210b7ee9755ab768f1bc6434dc --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c41aa9c6023a3a9650c2ca731b440abde601b316b41906bb1dab8748c3c13ed +size 5304 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/vocab.json b/pretrain_glome_nano_model_tiny/checkpoint-205000/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..54045330cccae0d703647b73183868a84aa6c91f --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/vocab.json @@ -0,0 +1 @@ +{"A":0,"R":1,"N":2,"D":3,"C":4,"Q":5,"E":6,"G":7,"H":8,"I":9,"L":10,"K":11,"M":12,"F":13,"P":14,"S":15,"T":16,"W":17,"Y":18,"V":19,"X":20,"B":21,"U":22,"Z":23,"O":24,".":25,"-":26,"":27,"":28,"":29,"":30,"":31} \ No newline at end of file diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/config.json b/pretrain_glome_nano_model_tiny/checkpoint-210000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b4e7a54fdf8bdda8d2a7ac6356523b75cecb2eb5 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/config.json @@ -0,0 +1,44 @@ +{ + "architectures": [ + "GloMeModelForMaskedLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "bos_token_id": 28, + "classifier_activation": "gelu", + "classifier_bias": false, + "classifier_dropout": 0.1, + "classifier_pooling": "cls", + "cls_token_id": 28, + "compress_block_size": 16, + "compress_block_sliding_stride": 16, + "decoder_bias": true, + "dice_weight": 0.0, + "embedding_dropout": 0.1, + "eos_token_id": 29, + "hidden_activation": "gelu", + "hidden_size": 320, + "inner_rank": 32, + "intermediate_size": 1280, + "kv_heads": 10, + "mask_token_id": 31, + "mlp_bias": false, + "mlp_dropout": 0.1, + "model_size": "tiny", + "model_type": "glome", + "norm_bias": false, + "norm_eps": 1e-05, + "num_attention_heads": 20, + "num_hidden_layers": 6, + "num_selected_blocks": 8, + "num_slots": 64, + "pad_token_id": 30, + "reference_compile": null, + "selection_block_size": 16, + "sep_token_id": 29, + "sliding_window_size": 0, + "torch_dtype": "float32", + "transformers_version": "4.52.3", + "unk_token_id": 27, + "vocab_size": 36 +} diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/merges.txt b/pretrain_glome_nano_model_tiny/checkpoint-210000/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..5e7f1fd94996c8e2b65adea828af1b398eace61f --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/merges.txt @@ -0,0 +1 @@ +#version: 0.2 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/model.safetensors b/pretrain_glome_nano_model_tiny/checkpoint-210000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95a3b6a9bfde0c4d32af985aedcef72b13300f4d --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de68fc77e7b20dac383ec0e6c5c9f4baaeb2013fe9c78e7b390b0a225406fc89 +size 61429032 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/optimizer.pt b/pretrain_glome_nano_model_tiny/checkpoint-210000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5efd9997b0a989d6541a8a7732455055a7abf972 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:661961eea16b3e466cb1016be7687726369ef24c60264457890d1510ba8ebdd7 +size 122968954 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/rng_state.pth b/pretrain_glome_nano_model_tiny/checkpoint-210000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3808c33d3bf4e4535570f36326852699dbd68afe --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f6c94708df2e1ca974b8d47e998d435a2b275d0a6e954f260928cb8d4f7a245 +size 14244 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/scaler.pt b/pretrain_glome_nano_model_tiny/checkpoint-210000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363 +size 988 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/scheduler.pt b/pretrain_glome_nano_model_tiny/checkpoint-210000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cef1f7bc109f33247a08372c0cca5bef9791d7c7 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e9fd2bdd7b201e0f82b39359f0fb72a520ab1415a4d65a2db92b0caae70f33a +size 1064 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/special_tokens_map.json b/pretrain_glome_nano_model_tiny/checkpoint-210000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..14f7c9ed7b0bde6d23ee7b6a24ac2996789d1a0b --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/special_tokens_map.json @@ -0,0 +1,51 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "cls_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "sep_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } +} diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/tokenizer.json b/pretrain_glome_nano_model_tiny/checkpoint-210000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..0ece9b8e6fa70a006c5c10c47e30c9cff4ff95f0 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/tokenizer.json @@ -0,0 +1,123 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 27, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": true + }, + { + "id": 28, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": true + }, + { + "id": 29, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": true + }, + { + "id": 30, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": true + }, + { + "id": 31, + "content": "", + "single_word": false, + "lstrip": true, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": true + }, + "post_processor": { + "type": "RobertaProcessing", + "sep": [ + "", + 29 + ], + "cls": [ + "", + 28 + ], + "trim_offsets": true, + "add_prefix_space": false + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": "", + "end_of_word_suffix": "", + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "A": 0, + "R": 1, + "N": 2, + "D": 3, + "C": 4, + "Q": 5, + "E": 6, + "G": 7, + "H": 8, + "I": 9, + "L": 10, + "K": 11, + "M": 12, + "F": 13, + "P": 14, + "S": 15, + "T": 16, + "W": 17, + "Y": 18, + "V": 19, + "X": 20, + "B": 21, + "U": 22, + "Z": 23, + "O": 24, + ".": 25, + "-": 26, + "": 27, + "": 28, + "": 29, + "": 30, + "": 31 + }, + "merges": [] + } +} \ No newline at end of file diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/tokenizer_config.json b/pretrain_glome_nano_model_tiny/checkpoint-210000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1c77f0533c6d3bd60b0a23b8adfacc351923d671 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/tokenizer_config.json @@ -0,0 +1,58 @@ +{ + "add_prefix_space": false, + "added_tokens_decoder": { + "27": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "28": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "29": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "30": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "31": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "errors": "replace", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "tokenizer_class": "RobertaTokenizer", + "trim_offsets": true, + "unk_token": "" +} diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/trainer_state.json b/pretrain_glome_nano_model_tiny/checkpoint-210000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..07787f659992eebbebb199804ee4b405febb0930 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/trainer_state.json @@ -0,0 +1,29779 @@ +{ + "best_global_step": null, + "best_metric": 0.3863469064235687, + "best_model_checkpoint": null, + "epoch": 1.0834739269738574, + "eval_steps": 5000, + "global_step": 210000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0002579699826128232, + "grad_norm": 314643.03125, + "learning_rate": 4.9e-07, + "loss": 3.7018, + "step": 50 + }, + { + "epoch": 0.0005159399652256464, + "grad_norm": 286448.65625, + "learning_rate": 9.9e-07, + "loss": 3.6146, + "step": 100 + }, + { + "epoch": 0.0007739099478384695, + "grad_norm": 214582.28125, + "learning_rate": 1.4900000000000001e-06, + "loss": 3.4562, + "step": 150 + }, + { + "epoch": 0.0010318799304512927, + "grad_norm": 137193.9375, + "learning_rate": 1.99e-06, + "loss": 3.2558, + "step": 200 + }, + { + "epoch": 0.0012898499130641159, + "grad_norm": 82222.84375, + "learning_rate": 2.49e-06, + "loss": 3.0641, + "step": 250 + }, + { + "epoch": 0.001547819895676939, + "grad_norm": 56772.109375, + "learning_rate": 2.99e-06, + "loss": 2.9123, + "step": 300 + }, + { + "epoch": 0.0018057898782897622, + "grad_norm": 46737.6796875, + "learning_rate": 3.49e-06, + "loss": 2.8131, + "step": 350 + }, + { + "epoch": 0.0020637598609025854, + "grad_norm": 38769.04296875, + "learning_rate": 3.99e-06, + "loss": 2.7526, + "step": 400 + }, + { + "epoch": 0.0023217298435154084, + "grad_norm": 34701.5546875, + "learning_rate": 4.49e-06, + "loss": 2.7178, + "step": 450 + }, + { + "epoch": 0.0025796998261282318, + "grad_norm": 30332.826171875, + "learning_rate": 4.9900000000000005e-06, + "loss": 2.6967, + "step": 500 + }, + { + "epoch": 0.0028376698087410547, + "grad_norm": 27192.7421875, + "learning_rate": 5.49e-06, + "loss": 2.6798, + "step": 550 + }, + { + "epoch": 0.003095639791353878, + "grad_norm": 36959.33984375, + "learning_rate": 5.99e-06, + "loss": 2.668, + "step": 600 + }, + { + "epoch": 0.003353609773966701, + "grad_norm": 30939.365234375, + "learning_rate": 6.4900000000000005e-06, + "loss": 2.6572, + "step": 650 + }, + { + "epoch": 0.0036115797565795245, + "grad_norm": 26976.78125, + "learning_rate": 6.990000000000001e-06, + "loss": 2.6397, + "step": 700 + }, + { + "epoch": 0.0038695497391923474, + "grad_norm": 32308.447265625, + "learning_rate": 7.4899999999999994e-06, + "loss": 2.6021, + "step": 750 + }, + { + "epoch": 0.004127519721805171, + "grad_norm": 33838.8046875, + "learning_rate": 7.99e-06, + "loss": 2.5058, + "step": 800 + }, + { + "epoch": 0.004385489704417994, + "grad_norm": 49298.84765625, + "learning_rate": 8.49e-06, + "loss": 2.4095, + "step": 850 + }, + { + "epoch": 0.004643459687030817, + "grad_norm": 36241.56640625, + "learning_rate": 8.99e-06, + "loss": 2.2961, + "step": 900 + }, + { + "epoch": 0.004901429669643641, + "grad_norm": 37884.82421875, + "learning_rate": 9.49e-06, + "loss": 2.1526, + "step": 950 + }, + { + "epoch": 0.0051593996522564635, + "grad_norm": 36827.66796875, + "learning_rate": 9.990000000000001e-06, + "loss": 2.0338, + "step": 1000 + }, + { + "epoch": 0.0054173696348692865, + "grad_norm": 41398.421875, + "learning_rate": 1.049e-05, + "loss": 1.939, + "step": 1050 + }, + { + "epoch": 0.0056753396174821094, + "grad_norm": 42648.38671875, + "learning_rate": 1.099e-05, + "loss": 1.8325, + "step": 1100 + }, + { + "epoch": 0.005933309600094933, + "grad_norm": 40172.9453125, + "learning_rate": 1.149e-05, + "loss": 1.7643, + "step": 1150 + }, + { + "epoch": 0.006191279582707756, + "grad_norm": 35860.8515625, + "learning_rate": 1.199e-05, + "loss": 1.6742, + "step": 1200 + }, + { + "epoch": 0.006449249565320579, + "grad_norm": 44456.93359375, + "learning_rate": 1.249e-05, + "loss": 1.6026, + "step": 1250 + }, + { + "epoch": 0.006707219547933402, + "grad_norm": 36839.08984375, + "learning_rate": 1.299e-05, + "loss": 1.521, + "step": 1300 + }, + { + "epoch": 0.006965189530546226, + "grad_norm": 44026.68359375, + "learning_rate": 1.349e-05, + "loss": 1.4436, + "step": 1350 + }, + { + "epoch": 0.007223159513159049, + "grad_norm": 35557.578125, + "learning_rate": 1.399e-05, + "loss": 1.3773, + "step": 1400 + }, + { + "epoch": 0.007481129495771872, + "grad_norm": 38767.60546875, + "learning_rate": 1.449e-05, + "loss": 1.3023, + "step": 1450 + }, + { + "epoch": 0.007739099478384695, + "grad_norm": 36654.796875, + "learning_rate": 1.499e-05, + "loss": 1.2627, + "step": 1500 + }, + { + "epoch": 0.007997069460997519, + "grad_norm": 41690.328125, + "learning_rate": 1.5490000000000002e-05, + "loss": 1.2063, + "step": 1550 + }, + { + "epoch": 0.008255039443610342, + "grad_norm": 38743.59375, + "learning_rate": 1.599e-05, + "loss": 1.1626, + "step": 1600 + }, + { + "epoch": 0.008513009426223165, + "grad_norm": 41839.7890625, + "learning_rate": 1.649e-05, + "loss": 1.1225, + "step": 1650 + }, + { + "epoch": 0.008770979408835988, + "grad_norm": 42897.0703125, + "learning_rate": 1.699e-05, + "loss": 1.0864, + "step": 1700 + }, + { + "epoch": 0.00902894939144881, + "grad_norm": 37412.30859375, + "learning_rate": 1.749e-05, + "loss": 1.0613, + "step": 1750 + }, + { + "epoch": 0.009286919374061633, + "grad_norm": 37235.484375, + "learning_rate": 1.7990000000000002e-05, + "loss": 1.0354, + "step": 1800 + }, + { + "epoch": 0.009544889356674458, + "grad_norm": 39117.6328125, + "learning_rate": 1.849e-05, + "loss": 1.0059, + "step": 1850 + }, + { + "epoch": 0.009802859339287281, + "grad_norm": 37297.6875, + "learning_rate": 1.8990000000000003e-05, + "loss": 0.9795, + "step": 1900 + }, + { + "epoch": 0.010060829321900104, + "grad_norm": 33772.24609375, + "learning_rate": 1.949e-05, + "loss": 0.9639, + "step": 1950 + }, + { + "epoch": 0.010318799304512927, + "grad_norm": 39775.046875, + "learning_rate": 1.999e-05, + "loss": 0.9386, + "step": 2000 + }, + { + "epoch": 0.01057676928712575, + "grad_norm": 38412.2109375, + "learning_rate": 2.0490000000000002e-05, + "loss": 0.9212, + "step": 2050 + }, + { + "epoch": 0.010834739269738573, + "grad_norm": 39548.98046875, + "learning_rate": 2.099e-05, + "loss": 0.9112, + "step": 2100 + }, + { + "epoch": 0.011092709252351396, + "grad_norm": 38127.77734375, + "learning_rate": 2.1490000000000003e-05, + "loss": 0.8866, + "step": 2150 + }, + { + "epoch": 0.011350679234964219, + "grad_norm": 39877.0390625, + "learning_rate": 2.199e-05, + "loss": 0.8806, + "step": 2200 + }, + { + "epoch": 0.011608649217577044, + "grad_norm": 34642.28515625, + "learning_rate": 2.249e-05, + "loss": 0.8645, + "step": 2250 + }, + { + "epoch": 0.011866619200189867, + "grad_norm": 38508.0078125, + "learning_rate": 2.2990000000000002e-05, + "loss": 0.8609, + "step": 2300 + }, + { + "epoch": 0.01212458918280269, + "grad_norm": 33287.765625, + "learning_rate": 2.349e-05, + "loss": 0.8443, + "step": 2350 + }, + { + "epoch": 0.012382559165415512, + "grad_norm": 35477.5546875, + "learning_rate": 2.3990000000000002e-05, + "loss": 0.839, + "step": 2400 + }, + { + "epoch": 0.012640529148028335, + "grad_norm": 32204.408203125, + "learning_rate": 2.449e-05, + "loss": 0.8204, + "step": 2450 + }, + { + "epoch": 0.012898499130641158, + "grad_norm": 35113.59765625, + "learning_rate": 2.4990000000000003e-05, + "loss": 0.8214, + "step": 2500 + }, + { + "epoch": 0.013156469113253981, + "grad_norm": 36591.2421875, + "learning_rate": 2.549e-05, + "loss": 0.8066, + "step": 2550 + }, + { + "epoch": 0.013414439095866804, + "grad_norm": 37926.3125, + "learning_rate": 2.5990000000000004e-05, + "loss": 0.7993, + "step": 2600 + }, + { + "epoch": 0.013672409078479627, + "grad_norm": 35413.01171875, + "learning_rate": 2.6490000000000002e-05, + "loss": 0.8012, + "step": 2650 + }, + { + "epoch": 0.013930379061092452, + "grad_norm": 33275.1796875, + "learning_rate": 2.6989999999999997e-05, + "loss": 0.7879, + "step": 2700 + }, + { + "epoch": 0.014188349043705275, + "grad_norm": 35463.87109375, + "learning_rate": 2.749e-05, + "loss": 0.7808, + "step": 2750 + }, + { + "epoch": 0.014446319026318098, + "grad_norm": 33143.234375, + "learning_rate": 2.7989999999999998e-05, + "loss": 0.7813, + "step": 2800 + }, + { + "epoch": 0.01470428900893092, + "grad_norm": 32908.71484375, + "learning_rate": 2.849e-05, + "loss": 0.7725, + "step": 2850 + }, + { + "epoch": 0.014962258991543744, + "grad_norm": 36443.578125, + "learning_rate": 2.8990000000000002e-05, + "loss": 0.761, + "step": 2900 + }, + { + "epoch": 0.015220228974156567, + "grad_norm": 32331.728515625, + "learning_rate": 2.949e-05, + "loss": 0.7588, + "step": 2950 + }, + { + "epoch": 0.01547819895676939, + "grad_norm": 33401.546875, + "learning_rate": 2.9990000000000003e-05, + "loss": 0.7462, + "step": 3000 + }, + { + "epoch": 0.015736168939382213, + "grad_norm": 32041.26171875, + "learning_rate": 3.049e-05, + "loss": 0.7449, + "step": 3050 + }, + { + "epoch": 0.015994138921995037, + "grad_norm": 32035.814453125, + "learning_rate": 3.099e-05, + "loss": 0.7373, + "step": 3100 + }, + { + "epoch": 0.01625210890460786, + "grad_norm": 31430.421875, + "learning_rate": 3.1490000000000005e-05, + "loss": 0.7371, + "step": 3150 + }, + { + "epoch": 0.016510078887220683, + "grad_norm": 30911.267578125, + "learning_rate": 3.1990000000000004e-05, + "loss": 0.7315, + "step": 3200 + }, + { + "epoch": 0.016768048869833505, + "grad_norm": 31906.193359375, + "learning_rate": 3.249e-05, + "loss": 0.7405, + "step": 3250 + }, + { + "epoch": 0.01702601885244633, + "grad_norm": 30320.1640625, + "learning_rate": 3.299e-05, + "loss": 0.7323, + "step": 3300 + }, + { + "epoch": 0.017283988835059154, + "grad_norm": 32357.072265625, + "learning_rate": 3.349e-05, + "loss": 0.7244, + "step": 3350 + }, + { + "epoch": 0.017541958817671975, + "grad_norm": 34023.2109375, + "learning_rate": 3.399e-05, + "loss": 0.7214, + "step": 3400 + }, + { + "epoch": 0.0177999288002848, + "grad_norm": 33940.8046875, + "learning_rate": 3.449e-05, + "loss": 0.7158, + "step": 3450 + }, + { + "epoch": 0.01805789878289762, + "grad_norm": 31701.14453125, + "learning_rate": 3.499e-05, + "loss": 0.7102, + "step": 3500 + }, + { + "epoch": 0.018315868765510446, + "grad_norm": 32291.861328125, + "learning_rate": 3.549e-05, + "loss": 0.7104, + "step": 3550 + }, + { + "epoch": 0.018573838748123267, + "grad_norm": 28074.177734375, + "learning_rate": 3.599e-05, + "loss": 0.7001, + "step": 3600 + }, + { + "epoch": 0.01883180873073609, + "grad_norm": 29823.787109375, + "learning_rate": 3.6490000000000005e-05, + "loss": 0.7029, + "step": 3650 + }, + { + "epoch": 0.019089778713348916, + "grad_norm": 29792.24609375, + "learning_rate": 3.699e-05, + "loss": 0.6949, + "step": 3700 + }, + { + "epoch": 0.019347748695961738, + "grad_norm": 31345.296875, + "learning_rate": 3.749e-05, + "loss": 0.6989, + "step": 3750 + }, + { + "epoch": 0.019605718678574562, + "grad_norm": 33923.0625, + "learning_rate": 3.799e-05, + "loss": 0.6984, + "step": 3800 + }, + { + "epoch": 0.019863688661187383, + "grad_norm": 30762.97265625, + "learning_rate": 3.8490000000000006e-05, + "loss": 0.6931, + "step": 3850 + }, + { + "epoch": 0.020121658643800208, + "grad_norm": 30794.13671875, + "learning_rate": 3.8990000000000004e-05, + "loss": 0.6923, + "step": 3900 + }, + { + "epoch": 0.02037962862641303, + "grad_norm": 29854.923828125, + "learning_rate": 3.9489999999999996e-05, + "loss": 0.6895, + "step": 3950 + }, + { + "epoch": 0.020637598609025854, + "grad_norm": 27336.958984375, + "learning_rate": 3.999e-05, + "loss": 0.6853, + "step": 4000 + }, + { + "epoch": 0.020895568591638675, + "grad_norm": 31836.81640625, + "learning_rate": 4.049e-05, + "loss": 0.6821, + "step": 4050 + }, + { + "epoch": 0.0211535385742515, + "grad_norm": 28508.548828125, + "learning_rate": 4.099e-05, + "loss": 0.6857, + "step": 4100 + }, + { + "epoch": 0.021411508556864325, + "grad_norm": 30309.2421875, + "learning_rate": 4.1490000000000004e-05, + "loss": 0.6791, + "step": 4150 + }, + { + "epoch": 0.021669478539477146, + "grad_norm": 31035.0703125, + "learning_rate": 4.199e-05, + "loss": 0.6762, + "step": 4200 + }, + { + "epoch": 0.02192744852208997, + "grad_norm": 30893.951171875, + "learning_rate": 4.249e-05, + "loss": 0.6739, + "step": 4250 + }, + { + "epoch": 0.022185418504702792, + "grad_norm": 28317.12890625, + "learning_rate": 4.299e-05, + "loss": 0.6635, + "step": 4300 + }, + { + "epoch": 0.022443388487315617, + "grad_norm": 27140.29296875, + "learning_rate": 4.3490000000000005e-05, + "loss": 0.6694, + "step": 4350 + }, + { + "epoch": 0.022701358469928438, + "grad_norm": 27948.32421875, + "learning_rate": 4.3990000000000004e-05, + "loss": 0.6667, + "step": 4400 + }, + { + "epoch": 0.022959328452541262, + "grad_norm": 27243.44140625, + "learning_rate": 4.449e-05, + "loss": 0.6689, + "step": 4450 + }, + { + "epoch": 0.023217298435154087, + "grad_norm": 29163.98828125, + "learning_rate": 4.499e-05, + "loss": 0.6639, + "step": 4500 + }, + { + "epoch": 0.02347526841776691, + "grad_norm": 27801.79296875, + "learning_rate": 4.549000000000001e-05, + "loss": 0.6612, + "step": 4550 + }, + { + "epoch": 0.023733238400379733, + "grad_norm": 28201.7265625, + "learning_rate": 4.599e-05, + "loss": 0.6608, + "step": 4600 + }, + { + "epoch": 0.023991208382992554, + "grad_norm": 28875.06640625, + "learning_rate": 4.649e-05, + "loss": 0.6642, + "step": 4650 + }, + { + "epoch": 0.02424917836560538, + "grad_norm": 25467.376953125, + "learning_rate": 4.699e-05, + "loss": 0.6513, + "step": 4700 + }, + { + "epoch": 0.0245071483482182, + "grad_norm": 27359.97265625, + "learning_rate": 4.749e-05, + "loss": 0.6554, + "step": 4750 + }, + { + "epoch": 0.024765118330831025, + "grad_norm": 30614.15234375, + "learning_rate": 4.799e-05, + "loss": 0.6574, + "step": 4800 + }, + { + "epoch": 0.025023088313443846, + "grad_norm": 29069.677734375, + "learning_rate": 4.8490000000000005e-05, + "loss": 0.6562, + "step": 4850 + }, + { + "epoch": 0.02528105829605667, + "grad_norm": 27337.37109375, + "learning_rate": 4.8990000000000004e-05, + "loss": 0.6507, + "step": 4900 + }, + { + "epoch": 0.025539028278669496, + "grad_norm": 26784.7265625, + "learning_rate": 4.949e-05, + "loss": 0.64, + "step": 4950 + }, + { + "epoch": 0.025796998261282317, + "grad_norm": 27480.509765625, + "learning_rate": 4.999e-05, + "loss": 0.6515, + "step": 5000 + }, + { + "epoch": 0.025796998261282317, + "eval_loss": 0.6312834024429321, + "eval_runtime": 3280.995, + "eval_samples_per_second": 945.177, + "eval_steps_per_second": 1.846, + "step": 5000 + }, + { + "epoch": 0.02605496824389514, + "grad_norm": 27871.740234375, + "learning_rate": 5.0490000000000006e-05, + "loss": 0.6424, + "step": 5050 + }, + { + "epoch": 0.026312938226507963, + "grad_norm": 31187.00390625, + "learning_rate": 5.0990000000000005e-05, + "loss": 0.643, + "step": 5100 + }, + { + "epoch": 0.026570908209120787, + "grad_norm": 25956.521484375, + "learning_rate": 5.149e-05, + "loss": 0.65, + "step": 5150 + }, + { + "epoch": 0.02682887819173361, + "grad_norm": 25967.70703125, + "learning_rate": 5.199000000000001e-05, + "loss": 0.6466, + "step": 5200 + }, + { + "epoch": 0.027086848174346433, + "grad_norm": 25310.275390625, + "learning_rate": 5.249000000000001e-05, + "loss": 0.6429, + "step": 5250 + }, + { + "epoch": 0.027344818156959255, + "grad_norm": 24740.033203125, + "learning_rate": 5.2990000000000006e-05, + "loss": 0.6415, + "step": 5300 + }, + { + "epoch": 0.02760278813957208, + "grad_norm": 30795.58984375, + "learning_rate": 5.3490000000000005e-05, + "loss": 0.6424, + "step": 5350 + }, + { + "epoch": 0.027860758122184904, + "grad_norm": 30625.59375, + "learning_rate": 5.399000000000001e-05, + "loss": 0.6361, + "step": 5400 + }, + { + "epoch": 0.028118728104797725, + "grad_norm": 27036.14453125, + "learning_rate": 5.449000000000001e-05, + "loss": 0.6351, + "step": 5450 + }, + { + "epoch": 0.02837669808741055, + "grad_norm": 26934.447265625, + "learning_rate": 5.499000000000001e-05, + "loss": 0.6304, + "step": 5500 + }, + { + "epoch": 0.02863466807002337, + "grad_norm": 25540.291015625, + "learning_rate": 5.549e-05, + "loss": 0.6304, + "step": 5550 + }, + { + "epoch": 0.028892638052636196, + "grad_norm": 26574.9375, + "learning_rate": 5.599e-05, + "loss": 0.6444, + "step": 5600 + }, + { + "epoch": 0.029150608035249017, + "grad_norm": 26941.955078125, + "learning_rate": 5.6489999999999996e-05, + "loss": 0.6373, + "step": 5650 + }, + { + "epoch": 0.02940857801786184, + "grad_norm": 26957.7734375, + "learning_rate": 5.699e-05, + "loss": 0.6363, + "step": 5700 + }, + { + "epoch": 0.029666548000474666, + "grad_norm": 24377.55859375, + "learning_rate": 5.749e-05, + "loss": 0.6213, + "step": 5750 + }, + { + "epoch": 0.029924517983087488, + "grad_norm": 25600.697265625, + "learning_rate": 5.799e-05, + "loss": 0.6362, + "step": 5800 + }, + { + "epoch": 0.030182487965700312, + "grad_norm": 23841.47265625, + "learning_rate": 5.849e-05, + "loss": 0.6274, + "step": 5850 + }, + { + "epoch": 0.030440457948313134, + "grad_norm": 23847.73046875, + "learning_rate": 5.899e-05, + "loss": 0.624, + "step": 5900 + }, + { + "epoch": 0.030698427930925958, + "grad_norm": 25549.033203125, + "learning_rate": 5.949e-05, + "loss": 0.627, + "step": 5950 + }, + { + "epoch": 0.03095639791353878, + "grad_norm": 25286.8046875, + "learning_rate": 5.999e-05, + "loss": 0.6272, + "step": 6000 + }, + { + "epoch": 0.031214367896151604, + "grad_norm": 25137.384765625, + "learning_rate": 6.0490000000000005e-05, + "loss": 0.622, + "step": 6050 + }, + { + "epoch": 0.031472337878764425, + "grad_norm": 23606.23828125, + "learning_rate": 6.0990000000000004e-05, + "loss": 0.6262, + "step": 6100 + }, + { + "epoch": 0.031730307861377254, + "grad_norm": 32101.404296875, + "learning_rate": 6.149000000000001e-05, + "loss": 0.619, + "step": 6150 + }, + { + "epoch": 0.031988277843990075, + "grad_norm": 23683.73046875, + "learning_rate": 6.199000000000001e-05, + "loss": 0.6129, + "step": 6200 + }, + { + "epoch": 0.032246247826602896, + "grad_norm": 25243.49609375, + "learning_rate": 6.249e-05, + "loss": 0.6194, + "step": 6250 + }, + { + "epoch": 0.03250421780921572, + "grad_norm": 28690.10546875, + "learning_rate": 6.299e-05, + "loss": 0.6199, + "step": 6300 + }, + { + "epoch": 0.032762187791828545, + "grad_norm": 24198.47265625, + "learning_rate": 6.349e-05, + "loss": 0.6077, + "step": 6350 + }, + { + "epoch": 0.03302015777444137, + "grad_norm": 24742.998046875, + "learning_rate": 6.399e-05, + "loss": 0.6168, + "step": 6400 + }, + { + "epoch": 0.03327812775705419, + "grad_norm": 27489.93359375, + "learning_rate": 6.449e-05, + "loss": 0.6136, + "step": 6450 + }, + { + "epoch": 0.03353609773966701, + "grad_norm": 28733.7265625, + "learning_rate": 6.499000000000001e-05, + "loss": 0.6184, + "step": 6500 + }, + { + "epoch": 0.03379406772227984, + "grad_norm": 23810.544921875, + "learning_rate": 6.549000000000001e-05, + "loss": 0.6167, + "step": 6550 + }, + { + "epoch": 0.03405203770489266, + "grad_norm": 25503.98828125, + "learning_rate": 6.599000000000001e-05, + "loss": 0.6184, + "step": 6600 + }, + { + "epoch": 0.03431000768750548, + "grad_norm": 24550.26171875, + "learning_rate": 6.649000000000001e-05, + "loss": 0.6146, + "step": 6650 + }, + { + "epoch": 0.03456797767011831, + "grad_norm": 22774.71875, + "learning_rate": 6.699000000000001e-05, + "loss": 0.6132, + "step": 6700 + }, + { + "epoch": 0.03482594765273113, + "grad_norm": 23878.90625, + "learning_rate": 6.749e-05, + "loss": 0.6127, + "step": 6750 + }, + { + "epoch": 0.03508391763534395, + "grad_norm": 28744.9921875, + "learning_rate": 6.799e-05, + "loss": 0.6203, + "step": 6800 + }, + { + "epoch": 0.03534188761795677, + "grad_norm": 24239.826171875, + "learning_rate": 6.849e-05, + "loss": 0.6069, + "step": 6850 + }, + { + "epoch": 0.0355998576005696, + "grad_norm": 27030.513671875, + "learning_rate": 6.899e-05, + "loss": 0.614, + "step": 6900 + }, + { + "epoch": 0.03585782758318242, + "grad_norm": 22872.59375, + "learning_rate": 6.949e-05, + "loss": 0.6068, + "step": 6950 + }, + { + "epoch": 0.03611579756579524, + "grad_norm": 23280.333984375, + "learning_rate": 6.999e-05, + "loss": 0.6064, + "step": 7000 + }, + { + "epoch": 0.03637376754840807, + "grad_norm": 24819.060546875, + "learning_rate": 7.049e-05, + "loss": 0.606, + "step": 7050 + }, + { + "epoch": 0.03663173753102089, + "grad_norm": 23739.595703125, + "learning_rate": 7.099e-05, + "loss": 0.6065, + "step": 7100 + }, + { + "epoch": 0.03688970751363371, + "grad_norm": 24261.28515625, + "learning_rate": 7.149e-05, + "loss": 0.6037, + "step": 7150 + }, + { + "epoch": 0.037147677496246534, + "grad_norm": 24133.744140625, + "learning_rate": 7.199000000000001e-05, + "loss": 0.6097, + "step": 7200 + }, + { + "epoch": 0.03740564747885936, + "grad_norm": 22903.197265625, + "learning_rate": 7.249e-05, + "loss": 0.6048, + "step": 7250 + }, + { + "epoch": 0.03766361746147218, + "grad_norm": 23503.970703125, + "learning_rate": 7.299e-05, + "loss": 0.6039, + "step": 7300 + }, + { + "epoch": 0.037921587444085005, + "grad_norm": 20935.388671875, + "learning_rate": 7.349e-05, + "loss": 0.6016, + "step": 7350 + }, + { + "epoch": 0.03817955742669783, + "grad_norm": 22991.720703125, + "learning_rate": 7.399e-05, + "loss": 0.6111, + "step": 7400 + }, + { + "epoch": 0.038437527409310654, + "grad_norm": 21915.90234375, + "learning_rate": 7.449e-05, + "loss": 0.5969, + "step": 7450 + }, + { + "epoch": 0.038695497391923475, + "grad_norm": 22474.25390625, + "learning_rate": 7.499e-05, + "loss": 0.6068, + "step": 7500 + }, + { + "epoch": 0.038953467374536296, + "grad_norm": 24122.150390625, + "learning_rate": 7.549000000000001e-05, + "loss": 0.6037, + "step": 7550 + }, + { + "epoch": 0.039211437357149125, + "grad_norm": 22262.220703125, + "learning_rate": 7.599000000000001e-05, + "loss": 0.5946, + "step": 7600 + }, + { + "epoch": 0.039469407339761946, + "grad_norm": 23959.7265625, + "learning_rate": 7.649000000000001e-05, + "loss": 0.598, + "step": 7650 + }, + { + "epoch": 0.03972737732237477, + "grad_norm": 21918.5859375, + "learning_rate": 7.699e-05, + "loss": 0.5959, + "step": 7700 + }, + { + "epoch": 0.03998534730498759, + "grad_norm": 23740.5390625, + "learning_rate": 7.749e-05, + "loss": 0.594, + "step": 7750 + }, + { + "epoch": 0.040243317287600416, + "grad_norm": 23406.4296875, + "learning_rate": 7.799e-05, + "loss": 0.6048, + "step": 7800 + }, + { + "epoch": 0.04050128727021324, + "grad_norm": 23423.201171875, + "learning_rate": 7.849e-05, + "loss": 0.5944, + "step": 7850 + }, + { + "epoch": 0.04075925725282606, + "grad_norm": 23187.76171875, + "learning_rate": 7.899000000000001e-05, + "loss": 0.5944, + "step": 7900 + }, + { + "epoch": 0.04101722723543889, + "grad_norm": 25532.4375, + "learning_rate": 7.949000000000001e-05, + "loss": 0.5978, + "step": 7950 + }, + { + "epoch": 0.04127519721805171, + "grad_norm": 23045.28515625, + "learning_rate": 7.999000000000001e-05, + "loss": 0.5968, + "step": 8000 + }, + { + "epoch": 0.04153316720066453, + "grad_norm": 22853.826171875, + "learning_rate": 8.049e-05, + "loss": 0.5915, + "step": 8050 + }, + { + "epoch": 0.04179113718327735, + "grad_norm": 21853.658203125, + "learning_rate": 8.099e-05, + "loss": 0.5932, + "step": 8100 + }, + { + "epoch": 0.04204910716589018, + "grad_norm": 22395.74609375, + "learning_rate": 8.149e-05, + "loss": 0.5925, + "step": 8150 + }, + { + "epoch": 0.042307077148503, + "grad_norm": 23933.40625, + "learning_rate": 8.199e-05, + "loss": 0.5878, + "step": 8200 + }, + { + "epoch": 0.04256504713111582, + "grad_norm": 21773.087890625, + "learning_rate": 8.249e-05, + "loss": 0.5916, + "step": 8250 + }, + { + "epoch": 0.04282301711372865, + "grad_norm": 22665.11328125, + "learning_rate": 8.299e-05, + "loss": 0.5906, + "step": 8300 + }, + { + "epoch": 0.04308098709634147, + "grad_norm": 22157.091796875, + "learning_rate": 8.349e-05, + "loss": 0.5873, + "step": 8350 + }, + { + "epoch": 0.04333895707895429, + "grad_norm": 21506.8125, + "learning_rate": 8.399e-05, + "loss": 0.5927, + "step": 8400 + }, + { + "epoch": 0.04359692706156711, + "grad_norm": 22143.341796875, + "learning_rate": 8.449e-05, + "loss": 0.5828, + "step": 8450 + }, + { + "epoch": 0.04385489704417994, + "grad_norm": 23341.23828125, + "learning_rate": 8.499e-05, + "loss": 0.5885, + "step": 8500 + }, + { + "epoch": 0.04411286702679276, + "grad_norm": 21876.96484375, + "learning_rate": 8.549000000000001e-05, + "loss": 0.5913, + "step": 8550 + }, + { + "epoch": 0.044370837009405584, + "grad_norm": 22307.29296875, + "learning_rate": 8.599000000000001e-05, + "loss": 0.583, + "step": 8600 + }, + { + "epoch": 0.04462880699201841, + "grad_norm": 22859.017578125, + "learning_rate": 8.649000000000001e-05, + "loss": 0.5889, + "step": 8650 + }, + { + "epoch": 0.04488677697463123, + "grad_norm": 22058.24609375, + "learning_rate": 8.699e-05, + "loss": 0.5848, + "step": 8700 + }, + { + "epoch": 0.045144746957244054, + "grad_norm": 22116.837890625, + "learning_rate": 8.749e-05, + "loss": 0.5858, + "step": 8750 + }, + { + "epoch": 0.045402716939856876, + "grad_norm": 23110.17578125, + "learning_rate": 8.799e-05, + "loss": 0.5855, + "step": 8800 + }, + { + "epoch": 0.045660686922469704, + "grad_norm": 24173.064453125, + "learning_rate": 8.849e-05, + "loss": 0.5878, + "step": 8850 + }, + { + "epoch": 0.045918656905082525, + "grad_norm": 21521.48046875, + "learning_rate": 8.899e-05, + "loss": 0.5914, + "step": 8900 + }, + { + "epoch": 0.046176626887695346, + "grad_norm": 24516.0, + "learning_rate": 8.949000000000001e-05, + "loss": 0.5849, + "step": 8950 + }, + { + "epoch": 0.046434596870308174, + "grad_norm": 22074.9609375, + "learning_rate": 8.999000000000001e-05, + "loss": 0.5848, + "step": 9000 + }, + { + "epoch": 0.046692566852920996, + "grad_norm": 21495.4140625, + "learning_rate": 9.049000000000001e-05, + "loss": 0.579, + "step": 9050 + }, + { + "epoch": 0.04695053683553382, + "grad_norm": 23548.224609375, + "learning_rate": 9.099000000000001e-05, + "loss": 0.5826, + "step": 9100 + }, + { + "epoch": 0.04720850681814664, + "grad_norm": 22144.51953125, + "learning_rate": 9.149e-05, + "loss": 0.5879, + "step": 9150 + }, + { + "epoch": 0.047466476800759466, + "grad_norm": 20656.185546875, + "learning_rate": 9.199e-05, + "loss": 0.5806, + "step": 9200 + }, + { + "epoch": 0.04772444678337229, + "grad_norm": 21228.814453125, + "learning_rate": 9.249e-05, + "loss": 0.5858, + "step": 9250 + }, + { + "epoch": 0.04798241676598511, + "grad_norm": 20801.869140625, + "learning_rate": 9.299e-05, + "loss": 0.5816, + "step": 9300 + }, + { + "epoch": 0.04824038674859793, + "grad_norm": 24044.283203125, + "learning_rate": 9.349e-05, + "loss": 0.5811, + "step": 9350 + }, + { + "epoch": 0.04849835673121076, + "grad_norm": 22395.47265625, + "learning_rate": 9.399e-05, + "loss": 0.5782, + "step": 9400 + }, + { + "epoch": 0.04875632671382358, + "grad_norm": 22353.078125, + "learning_rate": 9.449e-05, + "loss": 0.5758, + "step": 9450 + }, + { + "epoch": 0.0490142966964364, + "grad_norm": 22520.72265625, + "learning_rate": 9.499e-05, + "loss": 0.5752, + "step": 9500 + }, + { + "epoch": 0.04927226667904923, + "grad_norm": 22016.951171875, + "learning_rate": 9.549e-05, + "loss": 0.5764, + "step": 9550 + }, + { + "epoch": 0.04953023666166205, + "grad_norm": 20046.615234375, + "learning_rate": 9.599000000000001e-05, + "loss": 0.5759, + "step": 9600 + }, + { + "epoch": 0.04978820664427487, + "grad_norm": 21346.029296875, + "learning_rate": 9.649e-05, + "loss": 0.5798, + "step": 9650 + }, + { + "epoch": 0.05004617662688769, + "grad_norm": 22449.796875, + "learning_rate": 9.699e-05, + "loss": 0.5829, + "step": 9700 + }, + { + "epoch": 0.05030414660950052, + "grad_norm": 20538.751953125, + "learning_rate": 9.749e-05, + "loss": 0.5809, + "step": 9750 + }, + { + "epoch": 0.05056211659211334, + "grad_norm": 21123.19921875, + "learning_rate": 9.799e-05, + "loss": 0.5726, + "step": 9800 + }, + { + "epoch": 0.05082008657472616, + "grad_norm": 20853.08203125, + "learning_rate": 9.849e-05, + "loss": 0.5726, + "step": 9850 + }, + { + "epoch": 0.05107805655733899, + "grad_norm": 22160.841796875, + "learning_rate": 9.899e-05, + "loss": 0.5783, + "step": 9900 + }, + { + "epoch": 0.05133602653995181, + "grad_norm": 19711.109375, + "learning_rate": 9.949000000000001e-05, + "loss": 0.5722, + "step": 9950 + }, + { + "epoch": 0.051593996522564634, + "grad_norm": 21442.310546875, + "learning_rate": 9.999000000000001e-05, + "loss": 0.5773, + "step": 10000 + }, + { + "epoch": 0.051593996522564634, + "eval_loss": 0.5661358833312988, + "eval_runtime": 3272.6524, + "eval_samples_per_second": 947.586, + "eval_steps_per_second": 1.851, + "step": 10000 + }, + { + "epoch": 0.051851966505177455, + "grad_norm": 21442.943359375, + "learning_rate": 9.999998718392692e-05, + "loss": 0.5727, + "step": 10050 + }, + { + "epoch": 0.05210993648779028, + "grad_norm": 21711.177734375, + "learning_rate": 9.999994768416664e-05, + "loss": 0.5707, + "step": 10100 + }, + { + "epoch": 0.052367906470403104, + "grad_norm": 21793.666015625, + "learning_rate": 9.999988149540251e-05, + "loss": 0.5727, + "step": 10150 + }, + { + "epoch": 0.052625876453015925, + "grad_norm": 18847.970703125, + "learning_rate": 9.999978861766983e-05, + "loss": 0.5726, + "step": 10200 + }, + { + "epoch": 0.052883846435628754, + "grad_norm": 22870.91796875, + "learning_rate": 9.999966905101816e-05, + "loss": 0.5751, + "step": 10250 + }, + { + "epoch": 0.053141816418241575, + "grad_norm": 23970.431640625, + "learning_rate": 9.999952279551135e-05, + "loss": 0.5745, + "step": 10300 + }, + { + "epoch": 0.053399786400854396, + "grad_norm": 19482.65625, + "learning_rate": 9.999934985122746e-05, + "loss": 0.5734, + "step": 10350 + }, + { + "epoch": 0.05365775638346722, + "grad_norm": 19720.65625, + "learning_rate": 9.999915021825879e-05, + "loss": 0.5697, + "step": 10400 + }, + { + "epoch": 0.053915726366080045, + "grad_norm": 21484.8203125, + "learning_rate": 9.99989238967119e-05, + "loss": 0.5678, + "step": 10450 + }, + { + "epoch": 0.05417369634869287, + "grad_norm": 20198.669921875, + "learning_rate": 9.999867088670762e-05, + "loss": 0.5731, + "step": 10500 + }, + { + "epoch": 0.05443166633130569, + "grad_norm": 19887.86328125, + "learning_rate": 9.999839118838099e-05, + "loss": 0.5711, + "step": 10550 + }, + { + "epoch": 0.05468963631391851, + "grad_norm": 21250.41796875, + "learning_rate": 9.999808480188131e-05, + "loss": 0.5653, + "step": 10600 + }, + { + "epoch": 0.05494760629653134, + "grad_norm": 21179.904296875, + "learning_rate": 9.999775172737211e-05, + "loss": 0.5666, + "step": 10650 + }, + { + "epoch": 0.05520557627914416, + "grad_norm": 21106.083984375, + "learning_rate": 9.999739196503119e-05, + "loss": 0.5656, + "step": 10700 + }, + { + "epoch": 0.05546354626175698, + "grad_norm": 19393.994140625, + "learning_rate": 9.999700551505057e-05, + "loss": 0.566, + "step": 10750 + }, + { + "epoch": 0.05572151624436981, + "grad_norm": 22788.060546875, + "learning_rate": 9.999659237763656e-05, + "loss": 0.5681, + "step": 10800 + }, + { + "epoch": 0.05597948622698263, + "grad_norm": 20106.75390625, + "learning_rate": 9.999615255300966e-05, + "loss": 0.5668, + "step": 10850 + }, + { + "epoch": 0.05623745620959545, + "grad_norm": 22390.466796875, + "learning_rate": 9.999568604140464e-05, + "loss": 0.5665, + "step": 10900 + }, + { + "epoch": 0.05649542619220827, + "grad_norm": 21145.044921875, + "learning_rate": 9.999519284307053e-05, + "loss": 0.5645, + "step": 10950 + }, + { + "epoch": 0.0567533961748211, + "grad_norm": 22501.64453125, + "learning_rate": 9.999467295827059e-05, + "loss": 0.5663, + "step": 11000 + }, + { + "epoch": 0.05701136615743392, + "grad_norm": 21079.431640625, + "learning_rate": 9.999412638728229e-05, + "loss": 0.5605, + "step": 11050 + }, + { + "epoch": 0.05726933614004674, + "grad_norm": 21501.4375, + "learning_rate": 9.999355313039742e-05, + "loss": 0.5643, + "step": 11100 + }, + { + "epoch": 0.05752730612265957, + "grad_norm": 22092.6328125, + "learning_rate": 9.999295318792194e-05, + "loss": 0.5602, + "step": 11150 + }, + { + "epoch": 0.05778527610527239, + "grad_norm": 19948.81640625, + "learning_rate": 9.999232656017613e-05, + "loss": 0.5649, + "step": 11200 + }, + { + "epoch": 0.05804324608788521, + "grad_norm": 20543.5859375, + "learning_rate": 9.999167324749443e-05, + "loss": 0.5598, + "step": 11250 + }, + { + "epoch": 0.058301216070498034, + "grad_norm": 20948.060546875, + "learning_rate": 9.99909932502256e-05, + "loss": 0.5631, + "step": 11300 + }, + { + "epoch": 0.05855918605311086, + "grad_norm": 20384.732421875, + "learning_rate": 9.999028656873257e-05, + "loss": 0.5592, + "step": 11350 + }, + { + "epoch": 0.05881715603572368, + "grad_norm": 20027.615234375, + "learning_rate": 9.99895532033926e-05, + "loss": 0.5658, + "step": 11400 + }, + { + "epoch": 0.059075126018336505, + "grad_norm": 20702.263671875, + "learning_rate": 9.99887931545971e-05, + "loss": 0.56, + "step": 11450 + }, + { + "epoch": 0.05933309600094933, + "grad_norm": 21589.52734375, + "learning_rate": 9.99880064227518e-05, + "loss": 0.5595, + "step": 11500 + }, + { + "epoch": 0.059591065983562154, + "grad_norm": 20375.181640625, + "learning_rate": 9.998719300827663e-05, + "loss": 0.5627, + "step": 11550 + }, + { + "epoch": 0.059849035966174975, + "grad_norm": 20207.677734375, + "learning_rate": 9.998635291160577e-05, + "loss": 0.5615, + "step": 11600 + }, + { + "epoch": 0.060107005948787796, + "grad_norm": 20898.291015625, + "learning_rate": 9.998548613318767e-05, + "loss": 0.5594, + "step": 11650 + }, + { + "epoch": 0.060364975931400625, + "grad_norm": 20133.822265625, + "learning_rate": 9.998459267348497e-05, + "loss": 0.5631, + "step": 11700 + }, + { + "epoch": 0.060622945914013446, + "grad_norm": 19021.533203125, + "learning_rate": 9.99836725329746e-05, + "loss": 0.5576, + "step": 11750 + }, + { + "epoch": 0.06088091589662627, + "grad_norm": 19088.32421875, + "learning_rate": 9.998272571214772e-05, + "loss": 0.5619, + "step": 11800 + }, + { + "epoch": 0.061138885879239095, + "grad_norm": 19742.841796875, + "learning_rate": 9.99817522115097e-05, + "loss": 0.5626, + "step": 11850 + }, + { + "epoch": 0.061396855861851916, + "grad_norm": 21584.271484375, + "learning_rate": 9.99807520315802e-05, + "loss": 0.555, + "step": 11900 + }, + { + "epoch": 0.06165482584446474, + "grad_norm": 19766.76953125, + "learning_rate": 9.997972517289309e-05, + "loss": 0.5584, + "step": 11950 + }, + { + "epoch": 0.06191279582707756, + "grad_norm": 19821.556640625, + "learning_rate": 9.997867163599646e-05, + "loss": 0.5623, + "step": 12000 + }, + { + "epoch": 0.06217076580969039, + "grad_norm": 19488.490234375, + "learning_rate": 9.997759142145271e-05, + "loss": 0.5591, + "step": 12050 + }, + { + "epoch": 0.06242873579230321, + "grad_norm": 20093.806640625, + "learning_rate": 9.997648452983842e-05, + "loss": 0.5597, + "step": 12100 + }, + { + "epoch": 0.06268670577491603, + "grad_norm": 20202.154296875, + "learning_rate": 9.997535096174441e-05, + "loss": 0.5542, + "step": 12150 + }, + { + "epoch": 0.06294467575752885, + "grad_norm": 19978.154296875, + "learning_rate": 9.99741907177758e-05, + "loss": 0.5629, + "step": 12200 + }, + { + "epoch": 0.06320264574014167, + "grad_norm": 19697.005859375, + "learning_rate": 9.997300379855186e-05, + "loss": 0.5571, + "step": 12250 + }, + { + "epoch": 0.06346061572275451, + "grad_norm": 20384.287109375, + "learning_rate": 9.997179020470618e-05, + "loss": 0.5526, + "step": 12300 + }, + { + "epoch": 0.06371858570536733, + "grad_norm": 18652.044921875, + "learning_rate": 9.997054993688651e-05, + "loss": 0.5531, + "step": 12350 + }, + { + "epoch": 0.06397655568798015, + "grad_norm": 20133.990234375, + "learning_rate": 9.996928299575493e-05, + "loss": 0.5561, + "step": 12400 + }, + { + "epoch": 0.06423452567059297, + "grad_norm": 20575.875, + "learning_rate": 9.996798938198766e-05, + "loss": 0.5559, + "step": 12450 + }, + { + "epoch": 0.06449249565320579, + "grad_norm": 19524.828125, + "learning_rate": 9.996666909627525e-05, + "loss": 0.5437, + "step": 12500 + }, + { + "epoch": 0.06475046563581861, + "grad_norm": 22106.927734375, + "learning_rate": 9.996532213932242e-05, + "loss": 0.5691, + "step": 12550 + }, + { + "epoch": 0.06500843561843143, + "grad_norm": 18443.4609375, + "learning_rate": 9.996394851184814e-05, + "loss": 0.553, + "step": 12600 + }, + { + "epoch": 0.06526640560104426, + "grad_norm": 21786.943359375, + "learning_rate": 9.996254821458565e-05, + "loss": 0.562, + "step": 12650 + }, + { + "epoch": 0.06552437558365709, + "grad_norm": 22699.578125, + "learning_rate": 9.996112124828241e-05, + "loss": 0.5526, + "step": 12700 + }, + { + "epoch": 0.06578234556626991, + "grad_norm": 18522.822265625, + "learning_rate": 9.995966761370006e-05, + "loss": 0.5525, + "step": 12750 + }, + { + "epoch": 0.06604031554888273, + "grad_norm": 19723.44140625, + "learning_rate": 9.995818731161458e-05, + "loss": 0.5555, + "step": 12800 + }, + { + "epoch": 0.06629828553149555, + "grad_norm": 20643.173828125, + "learning_rate": 9.995668034281606e-05, + "loss": 0.5506, + "step": 12850 + }, + { + "epoch": 0.06655625551410838, + "grad_norm": 19303.68359375, + "learning_rate": 9.995514670810896e-05, + "loss": 0.5599, + "step": 12900 + }, + { + "epoch": 0.0668142254967212, + "grad_norm": 19837.240234375, + "learning_rate": 9.995358640831187e-05, + "loss": 0.5514, + "step": 12950 + }, + { + "epoch": 0.06707219547933402, + "grad_norm": 19212.25390625, + "learning_rate": 9.995199944425764e-05, + "loss": 0.5542, + "step": 13000 + }, + { + "epoch": 0.06733016546194685, + "grad_norm": 19908.70703125, + "learning_rate": 9.995038581679337e-05, + "loss": 0.5421, + "step": 13050 + }, + { + "epoch": 0.06758813544455967, + "grad_norm": 18933.306640625, + "learning_rate": 9.994874552678038e-05, + "loss": 0.549, + "step": 13100 + }, + { + "epoch": 0.0678461054271725, + "grad_norm": 19313.990234375, + "learning_rate": 9.994707857509422e-05, + "loss": 0.5569, + "step": 13150 + }, + { + "epoch": 0.06810407540978532, + "grad_norm": 20800.984375, + "learning_rate": 9.99453849626247e-05, + "loss": 0.5518, + "step": 13200 + }, + { + "epoch": 0.06836204539239814, + "grad_norm": 18623.361328125, + "learning_rate": 9.994366469027583e-05, + "loss": 0.5549, + "step": 13250 + }, + { + "epoch": 0.06862001537501096, + "grad_norm": 19761.654296875, + "learning_rate": 9.994191775896584e-05, + "loss": 0.5467, + "step": 13300 + }, + { + "epoch": 0.06887798535762378, + "grad_norm": 20618.501953125, + "learning_rate": 9.994014416962723e-05, + "loss": 0.5554, + "step": 13350 + }, + { + "epoch": 0.06913595534023662, + "grad_norm": 19279.791015625, + "learning_rate": 9.993834392320668e-05, + "loss": 0.5567, + "step": 13400 + }, + { + "epoch": 0.06939392532284944, + "grad_norm": 18802.34375, + "learning_rate": 9.993651702066516e-05, + "loss": 0.5608, + "step": 13450 + }, + { + "epoch": 0.06965189530546226, + "grad_norm": 20132.15625, + "learning_rate": 9.993466346297779e-05, + "loss": 0.547, + "step": 13500 + }, + { + "epoch": 0.06990986528807508, + "grad_norm": 19165.26171875, + "learning_rate": 9.993278325113403e-05, + "loss": 0.5485, + "step": 13550 + }, + { + "epoch": 0.0701678352706879, + "grad_norm": 18493.01171875, + "learning_rate": 9.993087638613743e-05, + "loss": 0.5455, + "step": 13600 + }, + { + "epoch": 0.07042580525330072, + "grad_norm": 18225.78125, + "learning_rate": 9.992894286900589e-05, + "loss": 0.5499, + "step": 13650 + }, + { + "epoch": 0.07068377523591354, + "grad_norm": 20189.802734375, + "learning_rate": 9.992698270077146e-05, + "loss": 0.5468, + "step": 13700 + }, + { + "epoch": 0.07094174521852638, + "grad_norm": 20861.2734375, + "learning_rate": 9.992499588248043e-05, + "loss": 0.5588, + "step": 13750 + }, + { + "epoch": 0.0711997152011392, + "grad_norm": 19876.689453125, + "learning_rate": 9.992298241519335e-05, + "loss": 0.5486, + "step": 13800 + }, + { + "epoch": 0.07145768518375202, + "grad_norm": 18371.142578125, + "learning_rate": 9.992094229998497e-05, + "loss": 0.5475, + "step": 13850 + }, + { + "epoch": 0.07171565516636484, + "grad_norm": 18274.396484375, + "learning_rate": 9.991887553794423e-05, + "loss": 0.549, + "step": 13900 + }, + { + "epoch": 0.07197362514897766, + "grad_norm": 18204.947265625, + "learning_rate": 9.991678213017437e-05, + "loss": 0.5419, + "step": 13950 + }, + { + "epoch": 0.07223159513159048, + "grad_norm": 18634.162109375, + "learning_rate": 9.991466207779278e-05, + "loss": 0.5528, + "step": 14000 + }, + { + "epoch": 0.0724895651142033, + "grad_norm": 21840.685546875, + "learning_rate": 9.991251538193112e-05, + "loss": 0.5492, + "step": 14050 + }, + { + "epoch": 0.07274753509681614, + "grad_norm": 18888.935546875, + "learning_rate": 9.991034204373524e-05, + "loss": 0.5504, + "step": 14100 + }, + { + "epoch": 0.07300550507942896, + "grad_norm": 19353.263671875, + "learning_rate": 9.990814206436524e-05, + "loss": 0.5425, + "step": 14150 + }, + { + "epoch": 0.07326347506204178, + "grad_norm": 18891.79296875, + "learning_rate": 9.990591544499543e-05, + "loss": 0.551, + "step": 14200 + }, + { + "epoch": 0.0735214450446546, + "grad_norm": 17878.33203125, + "learning_rate": 9.99036621868143e-05, + "loss": 0.5403, + "step": 14250 + }, + { + "epoch": 0.07377941502726743, + "grad_norm": 18997.544921875, + "learning_rate": 9.990138229102465e-05, + "loss": 0.5458, + "step": 14300 + }, + { + "epoch": 0.07403738500988025, + "grad_norm": 22162.03125, + "learning_rate": 9.989907575884341e-05, + "loss": 0.5482, + "step": 14350 + }, + { + "epoch": 0.07429535499249307, + "grad_norm": 17026.828125, + "learning_rate": 9.989674259150177e-05, + "loss": 0.5487, + "step": 14400 + }, + { + "epoch": 0.0745533249751059, + "grad_norm": 18335.169921875, + "learning_rate": 9.989438279024513e-05, + "loss": 0.5459, + "step": 14450 + }, + { + "epoch": 0.07481129495771872, + "grad_norm": 19508.666015625, + "learning_rate": 9.989199635633309e-05, + "loss": 0.5456, + "step": 14500 + }, + { + "epoch": 0.07506926494033155, + "grad_norm": 20281.28515625, + "learning_rate": 9.98895832910395e-05, + "loss": 0.5455, + "step": 14550 + }, + { + "epoch": 0.07532723492294437, + "grad_norm": 20196.259765625, + "learning_rate": 9.98871435956524e-05, + "loss": 0.5474, + "step": 14600 + }, + { + "epoch": 0.07558520490555719, + "grad_norm": 18934.544921875, + "learning_rate": 9.988467727147409e-05, + "loss": 0.546, + "step": 14650 + }, + { + "epoch": 0.07584317488817001, + "grad_norm": 20257.126953125, + "learning_rate": 9.988218431982098e-05, + "loss": 0.5443, + "step": 14700 + }, + { + "epoch": 0.07610114487078283, + "grad_norm": 20330.86328125, + "learning_rate": 9.98796647420238e-05, + "loss": 0.5423, + "step": 14750 + }, + { + "epoch": 0.07635911485339567, + "grad_norm": 19077.765625, + "learning_rate": 9.987711853942745e-05, + "loss": 0.5446, + "step": 14800 + }, + { + "epoch": 0.07661708483600849, + "grad_norm": 20855.169921875, + "learning_rate": 9.987454571339103e-05, + "loss": 0.5427, + "step": 14850 + }, + { + "epoch": 0.07687505481862131, + "grad_norm": 20556.005859375, + "learning_rate": 9.987194626528788e-05, + "loss": 0.5417, + "step": 14900 + }, + { + "epoch": 0.07713302480123413, + "grad_norm": 19028.7421875, + "learning_rate": 9.986932019650553e-05, + "loss": 0.5412, + "step": 14950 + }, + { + "epoch": 0.07739099478384695, + "grad_norm": 18669.166015625, + "learning_rate": 9.986666750844572e-05, + "loss": 0.5404, + "step": 15000 + }, + { + "epoch": 0.07739099478384695, + "eval_loss": 0.5350670218467712, + "eval_runtime": 3217.7876, + "eval_samples_per_second": 963.743, + "eval_steps_per_second": 1.882, + "step": 15000 + }, + { + "epoch": 0.07764896476645977, + "grad_norm": 19965.779296875, + "learning_rate": 9.98639882025244e-05, + "loss": 0.5439, + "step": 15050 + }, + { + "epoch": 0.07790693474907259, + "grad_norm": 18329.9921875, + "learning_rate": 9.986128228017173e-05, + "loss": 0.5425, + "step": 15100 + }, + { + "epoch": 0.07816490473168543, + "grad_norm": 20102.005859375, + "learning_rate": 9.985854974283211e-05, + "loss": 0.5444, + "step": 15150 + }, + { + "epoch": 0.07842287471429825, + "grad_norm": 19234.671875, + "learning_rate": 9.985579059196406e-05, + "loss": 0.5443, + "step": 15200 + }, + { + "epoch": 0.07868084469691107, + "grad_norm": 18324.298828125, + "learning_rate": 9.985300482904041e-05, + "loss": 0.5419, + "step": 15250 + }, + { + "epoch": 0.07893881467952389, + "grad_norm": 18766.2734375, + "learning_rate": 9.985019245554814e-05, + "loss": 0.5412, + "step": 15300 + }, + { + "epoch": 0.07919678466213671, + "grad_norm": 18805.765625, + "learning_rate": 9.984735347298841e-05, + "loss": 0.5443, + "step": 15350 + }, + { + "epoch": 0.07945475464474953, + "grad_norm": 17677.30078125, + "learning_rate": 9.984448788287665e-05, + "loss": 0.5421, + "step": 15400 + }, + { + "epoch": 0.07971272462736236, + "grad_norm": 19851.3515625, + "learning_rate": 9.984159568674243e-05, + "loss": 0.5426, + "step": 15450 + }, + { + "epoch": 0.07997069460997518, + "grad_norm": 18453.05859375, + "learning_rate": 9.983867688612956e-05, + "loss": 0.5445, + "step": 15500 + }, + { + "epoch": 0.08022866459258801, + "grad_norm": 17366.869140625, + "learning_rate": 9.983573148259603e-05, + "loss": 0.5451, + "step": 15550 + }, + { + "epoch": 0.08048663457520083, + "grad_norm": 18628.716796875, + "learning_rate": 9.983275947771407e-05, + "loss": 0.5373, + "step": 15600 + }, + { + "epoch": 0.08074460455781365, + "grad_norm": 19403.87890625, + "learning_rate": 9.982976087307003e-05, + "loss": 0.5489, + "step": 15650 + }, + { + "epoch": 0.08100257454042648, + "grad_norm": 18485.71875, + "learning_rate": 9.982673567026455e-05, + "loss": 0.538, + "step": 15700 + }, + { + "epoch": 0.0812605445230393, + "grad_norm": 19837.1796875, + "learning_rate": 9.982368387091241e-05, + "loss": 0.5356, + "step": 15750 + }, + { + "epoch": 0.08151851450565212, + "grad_norm": 19505.34375, + "learning_rate": 9.982060547664258e-05, + "loss": 0.5356, + "step": 15800 + }, + { + "epoch": 0.08177648448826494, + "grad_norm": 18645.48828125, + "learning_rate": 9.981750048909828e-05, + "loss": 0.5381, + "step": 15850 + }, + { + "epoch": 0.08203445447087777, + "grad_norm": 20191.73828125, + "learning_rate": 9.981436890993689e-05, + "loss": 0.535, + "step": 15900 + }, + { + "epoch": 0.0822924244534906, + "grad_norm": 18908.15625, + "learning_rate": 9.981121074082995e-05, + "loss": 0.5405, + "step": 15950 + }, + { + "epoch": 0.08255039443610342, + "grad_norm": 19517.73828125, + "learning_rate": 9.980802598346326e-05, + "loss": 0.5407, + "step": 16000 + }, + { + "epoch": 0.08280836441871624, + "grad_norm": 18368.16015625, + "learning_rate": 9.980481463953679e-05, + "loss": 0.5391, + "step": 16050 + }, + { + "epoch": 0.08306633440132906, + "grad_norm": 19727.35546875, + "learning_rate": 9.980157671076466e-05, + "loss": 0.537, + "step": 16100 + }, + { + "epoch": 0.08332430438394188, + "grad_norm": 20757.890625, + "learning_rate": 9.979831219887525e-05, + "loss": 0.5408, + "step": 16150 + }, + { + "epoch": 0.0835822743665547, + "grad_norm": 19334.708984375, + "learning_rate": 9.979502110561108e-05, + "loss": 0.5371, + "step": 16200 + }, + { + "epoch": 0.08384024434916754, + "grad_norm": 19338.498046875, + "learning_rate": 9.979170343272886e-05, + "loss": 0.531, + "step": 16250 + }, + { + "epoch": 0.08409821433178036, + "grad_norm": 18722.365234375, + "learning_rate": 9.978835918199949e-05, + "loss": 0.5398, + "step": 16300 + }, + { + "epoch": 0.08435618431439318, + "grad_norm": 18026.109375, + "learning_rate": 9.97849883552081e-05, + "loss": 0.5423, + "step": 16350 + }, + { + "epoch": 0.084614154297006, + "grad_norm": 19646.78125, + "learning_rate": 9.978159095415396e-05, + "loss": 0.5387, + "step": 16400 + }, + { + "epoch": 0.08487212427961882, + "grad_norm": 20091.552734375, + "learning_rate": 9.977816698065052e-05, + "loss": 0.5376, + "step": 16450 + }, + { + "epoch": 0.08513009426223164, + "grad_norm": 20539.73046875, + "learning_rate": 9.977471643652546e-05, + "loss": 0.5333, + "step": 16500 + }, + { + "epoch": 0.08538806424484446, + "grad_norm": 18306.24609375, + "learning_rate": 9.977123932362059e-05, + "loss": 0.5405, + "step": 16550 + }, + { + "epoch": 0.0856460342274573, + "grad_norm": 20133.513671875, + "learning_rate": 9.976773564379193e-05, + "loss": 0.541, + "step": 16600 + }, + { + "epoch": 0.08590400421007012, + "grad_norm": 19533.50390625, + "learning_rate": 9.976420539890969e-05, + "loss": 0.5333, + "step": 16650 + }, + { + "epoch": 0.08616197419268294, + "grad_norm": 19509.087890625, + "learning_rate": 9.976064859085822e-05, + "loss": 0.5347, + "step": 16700 + }, + { + "epoch": 0.08641994417529576, + "grad_norm": 19590.818359375, + "learning_rate": 9.97570652215361e-05, + "loss": 0.5377, + "step": 16750 + }, + { + "epoch": 0.08667791415790858, + "grad_norm": 19510.705078125, + "learning_rate": 9.975345529285605e-05, + "loss": 0.5367, + "step": 16800 + }, + { + "epoch": 0.0869358841405214, + "grad_norm": 20015.8046875, + "learning_rate": 9.974981880674499e-05, + "loss": 0.5386, + "step": 16850 + }, + { + "epoch": 0.08719385412313423, + "grad_norm": 18704.03125, + "learning_rate": 9.974615576514399e-05, + "loss": 0.5361, + "step": 16900 + }, + { + "epoch": 0.08745182410574706, + "grad_norm": 18257.869140625, + "learning_rate": 9.974246617000832e-05, + "loss": 0.5304, + "step": 16950 + }, + { + "epoch": 0.08770979408835988, + "grad_norm": 18150.517578125, + "learning_rate": 9.973875002330743e-05, + "loss": 0.5289, + "step": 17000 + }, + { + "epoch": 0.0879677640709727, + "grad_norm": 18326.041015625, + "learning_rate": 9.97350073270249e-05, + "loss": 0.5347, + "step": 17050 + }, + { + "epoch": 0.08822573405358553, + "grad_norm": 18199.224609375, + "learning_rate": 9.973123808315852e-05, + "loss": 0.5269, + "step": 17100 + }, + { + "epoch": 0.08848370403619835, + "grad_norm": 20351.447265625, + "learning_rate": 9.972744229372025e-05, + "loss": 0.5334, + "step": 17150 + }, + { + "epoch": 0.08874167401881117, + "grad_norm": 19200.703125, + "learning_rate": 9.97236199607362e-05, + "loss": 0.5316, + "step": 17200 + }, + { + "epoch": 0.08899964400142399, + "grad_norm": 18855.7890625, + "learning_rate": 9.971977108624664e-05, + "loss": 0.5342, + "step": 17250 + }, + { + "epoch": 0.08925761398403682, + "grad_norm": 18889.56640625, + "learning_rate": 9.971589567230606e-05, + "loss": 0.5361, + "step": 17300 + }, + { + "epoch": 0.08951558396664965, + "grad_norm": 18003.9921875, + "learning_rate": 9.971199372098304e-05, + "loss": 0.5353, + "step": 17350 + }, + { + "epoch": 0.08977355394926247, + "grad_norm": 19555.30078125, + "learning_rate": 9.970806523436041e-05, + "loss": 0.5306, + "step": 17400 + }, + { + "epoch": 0.09003152393187529, + "grad_norm": 19433.37890625, + "learning_rate": 9.97041102145351e-05, + "loss": 0.5341, + "step": 17450 + }, + { + "epoch": 0.09028949391448811, + "grad_norm": 19238.341796875, + "learning_rate": 9.97001286636182e-05, + "loss": 0.5372, + "step": 17500 + }, + { + "epoch": 0.09054746389710093, + "grad_norm": 18698.78125, + "learning_rate": 9.969612058373502e-05, + "loss": 0.5356, + "step": 17550 + }, + { + "epoch": 0.09080543387971375, + "grad_norm": 17953.580078125, + "learning_rate": 9.969208597702497e-05, + "loss": 0.529, + "step": 17600 + }, + { + "epoch": 0.09106340386232659, + "grad_norm": 17678.716796875, + "learning_rate": 9.968802484564168e-05, + "loss": 0.5329, + "step": 17650 + }, + { + "epoch": 0.09132137384493941, + "grad_norm": 20412.287109375, + "learning_rate": 9.968393719175286e-05, + "loss": 0.534, + "step": 17700 + }, + { + "epoch": 0.09157934382755223, + "grad_norm": 20080.16015625, + "learning_rate": 9.967982301754044e-05, + "loss": 0.5307, + "step": 17750 + }, + { + "epoch": 0.09183731381016505, + "grad_norm": 18570.314453125, + "learning_rate": 9.96756823252005e-05, + "loss": 0.526, + "step": 17800 + }, + { + "epoch": 0.09209528379277787, + "grad_norm": 18329.107421875, + "learning_rate": 9.967151511694324e-05, + "loss": 0.5273, + "step": 17850 + }, + { + "epoch": 0.09235325377539069, + "grad_norm": 19036.18359375, + "learning_rate": 9.966732139499304e-05, + "loss": 0.5275, + "step": 17900 + }, + { + "epoch": 0.09261122375800351, + "grad_norm": 18708.826171875, + "learning_rate": 9.966310116158844e-05, + "loss": 0.5313, + "step": 17950 + }, + { + "epoch": 0.09286919374061635, + "grad_norm": 18660.791015625, + "learning_rate": 9.96588544189821e-05, + "loss": 0.5303, + "step": 18000 + }, + { + "epoch": 0.09312716372322917, + "grad_norm": 19709.181640625, + "learning_rate": 9.965458116944086e-05, + "loss": 0.5347, + "step": 18050 + }, + { + "epoch": 0.09338513370584199, + "grad_norm": 19683.798828125, + "learning_rate": 9.96502814152457e-05, + "loss": 0.5359, + "step": 18100 + }, + { + "epoch": 0.09364310368845481, + "grad_norm": 19533.09765625, + "learning_rate": 9.964595515869175e-05, + "loss": 0.5263, + "step": 18150 + }, + { + "epoch": 0.09390107367106763, + "grad_norm": 20254.892578125, + "learning_rate": 9.964160240208826e-05, + "loss": 0.5307, + "step": 18200 + }, + { + "epoch": 0.09415904365368045, + "grad_norm": 21316.876953125, + "learning_rate": 9.963722314775868e-05, + "loss": 0.5316, + "step": 18250 + }, + { + "epoch": 0.09441701363629328, + "grad_norm": 20027.03515625, + "learning_rate": 9.963281739804054e-05, + "loss": 0.5274, + "step": 18300 + }, + { + "epoch": 0.0946749836189061, + "grad_norm": 18551.994140625, + "learning_rate": 9.962838515528554e-05, + "loss": 0.5339, + "step": 18350 + }, + { + "epoch": 0.09493295360151893, + "grad_norm": 17779.97265625, + "learning_rate": 9.962392642185956e-05, + "loss": 0.5301, + "step": 18400 + }, + { + "epoch": 0.09519092358413175, + "grad_norm": 20620.232421875, + "learning_rate": 9.961944120014256e-05, + "loss": 0.522, + "step": 18450 + }, + { + "epoch": 0.09544889356674457, + "grad_norm": 18669.73046875, + "learning_rate": 9.961492949252868e-05, + "loss": 0.5261, + "step": 18500 + }, + { + "epoch": 0.0957068635493574, + "grad_norm": 19528.4765625, + "learning_rate": 9.961039130142617e-05, + "loss": 0.5276, + "step": 18550 + }, + { + "epoch": 0.09596483353197022, + "grad_norm": 19643.099609375, + "learning_rate": 9.960582662925744e-05, + "loss": 0.5332, + "step": 18600 + }, + { + "epoch": 0.09622280351458304, + "grad_norm": 19024.4375, + "learning_rate": 9.960123547845901e-05, + "loss": 0.529, + "step": 18650 + }, + { + "epoch": 0.09648077349719586, + "grad_norm": 20228.248046875, + "learning_rate": 9.959661785148155e-05, + "loss": 0.5322, + "step": 18700 + }, + { + "epoch": 0.0967387434798087, + "grad_norm": 20120.126953125, + "learning_rate": 9.959197375078986e-05, + "loss": 0.5256, + "step": 18750 + }, + { + "epoch": 0.09699671346242152, + "grad_norm": 19894.423828125, + "learning_rate": 9.95873031788629e-05, + "loss": 0.5257, + "step": 18800 + }, + { + "epoch": 0.09725468344503434, + "grad_norm": 18450.8671875, + "learning_rate": 9.958260613819367e-05, + "loss": 0.5268, + "step": 18850 + }, + { + "epoch": 0.09751265342764716, + "grad_norm": 22775.53125, + "learning_rate": 9.95778826312894e-05, + "loss": 0.5293, + "step": 18900 + }, + { + "epoch": 0.09777062341025998, + "grad_norm": 17769.38671875, + "learning_rate": 9.95731326606714e-05, + "loss": 0.5281, + "step": 18950 + }, + { + "epoch": 0.0980285933928728, + "grad_norm": 20731.322265625, + "learning_rate": 9.956835622887514e-05, + "loss": 0.5327, + "step": 19000 + }, + { + "epoch": 0.09828656337548562, + "grad_norm": 20059.11328125, + "learning_rate": 9.956355333845014e-05, + "loss": 0.5279, + "step": 19050 + }, + { + "epoch": 0.09854453335809846, + "grad_norm": 17477.626953125, + "learning_rate": 9.955872399196012e-05, + "loss": 0.5257, + "step": 19100 + }, + { + "epoch": 0.09880250334071128, + "grad_norm": 20293.232421875, + "learning_rate": 9.955386819198287e-05, + "loss": 0.5258, + "step": 19150 + }, + { + "epoch": 0.0990604733233241, + "grad_norm": 19330.4140625, + "learning_rate": 9.954898594111035e-05, + "loss": 0.5231, + "step": 19200 + }, + { + "epoch": 0.09931844330593692, + "grad_norm": 19410.818359375, + "learning_rate": 9.954407724194858e-05, + "loss": 0.5286, + "step": 19250 + }, + { + "epoch": 0.09957641328854974, + "grad_norm": 18320.552734375, + "learning_rate": 9.953914209711775e-05, + "loss": 0.5287, + "step": 19300 + }, + { + "epoch": 0.09983438327116256, + "grad_norm": 17585.583984375, + "learning_rate": 9.953418050925213e-05, + "loss": 0.5265, + "step": 19350 + }, + { + "epoch": 0.10009235325377538, + "grad_norm": 20318.298828125, + "learning_rate": 9.952919248100012e-05, + "loss": 0.5292, + "step": 19400 + }, + { + "epoch": 0.10035032323638822, + "grad_norm": 20239.33984375, + "learning_rate": 9.952417801502426e-05, + "loss": 0.522, + "step": 19450 + }, + { + "epoch": 0.10060829321900104, + "grad_norm": 18922.158203125, + "learning_rate": 9.951913711400115e-05, + "loss": 0.5275, + "step": 19500 + }, + { + "epoch": 0.10086626320161386, + "grad_norm": 18332.673828125, + "learning_rate": 9.951406978062153e-05, + "loss": 0.5282, + "step": 19550 + }, + { + "epoch": 0.10112423318422668, + "grad_norm": 19321.662109375, + "learning_rate": 9.950897601759024e-05, + "loss": 0.5236, + "step": 19600 + }, + { + "epoch": 0.1013822031668395, + "grad_norm": 19050.42578125, + "learning_rate": 9.950385582762624e-05, + "loss": 0.5269, + "step": 19650 + }, + { + "epoch": 0.10164017314945233, + "grad_norm": 18592.8125, + "learning_rate": 9.949870921346259e-05, + "loss": 0.5294, + "step": 19700 + }, + { + "epoch": 0.10189814313206515, + "grad_norm": 17702.080078125, + "learning_rate": 9.949353617784644e-05, + "loss": 0.5321, + "step": 19750 + }, + { + "epoch": 0.10215611311467798, + "grad_norm": 18935.71875, + "learning_rate": 9.948833672353907e-05, + "loss": 0.5279, + "step": 19800 + }, + { + "epoch": 0.1024140830972908, + "grad_norm": 19814.96484375, + "learning_rate": 9.948311085331585e-05, + "loss": 0.5174, + "step": 19850 + }, + { + "epoch": 0.10267205307990362, + "grad_norm": 18945.4375, + "learning_rate": 9.947785856996623e-05, + "loss": 0.525, + "step": 19900 + }, + { + "epoch": 0.10293002306251645, + "grad_norm": 19162.28125, + "learning_rate": 9.947257987629379e-05, + "loss": 0.5268, + "step": 19950 + }, + { + "epoch": 0.10318799304512927, + "grad_norm": 18814.861328125, + "learning_rate": 9.94672747751162e-05, + "loss": 0.5191, + "step": 20000 + }, + { + "epoch": 0.10318799304512927, + "eval_loss": 0.5160176157951355, + "eval_runtime": 3272.5369, + "eval_samples_per_second": 947.62, + "eval_steps_per_second": 1.851, + "step": 20000 + }, + { + "epoch": 0.10344596302774209, + "grad_norm": 19089.77734375, + "learning_rate": 9.94619432692652e-05, + "loss": 0.5254, + "step": 20050 + }, + { + "epoch": 0.10370393301035491, + "grad_norm": 19005.53125, + "learning_rate": 9.945658536158667e-05, + "loss": 0.525, + "step": 20100 + }, + { + "epoch": 0.10396190299296774, + "grad_norm": 20896.8125, + "learning_rate": 9.945120105494054e-05, + "loss": 0.5173, + "step": 20150 + }, + { + "epoch": 0.10421987297558057, + "grad_norm": 19254.22265625, + "learning_rate": 9.944579035220085e-05, + "loss": 0.5195, + "step": 20200 + }, + { + "epoch": 0.10447784295819339, + "grad_norm": 19317.572265625, + "learning_rate": 9.944035325625573e-05, + "loss": 0.5239, + "step": 20250 + }, + { + "epoch": 0.10473581294080621, + "grad_norm": 18661.330078125, + "learning_rate": 9.94348897700074e-05, + "loss": 0.5243, + "step": 20300 + }, + { + "epoch": 0.10499378292341903, + "grad_norm": 18914.298828125, + "learning_rate": 9.942939989637216e-05, + "loss": 0.5247, + "step": 20350 + }, + { + "epoch": 0.10525175290603185, + "grad_norm": 17788.77734375, + "learning_rate": 9.942388363828041e-05, + "loss": 0.5205, + "step": 20400 + }, + { + "epoch": 0.10550972288864467, + "grad_norm": 17314.578125, + "learning_rate": 9.941834099867659e-05, + "loss": 0.5182, + "step": 20450 + }, + { + "epoch": 0.10576769287125751, + "grad_norm": 18627.068359375, + "learning_rate": 9.941277198051931e-05, + "loss": 0.5208, + "step": 20500 + }, + { + "epoch": 0.10602566285387033, + "grad_norm": 18274.4609375, + "learning_rate": 9.940717658678113e-05, + "loss": 0.5244, + "step": 20550 + }, + { + "epoch": 0.10628363283648315, + "grad_norm": 18668.767578125, + "learning_rate": 9.940155482044884e-05, + "loss": 0.5237, + "step": 20600 + }, + { + "epoch": 0.10654160281909597, + "grad_norm": 17703.703125, + "learning_rate": 9.939590668452316e-05, + "loss": 0.5148, + "step": 20650 + }, + { + "epoch": 0.10679957280170879, + "grad_norm": 18372.7578125, + "learning_rate": 9.939023218201901e-05, + "loss": 0.522, + "step": 20700 + }, + { + "epoch": 0.10705754278432161, + "grad_norm": 18439.521484375, + "learning_rate": 9.93845313159653e-05, + "loss": 0.5177, + "step": 20750 + }, + { + "epoch": 0.10731551276693443, + "grad_norm": 18812.10546875, + "learning_rate": 9.937880408940504e-05, + "loss": 0.5161, + "step": 20800 + }, + { + "epoch": 0.10757348274954727, + "grad_norm": 19163.4296875, + "learning_rate": 9.937305050539534e-05, + "loss": 0.5175, + "step": 20850 + }, + { + "epoch": 0.10783145273216009, + "grad_norm": 19459.3984375, + "learning_rate": 9.936727056700732e-05, + "loss": 0.5257, + "step": 20900 + }, + { + "epoch": 0.10808942271477291, + "grad_norm": 20272.22265625, + "learning_rate": 9.93614642773262e-05, + "loss": 0.5244, + "step": 20950 + }, + { + "epoch": 0.10834739269738573, + "grad_norm": 19995.736328125, + "learning_rate": 9.93556316394513e-05, + "loss": 0.5179, + "step": 21000 + }, + { + "epoch": 0.10860536267999855, + "grad_norm": 20567.369140625, + "learning_rate": 9.934977265649594e-05, + "loss": 0.528, + "step": 21050 + }, + { + "epoch": 0.10886333266261138, + "grad_norm": 19328.57421875, + "learning_rate": 9.934388733158753e-05, + "loss": 0.5249, + "step": 21100 + }, + { + "epoch": 0.1091213026452242, + "grad_norm": 17305.19921875, + "learning_rate": 9.933797566786757e-05, + "loss": 0.5163, + "step": 21150 + }, + { + "epoch": 0.10937927262783702, + "grad_norm": 19983.99609375, + "learning_rate": 9.933203766849155e-05, + "loss": 0.5227, + "step": 21200 + }, + { + "epoch": 0.10963724261044985, + "grad_norm": 18918.16015625, + "learning_rate": 9.93260733366291e-05, + "loss": 0.521, + "step": 21250 + }, + { + "epoch": 0.10989521259306267, + "grad_norm": 19260.40625, + "learning_rate": 9.932008267546384e-05, + "loss": 0.5195, + "step": 21300 + }, + { + "epoch": 0.1101531825756755, + "grad_norm": 16713.015625, + "learning_rate": 9.931406568819348e-05, + "loss": 0.5187, + "step": 21350 + }, + { + "epoch": 0.11041115255828832, + "grad_norm": 19787.67578125, + "learning_rate": 9.930802237802976e-05, + "loss": 0.5152, + "step": 21400 + }, + { + "epoch": 0.11066912254090114, + "grad_norm": 20632.775390625, + "learning_rate": 9.93019527481985e-05, + "loss": 0.5158, + "step": 21450 + }, + { + "epoch": 0.11092709252351396, + "grad_norm": 18545.748046875, + "learning_rate": 9.929585680193951e-05, + "loss": 0.5161, + "step": 21500 + }, + { + "epoch": 0.11118506250612678, + "grad_norm": 18961.138671875, + "learning_rate": 9.928973454250674e-05, + "loss": 0.5192, + "step": 21550 + }, + { + "epoch": 0.11144303248873962, + "grad_norm": 18970.013671875, + "learning_rate": 9.928358597316812e-05, + "loss": 0.5211, + "step": 21600 + }, + { + "epoch": 0.11170100247135244, + "grad_norm": 20800.046875, + "learning_rate": 9.927741109720561e-05, + "loss": 0.5143, + "step": 21650 + }, + { + "epoch": 0.11195897245396526, + "grad_norm": 18738.564453125, + "learning_rate": 9.927120991791528e-05, + "loss": 0.5232, + "step": 21700 + }, + { + "epoch": 0.11221694243657808, + "grad_norm": 18495.798828125, + "learning_rate": 9.926498243860715e-05, + "loss": 0.5176, + "step": 21750 + }, + { + "epoch": 0.1124749124191909, + "grad_norm": 18129.375, + "learning_rate": 9.925872866260537e-05, + "loss": 0.5132, + "step": 21800 + }, + { + "epoch": 0.11273288240180372, + "grad_norm": 19332.751953125, + "learning_rate": 9.925244859324807e-05, + "loss": 0.5135, + "step": 21850 + }, + { + "epoch": 0.11299085238441654, + "grad_norm": 19395.544921875, + "learning_rate": 9.924614223388742e-05, + "loss": 0.5191, + "step": 21900 + }, + { + "epoch": 0.11324882236702938, + "grad_norm": 20292.890625, + "learning_rate": 9.923980958788964e-05, + "loss": 0.5212, + "step": 21950 + }, + { + "epoch": 0.1135067923496422, + "grad_norm": 20309.033203125, + "learning_rate": 9.923345065863498e-05, + "loss": 0.5134, + "step": 22000 + }, + { + "epoch": 0.11376476233225502, + "grad_norm": 17513.578125, + "learning_rate": 9.922706544951772e-05, + "loss": 0.5216, + "step": 22050 + }, + { + "epoch": 0.11402273231486784, + "grad_norm": 18886.10546875, + "learning_rate": 9.922065396394614e-05, + "loss": 0.5219, + "step": 22100 + }, + { + "epoch": 0.11428070229748066, + "grad_norm": 19656.1484375, + "learning_rate": 9.921421620534257e-05, + "loss": 0.5163, + "step": 22150 + }, + { + "epoch": 0.11453867228009348, + "grad_norm": 18463.068359375, + "learning_rate": 9.920775217714338e-05, + "loss": 0.5198, + "step": 22200 + }, + { + "epoch": 0.1147966422627063, + "grad_norm": 20666.400390625, + "learning_rate": 9.920126188279892e-05, + "loss": 0.5164, + "step": 22250 + }, + { + "epoch": 0.11505461224531914, + "grad_norm": 20401.681640625, + "learning_rate": 9.919474532577359e-05, + "loss": 0.5163, + "step": 22300 + }, + { + "epoch": 0.11531258222793196, + "grad_norm": 21289.541015625, + "learning_rate": 9.918820250954581e-05, + "loss": 0.5114, + "step": 22350 + }, + { + "epoch": 0.11557055221054478, + "grad_norm": 17559.50390625, + "learning_rate": 9.918163343760801e-05, + "loss": 0.5156, + "step": 22400 + }, + { + "epoch": 0.1158285221931576, + "grad_norm": 17041.087890625, + "learning_rate": 9.917503811346662e-05, + "loss": 0.5146, + "step": 22450 + }, + { + "epoch": 0.11608649217577043, + "grad_norm": 20508.087890625, + "learning_rate": 9.916841654064212e-05, + "loss": 0.5202, + "step": 22500 + }, + { + "epoch": 0.11634446215838325, + "grad_norm": 21307.646484375, + "learning_rate": 9.916176872266894e-05, + "loss": 0.5108, + "step": 22550 + }, + { + "epoch": 0.11660243214099607, + "grad_norm": 21765.580078125, + "learning_rate": 9.91550946630956e-05, + "loss": 0.5158, + "step": 22600 + }, + { + "epoch": 0.1168604021236089, + "grad_norm": 18173.646484375, + "learning_rate": 9.914839436548454e-05, + "loss": 0.5081, + "step": 22650 + }, + { + "epoch": 0.11711837210622172, + "grad_norm": 19044.880859375, + "learning_rate": 9.914166783341227e-05, + "loss": 0.5144, + "step": 22700 + }, + { + "epoch": 0.11737634208883455, + "grad_norm": 19291.37109375, + "learning_rate": 9.91349150704693e-05, + "loss": 0.5147, + "step": 22750 + }, + { + "epoch": 0.11763431207144737, + "grad_norm": 16757.376953125, + "learning_rate": 9.91281360802601e-05, + "loss": 0.5163, + "step": 22800 + }, + { + "epoch": 0.11789228205406019, + "grad_norm": 18870.287109375, + "learning_rate": 9.912133086640318e-05, + "loss": 0.512, + "step": 22850 + }, + { + "epoch": 0.11815025203667301, + "grad_norm": 20520.115234375, + "learning_rate": 9.911449943253102e-05, + "loss": 0.5175, + "step": 22900 + }, + { + "epoch": 0.11840822201928583, + "grad_norm": 20585.21484375, + "learning_rate": 9.910764178229011e-05, + "loss": 0.5114, + "step": 22950 + }, + { + "epoch": 0.11866619200189867, + "grad_norm": 18660.384765625, + "learning_rate": 9.910075791934092e-05, + "loss": 0.5115, + "step": 23000 + }, + { + "epoch": 0.11892416198451149, + "grad_norm": 19391.318359375, + "learning_rate": 9.909384784735794e-05, + "loss": 0.5198, + "step": 23050 + }, + { + "epoch": 0.11918213196712431, + "grad_norm": 18007.306640625, + "learning_rate": 9.908691157002962e-05, + "loss": 0.5125, + "step": 23100 + }, + { + "epoch": 0.11944010194973713, + "grad_norm": 20804.501953125, + "learning_rate": 9.907994909105842e-05, + "loss": 0.516, + "step": 23150 + }, + { + "epoch": 0.11969807193234995, + "grad_norm": 18307.63671875, + "learning_rate": 9.907296041416076e-05, + "loss": 0.5108, + "step": 23200 + }, + { + "epoch": 0.11995604191496277, + "grad_norm": 19694.552734375, + "learning_rate": 9.906594554306709e-05, + "loss": 0.5092, + "step": 23250 + }, + { + "epoch": 0.12021401189757559, + "grad_norm": 20234.0703125, + "learning_rate": 9.90589044815218e-05, + "loss": 0.515, + "step": 23300 + }, + { + "epoch": 0.12047198188018843, + "grad_norm": 18483.4296875, + "learning_rate": 9.905183723328327e-05, + "loss": 0.5127, + "step": 23350 + }, + { + "epoch": 0.12072995186280125, + "grad_norm": 17447.51953125, + "learning_rate": 9.904474380212384e-05, + "loss": 0.5107, + "step": 23400 + }, + { + "epoch": 0.12098792184541407, + "grad_norm": 18881.7109375, + "learning_rate": 9.903762419182986e-05, + "loss": 0.5177, + "step": 23450 + }, + { + "epoch": 0.12124589182802689, + "grad_norm": 17861.990234375, + "learning_rate": 9.903047840620168e-05, + "loss": 0.5128, + "step": 23500 + }, + { + "epoch": 0.12150386181063971, + "grad_norm": 19111.53515625, + "learning_rate": 9.902330644905351e-05, + "loss": 0.5134, + "step": 23550 + }, + { + "epoch": 0.12176183179325253, + "grad_norm": 18461.107421875, + "learning_rate": 9.901610832421366e-05, + "loss": 0.51, + "step": 23600 + }, + { + "epoch": 0.12201980177586536, + "grad_norm": 18103.701171875, + "learning_rate": 9.900888403552431e-05, + "loss": 0.5131, + "step": 23650 + }, + { + "epoch": 0.12227777175847819, + "grad_norm": 18334.755859375, + "learning_rate": 9.900163358684168e-05, + "loss": 0.511, + "step": 23700 + }, + { + "epoch": 0.12253574174109101, + "grad_norm": 17476.322265625, + "learning_rate": 9.89943569820359e-05, + "loss": 0.5151, + "step": 23750 + }, + { + "epoch": 0.12279371172370383, + "grad_norm": 18698.09765625, + "learning_rate": 9.898705422499107e-05, + "loss": 0.5146, + "step": 23800 + }, + { + "epoch": 0.12305168170631665, + "grad_norm": 18321.80859375, + "learning_rate": 9.897972531960528e-05, + "loss": 0.5109, + "step": 23850 + }, + { + "epoch": 0.12330965168892948, + "grad_norm": 18234.361328125, + "learning_rate": 9.897237026979056e-05, + "loss": 0.5115, + "step": 23900 + }, + { + "epoch": 0.1235676216715423, + "grad_norm": 19737.849609375, + "learning_rate": 9.896498907947287e-05, + "loss": 0.5155, + "step": 23950 + }, + { + "epoch": 0.12382559165415512, + "grad_norm": 19136.279296875, + "learning_rate": 9.895758175259218e-05, + "loss": 0.5162, + "step": 24000 + }, + { + "epoch": 0.12408356163676794, + "grad_norm": 18575.431640625, + "learning_rate": 9.895014829310235e-05, + "loss": 0.5141, + "step": 24050 + }, + { + "epoch": 0.12434153161938077, + "grad_norm": 17589.353515625, + "learning_rate": 9.894268870497121e-05, + "loss": 0.501, + "step": 24100 + }, + { + "epoch": 0.1245995016019936, + "grad_norm": 19781.830078125, + "learning_rate": 9.893520299218057e-05, + "loss": 0.5128, + "step": 24150 + }, + { + "epoch": 0.12485747158460642, + "grad_norm": 17501.150390625, + "learning_rate": 9.892769115872617e-05, + "loss": 0.5113, + "step": 24200 + }, + { + "epoch": 0.12511544156721924, + "grad_norm": 21107.34375, + "learning_rate": 9.892015320861762e-05, + "loss": 0.5041, + "step": 24250 + }, + { + "epoch": 0.12537341154983206, + "grad_norm": 17529.345703125, + "learning_rate": 9.89125891458786e-05, + "loss": 0.5093, + "step": 24300 + }, + { + "epoch": 0.12563138153244488, + "grad_norm": 18061.890625, + "learning_rate": 9.890499897454663e-05, + "loss": 0.5111, + "step": 24350 + }, + { + "epoch": 0.1258893515150577, + "grad_norm": 21213.177734375, + "learning_rate": 9.889738269867318e-05, + "loss": 0.5106, + "step": 24400 + }, + { + "epoch": 0.12614732149767052, + "grad_norm": 17838.625, + "learning_rate": 9.88897403223237e-05, + "loss": 0.5144, + "step": 24450 + }, + { + "epoch": 0.12640529148028334, + "grad_norm": 19047.787109375, + "learning_rate": 9.888207184957752e-05, + "loss": 0.5133, + "step": 24500 + }, + { + "epoch": 0.12666326146289617, + "grad_norm": 17355.26171875, + "learning_rate": 9.887437728452794e-05, + "loss": 0.5054, + "step": 24550 + }, + { + "epoch": 0.12692123144550901, + "grad_norm": 20496.369140625, + "learning_rate": 9.886665663128216e-05, + "loss": 0.51, + "step": 24600 + }, + { + "epoch": 0.12717920142812184, + "grad_norm": 19887.734375, + "learning_rate": 9.885890989396133e-05, + "loss": 0.5049, + "step": 24650 + }, + { + "epoch": 0.12743717141073466, + "grad_norm": 20027.69140625, + "learning_rate": 9.885113707670049e-05, + "loss": 0.5118, + "step": 24700 + }, + { + "epoch": 0.12769514139334748, + "grad_norm": 18888.92578125, + "learning_rate": 9.884333818364861e-05, + "loss": 0.5168, + "step": 24750 + }, + { + "epoch": 0.1279531113759603, + "grad_norm": 20906.673828125, + "learning_rate": 9.883551321896862e-05, + "loss": 0.5109, + "step": 24800 + }, + { + "epoch": 0.12821108135857312, + "grad_norm": 20228.833984375, + "learning_rate": 9.882766218683731e-05, + "loss": 0.5167, + "step": 24850 + }, + { + "epoch": 0.12846905134118594, + "grad_norm": 19832.4609375, + "learning_rate": 9.881978509144543e-05, + "loss": 0.5113, + "step": 24900 + }, + { + "epoch": 0.12872702132379876, + "grad_norm": 18049.193359375, + "learning_rate": 9.881188193699758e-05, + "loss": 0.5121, + "step": 24950 + }, + { + "epoch": 0.12898499130641158, + "grad_norm": 18765.033203125, + "learning_rate": 9.880395272771236e-05, + "loss": 0.5123, + "step": 25000 + }, + { + "epoch": 0.12898499130641158, + "eval_loss": 0.5013377666473389, + "eval_runtime": 3332.4061, + "eval_samples_per_second": 930.595, + "eval_steps_per_second": 1.818, + "step": 25000 + }, + { + "epoch": 0.1292429612890244, + "grad_norm": 18435.787109375, + "learning_rate": 9.879599746782221e-05, + "loss": 0.5096, + "step": 25050 + }, + { + "epoch": 0.12950093127163723, + "grad_norm": 18993.890625, + "learning_rate": 9.878801616157348e-05, + "loss": 0.5091, + "step": 25100 + }, + { + "epoch": 0.12975890125425005, + "grad_norm": 19766.783203125, + "learning_rate": 9.878000881322646e-05, + "loss": 0.5059, + "step": 25150 + }, + { + "epoch": 0.13001687123686287, + "grad_norm": 19316.537109375, + "learning_rate": 9.87719754270553e-05, + "loss": 0.5112, + "step": 25200 + }, + { + "epoch": 0.1302748412194757, + "grad_norm": 19288.64453125, + "learning_rate": 9.876391600734807e-05, + "loss": 0.5031, + "step": 25250 + }, + { + "epoch": 0.1305328112020885, + "grad_norm": 18962.7734375, + "learning_rate": 9.875583055840673e-05, + "loss": 0.5113, + "step": 25300 + }, + { + "epoch": 0.13079078118470136, + "grad_norm": 19399.21875, + "learning_rate": 9.874771908454714e-05, + "loss": 0.5177, + "step": 25350 + }, + { + "epoch": 0.13104875116731418, + "grad_norm": 20511.134765625, + "learning_rate": 9.873958159009904e-05, + "loss": 0.5049, + "step": 25400 + }, + { + "epoch": 0.131306721149927, + "grad_norm": 17669.00390625, + "learning_rate": 9.87314180794061e-05, + "loss": 0.5076, + "step": 25450 + }, + { + "epoch": 0.13156469113253982, + "grad_norm": 20254.75390625, + "learning_rate": 9.872322855682579e-05, + "loss": 0.5102, + "step": 25500 + }, + { + "epoch": 0.13182266111515265, + "grad_norm": 21859.880859375, + "learning_rate": 9.871501302672956e-05, + "loss": 0.5098, + "step": 25550 + }, + { + "epoch": 0.13208063109776547, + "grad_norm": 18794.90625, + "learning_rate": 9.870677149350268e-05, + "loss": 0.5078, + "step": 25600 + }, + { + "epoch": 0.1323386010803783, + "grad_norm": 19909.65625, + "learning_rate": 9.869850396154434e-05, + "loss": 0.5129, + "step": 25650 + }, + { + "epoch": 0.1325965710629911, + "grad_norm": 17887.99609375, + "learning_rate": 9.869021043526756e-05, + "loss": 0.508, + "step": 25700 + }, + { + "epoch": 0.13285454104560393, + "grad_norm": 17189.033203125, + "learning_rate": 9.868189091909929e-05, + "loss": 0.5114, + "step": 25750 + }, + { + "epoch": 0.13311251102821675, + "grad_norm": 21320.78125, + "learning_rate": 9.867354541748033e-05, + "loss": 0.5081, + "step": 25800 + }, + { + "epoch": 0.13337048101082957, + "grad_norm": 19035.33984375, + "learning_rate": 9.866517393486532e-05, + "loss": 0.5065, + "step": 25850 + }, + { + "epoch": 0.1336284509934424, + "grad_norm": 19038.876953125, + "learning_rate": 9.86567764757228e-05, + "loss": 0.5055, + "step": 25900 + }, + { + "epoch": 0.13388642097605521, + "grad_norm": 20425.6875, + "learning_rate": 9.86483530445352e-05, + "loss": 0.5091, + "step": 25950 + }, + { + "epoch": 0.13414439095866804, + "grad_norm": 19947.34765625, + "learning_rate": 9.863990364579876e-05, + "loss": 0.5062, + "step": 26000 + }, + { + "epoch": 0.13440236094128089, + "grad_norm": 18758.7890625, + "learning_rate": 9.863142828402361e-05, + "loss": 0.5099, + "step": 26050 + }, + { + "epoch": 0.1346603309238937, + "grad_norm": 18494.076171875, + "learning_rate": 9.862292696373372e-05, + "loss": 0.5043, + "step": 26100 + }, + { + "epoch": 0.13491830090650653, + "grad_norm": 19646.841796875, + "learning_rate": 9.861439968946696e-05, + "loss": 0.508, + "step": 26150 + }, + { + "epoch": 0.13517627088911935, + "grad_norm": 19356.009765625, + "learning_rate": 9.8605846465775e-05, + "loss": 0.5015, + "step": 26200 + }, + { + "epoch": 0.13543424087173217, + "grad_norm": 19243.1875, + "learning_rate": 9.859726729722341e-05, + "loss": 0.5086, + "step": 26250 + }, + { + "epoch": 0.135692210854345, + "grad_norm": 20116.43359375, + "learning_rate": 9.858866218839156e-05, + "loss": 0.5074, + "step": 26300 + }, + { + "epoch": 0.1359501808369578, + "grad_norm": 18592.1015625, + "learning_rate": 9.858003114387269e-05, + "loss": 0.5054, + "step": 26350 + }, + { + "epoch": 0.13620815081957063, + "grad_norm": 19552.505859375, + "learning_rate": 9.85713741682739e-05, + "loss": 0.5042, + "step": 26400 + }, + { + "epoch": 0.13646612080218345, + "grad_norm": 18818.142578125, + "learning_rate": 9.856269126621611e-05, + "loss": 0.5106, + "step": 26450 + }, + { + "epoch": 0.13672409078479628, + "grad_norm": 21973.685546875, + "learning_rate": 9.855398244233407e-05, + "loss": 0.5116, + "step": 26500 + }, + { + "epoch": 0.1369820607674091, + "grad_norm": 19296.7890625, + "learning_rate": 9.854524770127641e-05, + "loss": 0.5103, + "step": 26550 + }, + { + "epoch": 0.13724003075002192, + "grad_norm": 18975.22265625, + "learning_rate": 9.853648704770554e-05, + "loss": 0.5093, + "step": 26600 + }, + { + "epoch": 0.13749800073263474, + "grad_norm": 20003.19140625, + "learning_rate": 9.852770048629776e-05, + "loss": 0.5094, + "step": 26650 + }, + { + "epoch": 0.13775597071524756, + "grad_norm": 19885.341796875, + "learning_rate": 9.851888802174312e-05, + "loss": 0.502, + "step": 26700 + }, + { + "epoch": 0.1380139406978604, + "grad_norm": 18030.115234375, + "learning_rate": 9.851004965874557e-05, + "loss": 0.5045, + "step": 26750 + }, + { + "epoch": 0.13827191068047323, + "grad_norm": 19143.369140625, + "learning_rate": 9.850118540202286e-05, + "loss": 0.5068, + "step": 26800 + }, + { + "epoch": 0.13852988066308605, + "grad_norm": 18902.5390625, + "learning_rate": 9.849229525630656e-05, + "loss": 0.4984, + "step": 26850 + }, + { + "epoch": 0.13878785064569887, + "grad_norm": 18523.115234375, + "learning_rate": 9.848337922634206e-05, + "loss": 0.5099, + "step": 26900 + }, + { + "epoch": 0.1390458206283117, + "grad_norm": 19873.283203125, + "learning_rate": 9.847443731688852e-05, + "loss": 0.5039, + "step": 26950 + }, + { + "epoch": 0.13930379061092452, + "grad_norm": 20202.23046875, + "learning_rate": 9.846546953271902e-05, + "loss": 0.507, + "step": 27000 + }, + { + "epoch": 0.13956176059353734, + "grad_norm": 17484.572265625, + "learning_rate": 9.845647587862034e-05, + "loss": 0.5113, + "step": 27050 + }, + { + "epoch": 0.13981973057615016, + "grad_norm": 17931.634765625, + "learning_rate": 9.844745635939316e-05, + "loss": 0.5051, + "step": 27100 + }, + { + "epoch": 0.14007770055876298, + "grad_norm": 20536.693359375, + "learning_rate": 9.843841097985191e-05, + "loss": 0.5044, + "step": 27150 + }, + { + "epoch": 0.1403356705413758, + "grad_norm": 18379.619140625, + "learning_rate": 9.842933974482482e-05, + "loss": 0.5071, + "step": 27200 + }, + { + "epoch": 0.14059364052398862, + "grad_norm": 19097.240234375, + "learning_rate": 9.842024265915397e-05, + "loss": 0.5046, + "step": 27250 + }, + { + "epoch": 0.14085161050660144, + "grad_norm": 22569.80859375, + "learning_rate": 9.841111972769517e-05, + "loss": 0.5022, + "step": 27300 + }, + { + "epoch": 0.14110958048921426, + "grad_norm": 17499.166015625, + "learning_rate": 9.84019709553181e-05, + "loss": 0.5014, + "step": 27350 + }, + { + "epoch": 0.14136755047182709, + "grad_norm": 20447.25, + "learning_rate": 9.839279634690619e-05, + "loss": 0.5065, + "step": 27400 + }, + { + "epoch": 0.14162552045443993, + "grad_norm": 20977.70703125, + "learning_rate": 9.838359590735665e-05, + "loss": 0.5042, + "step": 27450 + }, + { + "epoch": 0.14188349043705276, + "grad_norm": 18168.962890625, + "learning_rate": 9.83743696415805e-05, + "loss": 0.5043, + "step": 27500 + }, + { + "epoch": 0.14214146041966558, + "grad_norm": 18671.841796875, + "learning_rate": 9.836511755450256e-05, + "loss": 0.5054, + "step": 27550 + }, + { + "epoch": 0.1423994304022784, + "grad_norm": 17737.90625, + "learning_rate": 9.835583965106141e-05, + "loss": 0.507, + "step": 27600 + }, + { + "epoch": 0.14265740038489122, + "grad_norm": 23218.873046875, + "learning_rate": 9.834653593620939e-05, + "loss": 0.5055, + "step": 27650 + }, + { + "epoch": 0.14291537036750404, + "grad_norm": 20013.341796875, + "learning_rate": 9.833720641491269e-05, + "loss": 0.5008, + "step": 27700 + }, + { + "epoch": 0.14317334035011686, + "grad_norm": 21755.08203125, + "learning_rate": 9.832785109215119e-05, + "loss": 0.5029, + "step": 27750 + }, + { + "epoch": 0.14343131033272968, + "grad_norm": 18450.541015625, + "learning_rate": 9.831846997291859e-05, + "loss": 0.5086, + "step": 27800 + }, + { + "epoch": 0.1436892803153425, + "grad_norm": 17578.990234375, + "learning_rate": 9.830906306222235e-05, + "loss": 0.498, + "step": 27850 + }, + { + "epoch": 0.14394725029795533, + "grad_norm": 18771.2578125, + "learning_rate": 9.82996303650837e-05, + "loss": 0.5006, + "step": 27900 + }, + { + "epoch": 0.14420522028056815, + "grad_norm": 19841.912109375, + "learning_rate": 9.829017188653763e-05, + "loss": 0.5003, + "step": 27950 + }, + { + "epoch": 0.14446319026318097, + "grad_norm": 19089.384765625, + "learning_rate": 9.82806876316329e-05, + "loss": 0.5028, + "step": 28000 + }, + { + "epoch": 0.1447211602457938, + "grad_norm": 17971.998046875, + "learning_rate": 9.827117760543198e-05, + "loss": 0.5103, + "step": 28050 + }, + { + "epoch": 0.1449791302284066, + "grad_norm": 19590.46875, + "learning_rate": 9.826164181301121e-05, + "loss": 0.5075, + "step": 28100 + }, + { + "epoch": 0.14523710021101943, + "grad_norm": 19316.150390625, + "learning_rate": 9.825208025946056e-05, + "loss": 0.4971, + "step": 28150 + }, + { + "epoch": 0.14549507019363228, + "grad_norm": 19814.3125, + "learning_rate": 9.82424929498838e-05, + "loss": 0.501, + "step": 28200 + }, + { + "epoch": 0.1457530401762451, + "grad_norm": 18669.203125, + "learning_rate": 9.823287988939847e-05, + "loss": 0.5027, + "step": 28250 + }, + { + "epoch": 0.14601101015885792, + "grad_norm": 20375.48828125, + "learning_rate": 9.822324108313585e-05, + "loss": 0.4966, + "step": 28300 + }, + { + "epoch": 0.14626898014147074, + "grad_norm": 19665.4296875, + "learning_rate": 9.82135765362409e-05, + "loss": 0.4966, + "step": 28350 + }, + { + "epoch": 0.14652695012408357, + "grad_norm": 19579.771484375, + "learning_rate": 9.820388625387242e-05, + "loss": 0.5028, + "step": 28400 + }, + { + "epoch": 0.1467849201066964, + "grad_norm": 20270.564453125, + "learning_rate": 9.819417024120285e-05, + "loss": 0.4972, + "step": 28450 + }, + { + "epoch": 0.1470428900893092, + "grad_norm": 20025.6328125, + "learning_rate": 9.818442850341845e-05, + "loss": 0.5082, + "step": 28500 + }, + { + "epoch": 0.14730086007192203, + "grad_norm": 19062.525390625, + "learning_rate": 9.817466104571915e-05, + "loss": 0.4983, + "step": 28550 + }, + { + "epoch": 0.14755883005453485, + "grad_norm": 18558.0390625, + "learning_rate": 9.816486787331862e-05, + "loss": 0.5004, + "step": 28600 + }, + { + "epoch": 0.14781680003714767, + "grad_norm": 20880.6875, + "learning_rate": 9.815504899144428e-05, + "loss": 0.5036, + "step": 28650 + }, + { + "epoch": 0.1480747700197605, + "grad_norm": 19120.3359375, + "learning_rate": 9.814520440533726e-05, + "loss": 0.5004, + "step": 28700 + }, + { + "epoch": 0.14833274000237331, + "grad_norm": 17185.451171875, + "learning_rate": 9.813533412025242e-05, + "loss": 0.5047, + "step": 28750 + }, + { + "epoch": 0.14859070998498614, + "grad_norm": 21795.697265625, + "learning_rate": 9.81254381414583e-05, + "loss": 0.5033, + "step": 28800 + }, + { + "epoch": 0.14884867996759896, + "grad_norm": 21923.44140625, + "learning_rate": 9.811551647423718e-05, + "loss": 0.4957, + "step": 28850 + }, + { + "epoch": 0.1491066499502118, + "grad_norm": 18988.30078125, + "learning_rate": 9.810556912388509e-05, + "loss": 0.4979, + "step": 28900 + }, + { + "epoch": 0.14936461993282463, + "grad_norm": 18479.74609375, + "learning_rate": 9.809559609571169e-05, + "loss": 0.5003, + "step": 28950 + }, + { + "epoch": 0.14962258991543745, + "grad_norm": 20426.57421875, + "learning_rate": 9.808559739504043e-05, + "loss": 0.5019, + "step": 29000 + }, + { + "epoch": 0.14988055989805027, + "grad_norm": 20044.365234375, + "learning_rate": 9.80755730272084e-05, + "loss": 0.5012, + "step": 29050 + }, + { + "epoch": 0.1501385298806631, + "grad_norm": 18321.439453125, + "learning_rate": 9.806552299756641e-05, + "loss": 0.4918, + "step": 29100 + }, + { + "epoch": 0.1503964998632759, + "grad_norm": 20315.681640625, + "learning_rate": 9.805544731147899e-05, + "loss": 0.5015, + "step": 29150 + }, + { + "epoch": 0.15065446984588873, + "grad_norm": 20399.990234375, + "learning_rate": 9.804534597432432e-05, + "loss": 0.4967, + "step": 29200 + }, + { + "epoch": 0.15091243982850155, + "grad_norm": 19539.91796875, + "learning_rate": 9.803521899149432e-05, + "loss": 0.5002, + "step": 29250 + }, + { + "epoch": 0.15117040981111438, + "grad_norm": 20317.970703125, + "learning_rate": 9.802506636839457e-05, + "loss": 0.4988, + "step": 29300 + }, + { + "epoch": 0.1514283797937272, + "grad_norm": 18728.32421875, + "learning_rate": 9.801488811044434e-05, + "loss": 0.5007, + "step": 29350 + }, + { + "epoch": 0.15168634977634002, + "grad_norm": 21256.51171875, + "learning_rate": 9.80046842230766e-05, + "loss": 0.5066, + "step": 29400 + }, + { + "epoch": 0.15194431975895284, + "grad_norm": 18871.8828125, + "learning_rate": 9.799445471173799e-05, + "loss": 0.502, + "step": 29450 + }, + { + "epoch": 0.15220228974156566, + "grad_norm": 18434.251953125, + "learning_rate": 9.798419958188878e-05, + "loss": 0.5018, + "step": 29500 + }, + { + "epoch": 0.15246025972417848, + "grad_norm": 18562.412109375, + "learning_rate": 9.7973918839003e-05, + "loss": 0.4978, + "step": 29550 + }, + { + "epoch": 0.15271822970679133, + "grad_norm": 20020.7890625, + "learning_rate": 9.796361248856832e-05, + "loss": 0.4989, + "step": 29600 + }, + { + "epoch": 0.15297619968940415, + "grad_norm": 20026.6015625, + "learning_rate": 9.795328053608606e-05, + "loss": 0.5002, + "step": 29650 + }, + { + "epoch": 0.15323416967201697, + "grad_norm": 20098.703125, + "learning_rate": 9.794292298707119e-05, + "loss": 0.4938, + "step": 29700 + }, + { + "epoch": 0.1534921396546298, + "grad_norm": 18960.154296875, + "learning_rate": 9.793253984705239e-05, + "loss": 0.4956, + "step": 29750 + }, + { + "epoch": 0.15375010963724262, + "grad_norm": 20478.669921875, + "learning_rate": 9.7922131121572e-05, + "loss": 0.4998, + "step": 29800 + }, + { + "epoch": 0.15400807961985544, + "grad_norm": 20406.701171875, + "learning_rate": 9.791169681618596e-05, + "loss": 0.5083, + "step": 29850 + }, + { + "epoch": 0.15426604960246826, + "grad_norm": 17598.75390625, + "learning_rate": 9.790123693646391e-05, + "loss": 0.4968, + "step": 29900 + }, + { + "epoch": 0.15452401958508108, + "grad_norm": 19622.521484375, + "learning_rate": 9.789075148798915e-05, + "loss": 0.4881, + "step": 29950 + }, + { + "epoch": 0.1547819895676939, + "grad_norm": 20092.87109375, + "learning_rate": 9.78802404763586e-05, + "loss": 0.4994, + "step": 30000 + }, + { + "epoch": 0.1547819895676939, + "eval_loss": 0.4904574453830719, + "eval_runtime": 3267.2095, + "eval_samples_per_second": 949.165, + "eval_steps_per_second": 1.854, + "step": 30000 + }, + { + "epoch": 0.15503995955030672, + "grad_norm": 19136.84765625, + "learning_rate": 9.786970390718282e-05, + "loss": 0.4963, + "step": 30050 + }, + { + "epoch": 0.15529792953291954, + "grad_norm": 20464.998046875, + "learning_rate": 9.785914178608603e-05, + "loss": 0.4994, + "step": 30100 + }, + { + "epoch": 0.15555589951553236, + "grad_norm": 23388.55859375, + "learning_rate": 9.784855411870611e-05, + "loss": 0.5036, + "step": 30150 + }, + { + "epoch": 0.15581386949814519, + "grad_norm": 20002.095703125, + "learning_rate": 9.783794091069451e-05, + "loss": 0.5021, + "step": 30200 + }, + { + "epoch": 0.156071839480758, + "grad_norm": 19565.419921875, + "learning_rate": 9.782730216771641e-05, + "loss": 0.4929, + "step": 30250 + }, + { + "epoch": 0.15632980946337086, + "grad_norm": 20284.173828125, + "learning_rate": 9.781663789545052e-05, + "loss": 0.4889, + "step": 30300 + }, + { + "epoch": 0.15658777944598368, + "grad_norm": 18613.439453125, + "learning_rate": 9.780594809958922e-05, + "loss": 0.496, + "step": 30350 + }, + { + "epoch": 0.1568457494285965, + "grad_norm": 19419.1640625, + "learning_rate": 9.779523278583855e-05, + "loss": 0.4977, + "step": 30400 + }, + { + "epoch": 0.15710371941120932, + "grad_norm": 21695.361328125, + "learning_rate": 9.778449195991813e-05, + "loss": 0.4998, + "step": 30450 + }, + { + "epoch": 0.15736168939382214, + "grad_norm": 21914.3828125, + "learning_rate": 9.777372562756117e-05, + "loss": 0.4936, + "step": 30500 + }, + { + "epoch": 0.15761965937643496, + "grad_norm": 22384.525390625, + "learning_rate": 9.776293379451458e-05, + "loss": 0.5034, + "step": 30550 + }, + { + "epoch": 0.15787762935904778, + "grad_norm": 21174.220703125, + "learning_rate": 9.775211646653879e-05, + "loss": 0.4928, + "step": 30600 + }, + { + "epoch": 0.1581355993416606, + "grad_norm": 19809.953125, + "learning_rate": 9.77412736494079e-05, + "loss": 0.5014, + "step": 30650 + }, + { + "epoch": 0.15839356932427343, + "grad_norm": 19657.048828125, + "learning_rate": 9.773040534890958e-05, + "loss": 0.5022, + "step": 30700 + }, + { + "epoch": 0.15865153930688625, + "grad_norm": 20559.490234375, + "learning_rate": 9.771951157084514e-05, + "loss": 0.4923, + "step": 30750 + }, + { + "epoch": 0.15890950928949907, + "grad_norm": 19473.294921875, + "learning_rate": 9.770859232102946e-05, + "loss": 0.4991, + "step": 30800 + }, + { + "epoch": 0.1591674792721119, + "grad_norm": 19243.509765625, + "learning_rate": 9.769764760529102e-05, + "loss": 0.4934, + "step": 30850 + }, + { + "epoch": 0.1594254492547247, + "grad_norm": 20882.853515625, + "learning_rate": 9.768667742947189e-05, + "loss": 0.4989, + "step": 30900 + }, + { + "epoch": 0.15968341923733753, + "grad_norm": 19654.17578125, + "learning_rate": 9.767568179942776e-05, + "loss": 0.501, + "step": 30950 + }, + { + "epoch": 0.15994138921995035, + "grad_norm": 20069.412109375, + "learning_rate": 9.766466072102786e-05, + "loss": 0.5001, + "step": 31000 + }, + { + "epoch": 0.1601993592025632, + "grad_norm": 19730.416015625, + "learning_rate": 9.765361420015506e-05, + "loss": 0.4947, + "step": 31050 + }, + { + "epoch": 0.16045732918517602, + "grad_norm": 19825.43359375, + "learning_rate": 9.764254224270573e-05, + "loss": 0.5012, + "step": 31100 + }, + { + "epoch": 0.16071529916778884, + "grad_norm": 19111.859375, + "learning_rate": 9.763144485458992e-05, + "loss": 0.4946, + "step": 31150 + }, + { + "epoch": 0.16097326915040167, + "grad_norm": 20071.7578125, + "learning_rate": 9.762032204173116e-05, + "loss": 0.4961, + "step": 31200 + }, + { + "epoch": 0.1612312391330145, + "grad_norm": 18780.638671875, + "learning_rate": 9.76091738100666e-05, + "loss": 0.4952, + "step": 31250 + }, + { + "epoch": 0.1614892091156273, + "grad_norm": 20192.69140625, + "learning_rate": 9.759800016554699e-05, + "loss": 0.4919, + "step": 31300 + }, + { + "epoch": 0.16174717909824013, + "grad_norm": 18430.57421875, + "learning_rate": 9.758680111413653e-05, + "loss": 0.4953, + "step": 31350 + }, + { + "epoch": 0.16200514908085295, + "grad_norm": 18921.740234375, + "learning_rate": 9.757557666181314e-05, + "loss": 0.5013, + "step": 31400 + }, + { + "epoch": 0.16226311906346577, + "grad_norm": 18918.857421875, + "learning_rate": 9.756432681456815e-05, + "loss": 0.4976, + "step": 31450 + }, + { + "epoch": 0.1625210890460786, + "grad_norm": 21373.814453125, + "learning_rate": 9.755305157840655e-05, + "loss": 0.4975, + "step": 31500 + }, + { + "epoch": 0.16277905902869141, + "grad_norm": 19509.482421875, + "learning_rate": 9.754175095934684e-05, + "loss": 0.4966, + "step": 31550 + }, + { + "epoch": 0.16303702901130424, + "grad_norm": 18362.125, + "learning_rate": 9.753042496342103e-05, + "loss": 0.505, + "step": 31600 + }, + { + "epoch": 0.16329499899391706, + "grad_norm": 20344.11328125, + "learning_rate": 9.751907359667476e-05, + "loss": 0.4988, + "step": 31650 + }, + { + "epoch": 0.16355296897652988, + "grad_norm": 21398.97265625, + "learning_rate": 9.750769686516715e-05, + "loss": 0.493, + "step": 31700 + }, + { + "epoch": 0.16381093895914273, + "grad_norm": 21106.955078125, + "learning_rate": 9.74962947749709e-05, + "loss": 0.4999, + "step": 31750 + }, + { + "epoch": 0.16406890894175555, + "grad_norm": 19787.216796875, + "learning_rate": 9.74848673321722e-05, + "loss": 0.4932, + "step": 31800 + }, + { + "epoch": 0.16432687892436837, + "grad_norm": 19198.83984375, + "learning_rate": 9.747341454287082e-05, + "loss": 0.4919, + "step": 31850 + }, + { + "epoch": 0.1645848489069812, + "grad_norm": 18460.92578125, + "learning_rate": 9.746193641318002e-05, + "loss": 0.495, + "step": 31900 + }, + { + "epoch": 0.164842818889594, + "grad_norm": 18591.427734375, + "learning_rate": 9.74504329492266e-05, + "loss": 0.4888, + "step": 31950 + }, + { + "epoch": 0.16510078887220683, + "grad_norm": 21651.3515625, + "learning_rate": 9.743890415715091e-05, + "loss": 0.4909, + "step": 32000 + }, + { + "epoch": 0.16535875885481965, + "grad_norm": 18884.486328125, + "learning_rate": 9.742735004310677e-05, + "loss": 0.4981, + "step": 32050 + }, + { + "epoch": 0.16561672883743248, + "grad_norm": 19223.658203125, + "learning_rate": 9.741577061326157e-05, + "loss": 0.4961, + "step": 32100 + }, + { + "epoch": 0.1658746988200453, + "grad_norm": 18266.560546875, + "learning_rate": 9.740416587379615e-05, + "loss": 0.4914, + "step": 32150 + }, + { + "epoch": 0.16613266880265812, + "grad_norm": 19871.509765625, + "learning_rate": 9.739253583090493e-05, + "loss": 0.499, + "step": 32200 + }, + { + "epoch": 0.16639063878527094, + "grad_norm": 19524.298828125, + "learning_rate": 9.738088049079577e-05, + "loss": 0.4944, + "step": 32250 + }, + { + "epoch": 0.16664860876788376, + "grad_norm": 20308.685546875, + "learning_rate": 9.73691998596901e-05, + "loss": 0.4941, + "step": 32300 + }, + { + "epoch": 0.16690657875049658, + "grad_norm": 19125.52734375, + "learning_rate": 9.735749394382278e-05, + "loss": 0.4968, + "step": 32350 + }, + { + "epoch": 0.1671645487331094, + "grad_norm": 18792.716796875, + "learning_rate": 9.734576274944223e-05, + "loss": 0.4959, + "step": 32400 + }, + { + "epoch": 0.16742251871572225, + "grad_norm": 18521.54296875, + "learning_rate": 9.73340062828103e-05, + "loss": 0.4913, + "step": 32450 + }, + { + "epoch": 0.16768048869833507, + "grad_norm": 19540.41796875, + "learning_rate": 9.732222455020241e-05, + "loss": 0.4999, + "step": 32500 + }, + { + "epoch": 0.1679384586809479, + "grad_norm": 18682.84375, + "learning_rate": 9.73104175579074e-05, + "loss": 0.4991, + "step": 32550 + }, + { + "epoch": 0.16819642866356072, + "grad_norm": 20134.8984375, + "learning_rate": 9.72985853122276e-05, + "loss": 0.4839, + "step": 32600 + }, + { + "epoch": 0.16845439864617354, + "grad_norm": 20375.1484375, + "learning_rate": 9.728672781947883e-05, + "loss": 0.4941, + "step": 32650 + }, + { + "epoch": 0.16871236862878636, + "grad_norm": 19720.98046875, + "learning_rate": 9.727484508599042e-05, + "loss": 0.4856, + "step": 32700 + }, + { + "epoch": 0.16897033861139918, + "grad_norm": 19408.7734375, + "learning_rate": 9.726293711810513e-05, + "loss": 0.4942, + "step": 32750 + }, + { + "epoch": 0.169228308594012, + "grad_norm": 20136.892578125, + "learning_rate": 9.725100392217919e-05, + "loss": 0.4942, + "step": 32800 + }, + { + "epoch": 0.16948627857662482, + "grad_norm": 20555.27734375, + "learning_rate": 9.723904550458232e-05, + "loss": 0.4907, + "step": 32850 + }, + { + "epoch": 0.16974424855923764, + "grad_norm": 18876.787109375, + "learning_rate": 9.722706187169769e-05, + "loss": 0.4951, + "step": 32900 + }, + { + "epoch": 0.17000221854185046, + "grad_norm": 19918.4296875, + "learning_rate": 9.721505302992194e-05, + "loss": 0.4871, + "step": 32950 + }, + { + "epoch": 0.17026018852446329, + "grad_norm": 18593.453125, + "learning_rate": 9.720301898566513e-05, + "loss": 0.4889, + "step": 33000 + }, + { + "epoch": 0.1705181585070761, + "grad_norm": 21007.5625, + "learning_rate": 9.719095974535084e-05, + "loss": 0.4936, + "step": 33050 + }, + { + "epoch": 0.17077612848968893, + "grad_norm": 21749.849609375, + "learning_rate": 9.717887531541601e-05, + "loss": 0.4915, + "step": 33100 + }, + { + "epoch": 0.17103409847230178, + "grad_norm": 19097.896484375, + "learning_rate": 9.716676570231114e-05, + "loss": 0.4857, + "step": 33150 + }, + { + "epoch": 0.1712920684549146, + "grad_norm": 18509.107421875, + "learning_rate": 9.715463091250003e-05, + "loss": 0.487, + "step": 33200 + }, + { + "epoch": 0.17155003843752742, + "grad_norm": 21414.916015625, + "learning_rate": 9.714247095246007e-05, + "loss": 0.4929, + "step": 33250 + }, + { + "epoch": 0.17180800842014024, + "grad_norm": 19836.978515625, + "learning_rate": 9.713028582868196e-05, + "loss": 0.4948, + "step": 33300 + }, + { + "epoch": 0.17206597840275306, + "grad_norm": 18013.787109375, + "learning_rate": 9.71180755476699e-05, + "loss": 0.4945, + "step": 33350 + }, + { + "epoch": 0.17232394838536588, + "grad_norm": 18498.1640625, + "learning_rate": 9.71058401159415e-05, + "loss": 0.4961, + "step": 33400 + }, + { + "epoch": 0.1725819183679787, + "grad_norm": 19871.404296875, + "learning_rate": 9.709357954002778e-05, + "loss": 0.4896, + "step": 33450 + }, + { + "epoch": 0.17283988835059153, + "grad_norm": 20794.05859375, + "learning_rate": 9.708129382647324e-05, + "loss": 0.4855, + "step": 33500 + }, + { + "epoch": 0.17309785833320435, + "grad_norm": 19775.6328125, + "learning_rate": 9.706898298183573e-05, + "loss": 0.4899, + "step": 33550 + }, + { + "epoch": 0.17335582831581717, + "grad_norm": 24329.740234375, + "learning_rate": 9.705664701268652e-05, + "loss": 0.4879, + "step": 33600 + }, + { + "epoch": 0.17361379829843, + "grad_norm": 19666.697265625, + "learning_rate": 9.704428592561037e-05, + "loss": 0.493, + "step": 33650 + }, + { + "epoch": 0.1738717682810428, + "grad_norm": 20382.115234375, + "learning_rate": 9.703189972720532e-05, + "loss": 0.4922, + "step": 33700 + }, + { + "epoch": 0.17412973826365563, + "grad_norm": 20240.46875, + "learning_rate": 9.701948842408293e-05, + "loss": 0.4908, + "step": 33750 + }, + { + "epoch": 0.17438770824626845, + "grad_norm": 18531.224609375, + "learning_rate": 9.700705202286811e-05, + "loss": 0.489, + "step": 33800 + }, + { + "epoch": 0.17464567822888127, + "grad_norm": 19121.0625, + "learning_rate": 9.699459053019912e-05, + "loss": 0.4884, + "step": 33850 + }, + { + "epoch": 0.17490364821149412, + "grad_norm": 19959.931640625, + "learning_rate": 9.698210395272773e-05, + "loss": 0.4912, + "step": 33900 + }, + { + "epoch": 0.17516161819410694, + "grad_norm": 18255.732421875, + "learning_rate": 9.696959229711901e-05, + "loss": 0.4888, + "step": 33950 + }, + { + "epoch": 0.17541958817671977, + "grad_norm": 21808.8671875, + "learning_rate": 9.695705557005142e-05, + "loss": 0.4945, + "step": 34000 + }, + { + "epoch": 0.1756775581593326, + "grad_norm": 18687.521484375, + "learning_rate": 9.694449377821685e-05, + "loss": 0.4891, + "step": 34050 + }, + { + "epoch": 0.1759355281419454, + "grad_norm": 18309.859375, + "learning_rate": 9.693190692832053e-05, + "loss": 0.4888, + "step": 34100 + }, + { + "epoch": 0.17619349812455823, + "grad_norm": 19453.705078125, + "learning_rate": 9.691929502708106e-05, + "loss": 0.4852, + "step": 34150 + }, + { + "epoch": 0.17645146810717105, + "grad_norm": 20964.595703125, + "learning_rate": 9.690665808123046e-05, + "loss": 0.4931, + "step": 34200 + }, + { + "epoch": 0.17670943808978387, + "grad_norm": 20170.5703125, + "learning_rate": 9.689399609751405e-05, + "loss": 0.4908, + "step": 34250 + }, + { + "epoch": 0.1769674080723967, + "grad_norm": 18276.19140625, + "learning_rate": 9.688130908269058e-05, + "loss": 0.4906, + "step": 34300 + }, + { + "epoch": 0.1772253780550095, + "grad_norm": 21062.56640625, + "learning_rate": 9.686859704353212e-05, + "loss": 0.4911, + "step": 34350 + }, + { + "epoch": 0.17748334803762233, + "grad_norm": 21678.6796875, + "learning_rate": 9.685585998682414e-05, + "loss": 0.4894, + "step": 34400 + }, + { + "epoch": 0.17774131802023516, + "grad_norm": 17795.384765625, + "learning_rate": 9.684309791936539e-05, + "loss": 0.4893, + "step": 34450 + }, + { + "epoch": 0.17799928800284798, + "grad_norm": 21536.837890625, + "learning_rate": 9.683031084796803e-05, + "loss": 0.4889, + "step": 34500 + }, + { + "epoch": 0.1782572579854608, + "grad_norm": 20554.423828125, + "learning_rate": 9.681749877945756e-05, + "loss": 0.4843, + "step": 34550 + }, + { + "epoch": 0.17851522796807365, + "grad_norm": 22045.376953125, + "learning_rate": 9.680466172067282e-05, + "loss": 0.4895, + "step": 34600 + }, + { + "epoch": 0.17877319795068647, + "grad_norm": 21406.853515625, + "learning_rate": 9.679179967846597e-05, + "loss": 0.4914, + "step": 34650 + }, + { + "epoch": 0.1790311679332993, + "grad_norm": 20971.037109375, + "learning_rate": 9.677891265970252e-05, + "loss": 0.485, + "step": 34700 + }, + { + "epoch": 0.1792891379159121, + "grad_norm": 20256.73828125, + "learning_rate": 9.676600067126129e-05, + "loss": 0.4918, + "step": 34750 + }, + { + "epoch": 0.17954710789852493, + "grad_norm": 19123.048828125, + "learning_rate": 9.67530637200345e-05, + "loss": 0.49, + "step": 34800 + }, + { + "epoch": 0.17980507788113775, + "grad_norm": 20799.748046875, + "learning_rate": 9.674010181292761e-05, + "loss": 0.4889, + "step": 34850 + }, + { + "epoch": 0.18006304786375057, + "grad_norm": 19569.609375, + "learning_rate": 9.672711495685945e-05, + "loss": 0.4882, + "step": 34900 + }, + { + "epoch": 0.1803210178463634, + "grad_norm": 18339.76171875, + "learning_rate": 9.671410315876213e-05, + "loss": 0.4884, + "step": 34950 + }, + { + "epoch": 0.18057898782897622, + "grad_norm": 20066.099609375, + "learning_rate": 9.670106642558111e-05, + "loss": 0.4866, + "step": 35000 + }, + { + "epoch": 0.18057898782897622, + "eval_loss": 0.48020538687705994, + "eval_runtime": 3265.3619, + "eval_samples_per_second": 949.702, + "eval_steps_per_second": 1.855, + "step": 35000 + }, + { + "epoch": 0.18083695781158904, + "grad_norm": 18703.037109375, + "learning_rate": 9.668800476427515e-05, + "loss": 0.4953, + "step": 35050 + }, + { + "epoch": 0.18109492779420186, + "grad_norm": 19886.177734375, + "learning_rate": 9.667491818181631e-05, + "loss": 0.4845, + "step": 35100 + }, + { + "epoch": 0.18135289777681468, + "grad_norm": 19349.08203125, + "learning_rate": 9.666180668518993e-05, + "loss": 0.493, + "step": 35150 + }, + { + "epoch": 0.1816108677594275, + "grad_norm": 19786.404296875, + "learning_rate": 9.664867028139473e-05, + "loss": 0.4815, + "step": 35200 + }, + { + "epoch": 0.18186883774204032, + "grad_norm": 21271.05859375, + "learning_rate": 9.66355089774426e-05, + "loss": 0.4907, + "step": 35250 + }, + { + "epoch": 0.18212680772465317, + "grad_norm": 19096.3125, + "learning_rate": 9.662232278035885e-05, + "loss": 0.4865, + "step": 35300 + }, + { + "epoch": 0.182384777707266, + "grad_norm": 20136.935546875, + "learning_rate": 9.660911169718196e-05, + "loss": 0.4824, + "step": 35350 + }, + { + "epoch": 0.18264274768987881, + "grad_norm": 19532.361328125, + "learning_rate": 9.65958757349638e-05, + "loss": 0.4857, + "step": 35400 + }, + { + "epoch": 0.18290071767249164, + "grad_norm": 18227.626953125, + "learning_rate": 9.658261490076944e-05, + "loss": 0.4871, + "step": 35450 + }, + { + "epoch": 0.18315868765510446, + "grad_norm": 21021.564453125, + "learning_rate": 9.656932920167727e-05, + "loss": 0.485, + "step": 35500 + }, + { + "epoch": 0.18341665763771728, + "grad_norm": 19943.9765625, + "learning_rate": 9.655601864477893e-05, + "loss": 0.4908, + "step": 35550 + }, + { + "epoch": 0.1836746276203301, + "grad_norm": 19356.8203125, + "learning_rate": 9.654268323717934e-05, + "loss": 0.4849, + "step": 35600 + }, + { + "epoch": 0.18393259760294292, + "grad_norm": 19431.9453125, + "learning_rate": 9.652932298599671e-05, + "loss": 0.4927, + "step": 35650 + }, + { + "epoch": 0.18419056758555574, + "grad_norm": 18860.0625, + "learning_rate": 9.651593789836242e-05, + "loss": 0.4879, + "step": 35700 + }, + { + "epoch": 0.18444853756816856, + "grad_norm": 18524.46875, + "learning_rate": 9.650252798142123e-05, + "loss": 0.4877, + "step": 35750 + }, + { + "epoch": 0.18470650755078138, + "grad_norm": 18897.322265625, + "learning_rate": 9.648909324233107e-05, + "loss": 0.4906, + "step": 35800 + }, + { + "epoch": 0.1849644775333942, + "grad_norm": 21080.552734375, + "learning_rate": 9.647563368826313e-05, + "loss": 0.4895, + "step": 35850 + }, + { + "epoch": 0.18522244751600703, + "grad_norm": 20014.828125, + "learning_rate": 9.64621493264019e-05, + "loss": 0.4816, + "step": 35900 + }, + { + "epoch": 0.18548041749861985, + "grad_norm": 19470.3984375, + "learning_rate": 9.644864016394504e-05, + "loss": 0.4812, + "step": 35950 + }, + { + "epoch": 0.1857383874812327, + "grad_norm": 21915.400390625, + "learning_rate": 9.643510620810348e-05, + "loss": 0.4859, + "step": 36000 + }, + { + "epoch": 0.18599635746384552, + "grad_norm": 19367.009765625, + "learning_rate": 9.642154746610139e-05, + "loss": 0.4905, + "step": 36050 + }, + { + "epoch": 0.18625432744645834, + "grad_norm": 18379.70703125, + "learning_rate": 9.640796394517616e-05, + "loss": 0.4878, + "step": 36100 + }, + { + "epoch": 0.18651229742907116, + "grad_norm": 18933.455078125, + "learning_rate": 9.639435565257842e-05, + "loss": 0.4877, + "step": 36150 + }, + { + "epoch": 0.18677026741168398, + "grad_norm": 19026.484375, + "learning_rate": 9.638072259557201e-05, + "loss": 0.4873, + "step": 36200 + }, + { + "epoch": 0.1870282373942968, + "grad_norm": 21111.09375, + "learning_rate": 9.636706478143398e-05, + "loss": 0.4815, + "step": 36250 + }, + { + "epoch": 0.18728620737690962, + "grad_norm": 19362.541015625, + "learning_rate": 9.635338221745462e-05, + "loss": 0.4854, + "step": 36300 + }, + { + "epoch": 0.18754417735952245, + "grad_norm": 19861.58984375, + "learning_rate": 9.63396749109374e-05, + "loss": 0.4832, + "step": 36350 + }, + { + "epoch": 0.18780214734213527, + "grad_norm": 18793.623046875, + "learning_rate": 9.632594286919905e-05, + "loss": 0.4811, + "step": 36400 + }, + { + "epoch": 0.1880601173247481, + "grad_norm": 20452.26953125, + "learning_rate": 9.631218609956943e-05, + "loss": 0.4872, + "step": 36450 + }, + { + "epoch": 0.1883180873073609, + "grad_norm": 19237.203125, + "learning_rate": 9.629840460939165e-05, + "loss": 0.4941, + "step": 36500 + }, + { + "epoch": 0.18857605728997373, + "grad_norm": 19828.84765625, + "learning_rate": 9.628459840602202e-05, + "loss": 0.4869, + "step": 36550 + }, + { + "epoch": 0.18883402727258655, + "grad_norm": 18171.08203125, + "learning_rate": 9.627076749683e-05, + "loss": 0.4915, + "step": 36600 + }, + { + "epoch": 0.18909199725519937, + "grad_norm": 21346.9375, + "learning_rate": 9.625691188919827e-05, + "loss": 0.4913, + "step": 36650 + }, + { + "epoch": 0.1893499672378122, + "grad_norm": 20066.7890625, + "learning_rate": 9.62430315905227e-05, + "loss": 0.4809, + "step": 36700 + }, + { + "epoch": 0.18960793722042504, + "grad_norm": 20736.546875, + "learning_rate": 9.622912660821231e-05, + "loss": 0.4849, + "step": 36750 + }, + { + "epoch": 0.18986590720303786, + "grad_norm": 20891.958984375, + "learning_rate": 9.62151969496893e-05, + "loss": 0.4831, + "step": 36800 + }, + { + "epoch": 0.19012387718565069, + "grad_norm": 21394.1953125, + "learning_rate": 9.620124262238908e-05, + "loss": 0.4855, + "step": 36850 + }, + { + "epoch": 0.1903818471682635, + "grad_norm": 19725.89453125, + "learning_rate": 9.618726363376016e-05, + "loss": 0.48, + "step": 36900 + }, + { + "epoch": 0.19063981715087633, + "grad_norm": 21622.78125, + "learning_rate": 9.617325999126429e-05, + "loss": 0.4832, + "step": 36950 + }, + { + "epoch": 0.19089778713348915, + "grad_norm": 22529.548828125, + "learning_rate": 9.615923170237633e-05, + "loss": 0.4852, + "step": 37000 + }, + { + "epoch": 0.19115575711610197, + "grad_norm": 21136.404296875, + "learning_rate": 9.614517877458428e-05, + "loss": 0.4816, + "step": 37050 + }, + { + "epoch": 0.1914137270987148, + "grad_norm": 19039.330078125, + "learning_rate": 9.61311012153894e-05, + "loss": 0.4835, + "step": 37100 + }, + { + "epoch": 0.1916716970813276, + "grad_norm": 19755.974609375, + "learning_rate": 9.611699903230594e-05, + "loss": 0.4846, + "step": 37150 + }, + { + "epoch": 0.19192966706394043, + "grad_norm": 19061.28515625, + "learning_rate": 9.610287223286139e-05, + "loss": 0.4816, + "step": 37200 + }, + { + "epoch": 0.19218763704655326, + "grad_norm": 21649.275390625, + "learning_rate": 9.608872082459639e-05, + "loss": 0.4837, + "step": 37250 + }, + { + "epoch": 0.19244560702916608, + "grad_norm": 19856.759765625, + "learning_rate": 9.607454481506466e-05, + "loss": 0.4848, + "step": 37300 + }, + { + "epoch": 0.1927035770117789, + "grad_norm": 19442.810546875, + "learning_rate": 9.60603442118331e-05, + "loss": 0.4828, + "step": 37350 + }, + { + "epoch": 0.19296154699439172, + "grad_norm": 20076.44140625, + "learning_rate": 9.604611902248168e-05, + "loss": 0.4896, + "step": 37400 + }, + { + "epoch": 0.19321951697700457, + "grad_norm": 18413.908203125, + "learning_rate": 9.603186925460359e-05, + "loss": 0.4806, + "step": 37450 + }, + { + "epoch": 0.1934774869596174, + "grad_norm": 19618.3984375, + "learning_rate": 9.601759491580503e-05, + "loss": 0.4864, + "step": 37500 + }, + { + "epoch": 0.1937354569422302, + "grad_norm": 20347.177734375, + "learning_rate": 9.600329601370539e-05, + "loss": 0.489, + "step": 37550 + }, + { + "epoch": 0.19399342692484303, + "grad_norm": 19288.380859375, + "learning_rate": 9.598897255593713e-05, + "loss": 0.4829, + "step": 37600 + }, + { + "epoch": 0.19425139690745585, + "grad_norm": 20326.1484375, + "learning_rate": 9.597462455014585e-05, + "loss": 0.4856, + "step": 37650 + }, + { + "epoch": 0.19450936689006867, + "grad_norm": 19598.14453125, + "learning_rate": 9.596025200399024e-05, + "loss": 0.4831, + "step": 37700 + }, + { + "epoch": 0.1947673368726815, + "grad_norm": 20041.28125, + "learning_rate": 9.594585492514205e-05, + "loss": 0.4822, + "step": 37750 + }, + { + "epoch": 0.19502530685529432, + "grad_norm": 20853.201171875, + "learning_rate": 9.593143332128623e-05, + "loss": 0.4874, + "step": 37800 + }, + { + "epoch": 0.19528327683790714, + "grad_norm": 21364.455078125, + "learning_rate": 9.591698720012068e-05, + "loss": 0.482, + "step": 37850 + }, + { + "epoch": 0.19554124682051996, + "grad_norm": 18795.447265625, + "learning_rate": 9.590251656935652e-05, + "loss": 0.489, + "step": 37900 + }, + { + "epoch": 0.19579921680313278, + "grad_norm": 23039.455078125, + "learning_rate": 9.588802143671784e-05, + "loss": 0.4879, + "step": 37950 + }, + { + "epoch": 0.1960571867857456, + "grad_norm": 19842.263671875, + "learning_rate": 9.58735018099419e-05, + "loss": 0.4869, + "step": 38000 + }, + { + "epoch": 0.19631515676835842, + "grad_norm": 21241.00390625, + "learning_rate": 9.585895769677897e-05, + "loss": 0.4746, + "step": 38050 + }, + { + "epoch": 0.19657312675097124, + "grad_norm": 19803.2265625, + "learning_rate": 9.584438910499245e-05, + "loss": 0.4824, + "step": 38100 + }, + { + "epoch": 0.1968310967335841, + "grad_norm": 18873.744140625, + "learning_rate": 9.582979604235873e-05, + "loss": 0.4817, + "step": 38150 + }, + { + "epoch": 0.19708906671619691, + "grad_norm": 19128.8828125, + "learning_rate": 9.581517851666734e-05, + "loss": 0.482, + "step": 38200 + }, + { + "epoch": 0.19734703669880974, + "grad_norm": 20514.16796875, + "learning_rate": 9.580053653572081e-05, + "loss": 0.4781, + "step": 38250 + }, + { + "epoch": 0.19760500668142256, + "grad_norm": 19135.58984375, + "learning_rate": 9.578587010733475e-05, + "loss": 0.4815, + "step": 38300 + }, + { + "epoch": 0.19786297666403538, + "grad_norm": 22849.197265625, + "learning_rate": 9.577117923933782e-05, + "loss": 0.4794, + "step": 38350 + }, + { + "epoch": 0.1981209466466482, + "grad_norm": 21278.736328125, + "learning_rate": 9.575646393957173e-05, + "loss": 0.4832, + "step": 38400 + }, + { + "epoch": 0.19837891662926102, + "grad_norm": 19292.162109375, + "learning_rate": 9.57417242158912e-05, + "loss": 0.4876, + "step": 38450 + }, + { + "epoch": 0.19863688661187384, + "grad_norm": 17778.423828125, + "learning_rate": 9.572696007616402e-05, + "loss": 0.4842, + "step": 38500 + }, + { + "epoch": 0.19889485659448666, + "grad_norm": 18855.140625, + "learning_rate": 9.5712171528271e-05, + "loss": 0.4846, + "step": 38550 + }, + { + "epoch": 0.19915282657709948, + "grad_norm": 21640.8203125, + "learning_rate": 9.5697358580106e-05, + "loss": 0.4829, + "step": 38600 + }, + { + "epoch": 0.1994107965597123, + "grad_norm": 19358.3828125, + "learning_rate": 9.568252123957586e-05, + "loss": 0.4806, + "step": 38650 + }, + { + "epoch": 0.19966876654232513, + "grad_norm": 20781.98828125, + "learning_rate": 9.566765951460046e-05, + "loss": 0.4849, + "step": 38700 + }, + { + "epoch": 0.19992673652493795, + "grad_norm": 20604.7265625, + "learning_rate": 9.565277341311271e-05, + "loss": 0.4856, + "step": 38750 + }, + { + "epoch": 0.20018470650755077, + "grad_norm": 20930.048828125, + "learning_rate": 9.563786294305854e-05, + "loss": 0.4812, + "step": 38800 + }, + { + "epoch": 0.20044267649016362, + "grad_norm": 22721.259765625, + "learning_rate": 9.562292811239686e-05, + "loss": 0.4857, + "step": 38850 + }, + { + "epoch": 0.20070064647277644, + "grad_norm": 19667.57421875, + "learning_rate": 9.560796892909957e-05, + "loss": 0.483, + "step": 38900 + }, + { + "epoch": 0.20095861645538926, + "grad_norm": 18259.19140625, + "learning_rate": 9.559298540115164e-05, + "loss": 0.4851, + "step": 38950 + }, + { + "epoch": 0.20121658643800208, + "grad_norm": 20980.18359375, + "learning_rate": 9.557797753655096e-05, + "loss": 0.4815, + "step": 39000 + }, + { + "epoch": 0.2014745564206149, + "grad_norm": 19840.025390625, + "learning_rate": 9.556294534330841e-05, + "loss": 0.4878, + "step": 39050 + }, + { + "epoch": 0.20173252640322772, + "grad_norm": 20406.69921875, + "learning_rate": 9.554788882944792e-05, + "loss": 0.481, + "step": 39100 + }, + { + "epoch": 0.20199049638584055, + "grad_norm": 19177.447265625, + "learning_rate": 9.553280800300637e-05, + "loss": 0.4857, + "step": 39150 + }, + { + "epoch": 0.20224846636845337, + "grad_norm": 21242.21875, + "learning_rate": 9.551770287203359e-05, + "loss": 0.4889, + "step": 39200 + }, + { + "epoch": 0.2025064363510662, + "grad_norm": 19343.58203125, + "learning_rate": 9.550257344459241e-05, + "loss": 0.482, + "step": 39250 + }, + { + "epoch": 0.202764406333679, + "grad_norm": 21327.587890625, + "learning_rate": 9.548741972875863e-05, + "loss": 0.4802, + "step": 39300 + }, + { + "epoch": 0.20302237631629183, + "grad_norm": 21366.98828125, + "learning_rate": 9.547224173262102e-05, + "loss": 0.4779, + "step": 39350 + }, + { + "epoch": 0.20328034629890465, + "grad_norm": 20876.39453125, + "learning_rate": 9.545703946428128e-05, + "loss": 0.4843, + "step": 39400 + }, + { + "epoch": 0.20353831628151747, + "grad_norm": 21280.873046875, + "learning_rate": 9.544181293185413e-05, + "loss": 0.4805, + "step": 39450 + }, + { + "epoch": 0.2037962862641303, + "grad_norm": 19546.134765625, + "learning_rate": 9.542656214346713e-05, + "loss": 0.4753, + "step": 39500 + }, + { + "epoch": 0.20405425624674312, + "grad_norm": 19179.05859375, + "learning_rate": 9.541128710726091e-05, + "loss": 0.4812, + "step": 39550 + }, + { + "epoch": 0.20431222622935596, + "grad_norm": 23525.50390625, + "learning_rate": 9.539598783138897e-05, + "loss": 0.4843, + "step": 39600 + }, + { + "epoch": 0.20457019621196879, + "grad_norm": 19369.103515625, + "learning_rate": 9.538066432401775e-05, + "loss": 0.4788, + "step": 39650 + }, + { + "epoch": 0.2048281661945816, + "grad_norm": 20777.119140625, + "learning_rate": 9.536531659332667e-05, + "loss": 0.4779, + "step": 39700 + }, + { + "epoch": 0.20508613617719443, + "grad_norm": 18987.701171875, + "learning_rate": 9.534994464750806e-05, + "loss": 0.4807, + "step": 39750 + }, + { + "epoch": 0.20534410615980725, + "grad_norm": 19523.873046875, + "learning_rate": 9.533454849476712e-05, + "loss": 0.4798, + "step": 39800 + }, + { + "epoch": 0.20560207614242007, + "grad_norm": 21302.05859375, + "learning_rate": 9.531912814332206e-05, + "loss": 0.4811, + "step": 39850 + }, + { + "epoch": 0.2058600461250329, + "grad_norm": 21545.626953125, + "learning_rate": 9.530368360140394e-05, + "loss": 0.4814, + "step": 39900 + }, + { + "epoch": 0.2061180161076457, + "grad_norm": 22709.7265625, + "learning_rate": 9.528821487725678e-05, + "loss": 0.4827, + "step": 39950 + }, + { + "epoch": 0.20637598609025853, + "grad_norm": 20853.228515625, + "learning_rate": 9.527272197913746e-05, + "loss": 0.4838, + "step": 40000 + }, + { + "epoch": 0.20637598609025853, + "eval_loss": 0.47092095017433167, + "eval_runtime": 3339.7722, + "eval_samples_per_second": 928.542, + "eval_steps_per_second": 1.814, + "step": 40000 + }, + { + "epoch": 0.20663395607287136, + "grad_norm": 18389.748046875, + "learning_rate": 9.525720491531581e-05, + "loss": 0.4809, + "step": 40050 + }, + { + "epoch": 0.20689192605548418, + "grad_norm": 20328.59765625, + "learning_rate": 9.524166369407453e-05, + "loss": 0.4827, + "step": 40100 + }, + { + "epoch": 0.207149896038097, + "grad_norm": 21094.966796875, + "learning_rate": 9.522609832370924e-05, + "loss": 0.484, + "step": 40150 + }, + { + "epoch": 0.20740786602070982, + "grad_norm": 22630.64453125, + "learning_rate": 9.52105088125284e-05, + "loss": 0.4829, + "step": 40200 + }, + { + "epoch": 0.20766583600332264, + "grad_norm": 19477.7265625, + "learning_rate": 9.51948951688534e-05, + "loss": 0.4793, + "step": 40250 + }, + { + "epoch": 0.2079238059859355, + "grad_norm": 20242.53125, + "learning_rate": 9.517925740101851e-05, + "loss": 0.4797, + "step": 40300 + }, + { + "epoch": 0.2081817759685483, + "grad_norm": 19952.421875, + "learning_rate": 9.516359551737087e-05, + "loss": 0.4785, + "step": 40350 + }, + { + "epoch": 0.20843974595116113, + "grad_norm": 19216.220703125, + "learning_rate": 9.514790952627049e-05, + "loss": 0.4753, + "step": 40400 + }, + { + "epoch": 0.20869771593377395, + "grad_norm": 20297.515625, + "learning_rate": 9.513219943609024e-05, + "loss": 0.4792, + "step": 40450 + }, + { + "epoch": 0.20895568591638677, + "grad_norm": 19528.7890625, + "learning_rate": 9.511646525521585e-05, + "loss": 0.4801, + "step": 40500 + }, + { + "epoch": 0.2092136558989996, + "grad_norm": 18037.7890625, + "learning_rate": 9.510070699204597e-05, + "loss": 0.483, + "step": 40550 + }, + { + "epoch": 0.20947162588161242, + "grad_norm": 20636.4296875, + "learning_rate": 9.508492465499199e-05, + "loss": 0.4761, + "step": 40600 + }, + { + "epoch": 0.20972959586422524, + "grad_norm": 20096.857421875, + "learning_rate": 9.506911825247827e-05, + "loss": 0.4804, + "step": 40650 + }, + { + "epoch": 0.20998756584683806, + "grad_norm": 20855.619140625, + "learning_rate": 9.505328779294192e-05, + "loss": 0.4823, + "step": 40700 + }, + { + "epoch": 0.21024553582945088, + "grad_norm": 19640.521484375, + "learning_rate": 9.503743328483296e-05, + "loss": 0.4818, + "step": 40750 + }, + { + "epoch": 0.2105035058120637, + "grad_norm": 20990.525390625, + "learning_rate": 9.50215547366142e-05, + "loss": 0.4804, + "step": 40800 + }, + { + "epoch": 0.21076147579467652, + "grad_norm": 18773.564453125, + "learning_rate": 9.500565215676132e-05, + "loss": 0.4798, + "step": 40850 + }, + { + "epoch": 0.21101944577728934, + "grad_norm": 18688.7265625, + "learning_rate": 9.498972555376282e-05, + "loss": 0.4773, + "step": 40900 + }, + { + "epoch": 0.21127741575990217, + "grad_norm": 22649.3671875, + "learning_rate": 9.497377493611998e-05, + "loss": 0.478, + "step": 40950 + }, + { + "epoch": 0.21153538574251501, + "grad_norm": 19575.95703125, + "learning_rate": 9.495780031234694e-05, + "loss": 0.4809, + "step": 41000 + }, + { + "epoch": 0.21179335572512784, + "grad_norm": 18587.681640625, + "learning_rate": 9.494180169097067e-05, + "loss": 0.4805, + "step": 41050 + }, + { + "epoch": 0.21205132570774066, + "grad_norm": 19466.5703125, + "learning_rate": 9.492577908053089e-05, + "loss": 0.4772, + "step": 41100 + }, + { + "epoch": 0.21230929569035348, + "grad_norm": 21085.15234375, + "learning_rate": 9.490973248958018e-05, + "loss": 0.4787, + "step": 41150 + }, + { + "epoch": 0.2125672656729663, + "grad_norm": 21866.95703125, + "learning_rate": 9.489366192668388e-05, + "loss": 0.4803, + "step": 41200 + }, + { + "epoch": 0.21282523565557912, + "grad_norm": 20759.609375, + "learning_rate": 9.487756740042015e-05, + "loss": 0.4782, + "step": 41250 + }, + { + "epoch": 0.21308320563819194, + "grad_norm": 20565.51171875, + "learning_rate": 9.486144891937997e-05, + "loss": 0.4765, + "step": 41300 + }, + { + "epoch": 0.21334117562080476, + "grad_norm": 21536.017578125, + "learning_rate": 9.484530649216705e-05, + "loss": 0.4753, + "step": 41350 + }, + { + "epoch": 0.21359914560341758, + "grad_norm": 19452.001953125, + "learning_rate": 9.482914012739788e-05, + "loss": 0.4807, + "step": 41400 + }, + { + "epoch": 0.2138571155860304, + "grad_norm": 21220.927734375, + "learning_rate": 9.481294983370179e-05, + "loss": 0.4803, + "step": 41450 + }, + { + "epoch": 0.21411508556864323, + "grad_norm": 18278.884765625, + "learning_rate": 9.479673561972082e-05, + "loss": 0.4807, + "step": 41500 + }, + { + "epoch": 0.21437305555125605, + "grad_norm": 21568.13671875, + "learning_rate": 9.478049749410983e-05, + "loss": 0.4751, + "step": 41550 + }, + { + "epoch": 0.21463102553386887, + "grad_norm": 21004.734375, + "learning_rate": 9.47642354655364e-05, + "loss": 0.4828, + "step": 41600 + }, + { + "epoch": 0.2148889955164817, + "grad_norm": 20709.193359375, + "learning_rate": 9.474794954268089e-05, + "loss": 0.477, + "step": 41650 + }, + { + "epoch": 0.21514696549909454, + "grad_norm": 21408.3671875, + "learning_rate": 9.47316397342364e-05, + "loss": 0.4783, + "step": 41700 + }, + { + "epoch": 0.21540493548170736, + "grad_norm": 18606.6328125, + "learning_rate": 9.47153060489088e-05, + "loss": 0.4771, + "step": 41750 + }, + { + "epoch": 0.21566290546432018, + "grad_norm": 19498.20703125, + "learning_rate": 9.469894849541667e-05, + "loss": 0.4782, + "step": 41800 + }, + { + "epoch": 0.215920875446933, + "grad_norm": 20441.9765625, + "learning_rate": 9.46825670824914e-05, + "loss": 0.4769, + "step": 41850 + }, + { + "epoch": 0.21617884542954582, + "grad_norm": 20925.109375, + "learning_rate": 9.466616181887704e-05, + "loss": 0.4858, + "step": 41900 + }, + { + "epoch": 0.21643681541215865, + "grad_norm": 21410.38671875, + "learning_rate": 9.464973271333042e-05, + "loss": 0.4791, + "step": 41950 + }, + { + "epoch": 0.21669478539477147, + "grad_norm": 19169.583984375, + "learning_rate": 9.463327977462106e-05, + "loss": 0.4783, + "step": 42000 + }, + { + "epoch": 0.2169527553773843, + "grad_norm": 19487.3359375, + "learning_rate": 9.461680301153124e-05, + "loss": 0.4792, + "step": 42050 + }, + { + "epoch": 0.2172107253599971, + "grad_norm": 21303.861328125, + "learning_rate": 9.460030243285592e-05, + "loss": 0.4811, + "step": 42100 + }, + { + "epoch": 0.21746869534260993, + "grad_norm": 21529.490234375, + "learning_rate": 9.458377804740279e-05, + "loss": 0.4761, + "step": 42150 + }, + { + "epoch": 0.21772666532522275, + "grad_norm": 21356.505859375, + "learning_rate": 9.456722986399227e-05, + "loss": 0.477, + "step": 42200 + }, + { + "epoch": 0.21798463530783557, + "grad_norm": 19551.33203125, + "learning_rate": 9.455065789145742e-05, + "loss": 0.4777, + "step": 42250 + }, + { + "epoch": 0.2182426052904484, + "grad_norm": 21424.58984375, + "learning_rate": 9.453406213864408e-05, + "loss": 0.4759, + "step": 42300 + }, + { + "epoch": 0.21850057527306121, + "grad_norm": 18835.1953125, + "learning_rate": 9.451744261441072e-05, + "loss": 0.4749, + "step": 42350 + }, + { + "epoch": 0.21875854525567404, + "grad_norm": 20333.490234375, + "learning_rate": 9.450079932762852e-05, + "loss": 0.4786, + "step": 42400 + }, + { + "epoch": 0.21901651523828689, + "grad_norm": 18957.232421875, + "learning_rate": 9.448413228718134e-05, + "loss": 0.4778, + "step": 42450 + }, + { + "epoch": 0.2192744852208997, + "grad_norm": 20251.939453125, + "learning_rate": 9.446744150196574e-05, + "loss": 0.4759, + "step": 42500 + }, + { + "epoch": 0.21953245520351253, + "grad_norm": 20740.82421875, + "learning_rate": 9.445072698089091e-05, + "loss": 0.4782, + "step": 42550 + }, + { + "epoch": 0.21979042518612535, + "grad_norm": 19501.91015625, + "learning_rate": 9.443398873287877e-05, + "loss": 0.479, + "step": 42600 + }, + { + "epoch": 0.22004839516873817, + "grad_norm": 20895.58984375, + "learning_rate": 9.441722676686386e-05, + "loss": 0.4754, + "step": 42650 + }, + { + "epoch": 0.220306365151351, + "grad_norm": 19932.66796875, + "learning_rate": 9.440044109179338e-05, + "loss": 0.4778, + "step": 42700 + }, + { + "epoch": 0.2205643351339638, + "grad_norm": 20158.693359375, + "learning_rate": 9.438363171662722e-05, + "loss": 0.4755, + "step": 42750 + }, + { + "epoch": 0.22082230511657663, + "grad_norm": 19128.953125, + "learning_rate": 9.436679865033789e-05, + "loss": 0.4744, + "step": 42800 + }, + { + "epoch": 0.22108027509918945, + "grad_norm": 19743.517578125, + "learning_rate": 9.434994190191054e-05, + "loss": 0.4781, + "step": 42850 + }, + { + "epoch": 0.22133824508180228, + "grad_norm": 17826.703125, + "learning_rate": 9.4333061480343e-05, + "loss": 0.4762, + "step": 42900 + }, + { + "epoch": 0.2215962150644151, + "grad_norm": 20606.48046875, + "learning_rate": 9.43161573946457e-05, + "loss": 0.4741, + "step": 42950 + }, + { + "epoch": 0.22185418504702792, + "grad_norm": 20116.66796875, + "learning_rate": 9.429922965384172e-05, + "loss": 0.4766, + "step": 43000 + }, + { + "epoch": 0.22211215502964074, + "grad_norm": 20560.970703125, + "learning_rate": 9.428227826696674e-05, + "loss": 0.481, + "step": 43050 + }, + { + "epoch": 0.22237012501225356, + "grad_norm": 20832.01953125, + "learning_rate": 9.42653032430691e-05, + "loss": 0.4806, + "step": 43100 + }, + { + "epoch": 0.2226280949948664, + "grad_norm": 18686.953125, + "learning_rate": 9.424830459120974e-05, + "loss": 0.4796, + "step": 43150 + }, + { + "epoch": 0.22288606497747923, + "grad_norm": 21061.240234375, + "learning_rate": 9.423128232046223e-05, + "loss": 0.474, + "step": 43200 + }, + { + "epoch": 0.22314403496009205, + "grad_norm": 21862.25, + "learning_rate": 9.421423643991267e-05, + "loss": 0.4721, + "step": 43250 + }, + { + "epoch": 0.22340200494270487, + "grad_norm": 18299.23828125, + "learning_rate": 9.419716695865988e-05, + "loss": 0.4744, + "step": 43300 + }, + { + "epoch": 0.2236599749253177, + "grad_norm": 20387.876953125, + "learning_rate": 9.418007388581517e-05, + "loss": 0.4748, + "step": 43350 + }, + { + "epoch": 0.22391794490793052, + "grad_norm": 21721.740234375, + "learning_rate": 9.416295723050254e-05, + "loss": 0.4782, + "step": 43400 + }, + { + "epoch": 0.22417591489054334, + "grad_norm": 20274.72265625, + "learning_rate": 9.414581700185851e-05, + "loss": 0.4734, + "step": 43450 + }, + { + "epoch": 0.22443388487315616, + "grad_norm": 22443.296875, + "learning_rate": 9.41286532090322e-05, + "loss": 0.4734, + "step": 43500 + }, + { + "epoch": 0.22469185485576898, + "grad_norm": 19874.8203125, + "learning_rate": 9.411146586118529e-05, + "loss": 0.4755, + "step": 43550 + }, + { + "epoch": 0.2249498248383818, + "grad_norm": 20362.3125, + "learning_rate": 9.409425496749209e-05, + "loss": 0.4776, + "step": 43600 + }, + { + "epoch": 0.22520779482099462, + "grad_norm": 22146.5078125, + "learning_rate": 9.40770205371394e-05, + "loss": 0.4784, + "step": 43650 + }, + { + "epoch": 0.22546576480360744, + "grad_norm": 19917.83203125, + "learning_rate": 9.405976257932667e-05, + "loss": 0.4744, + "step": 43700 + }, + { + "epoch": 0.22572373478622026, + "grad_norm": 19296.904296875, + "learning_rate": 9.404248110326583e-05, + "loss": 0.4766, + "step": 43750 + }, + { + "epoch": 0.22598170476883309, + "grad_norm": 20648.35546875, + "learning_rate": 9.402517611818142e-05, + "loss": 0.4801, + "step": 43800 + }, + { + "epoch": 0.22623967475144593, + "grad_norm": 21750.517578125, + "learning_rate": 9.40078476333105e-05, + "loss": 0.4752, + "step": 43850 + }, + { + "epoch": 0.22649764473405876, + "grad_norm": 21233.337890625, + "learning_rate": 9.399049565790266e-05, + "loss": 0.4758, + "step": 43900 + }, + { + "epoch": 0.22675561471667158, + "grad_norm": 21952.6796875, + "learning_rate": 9.397312020122006e-05, + "loss": 0.4755, + "step": 43950 + }, + { + "epoch": 0.2270135846992844, + "grad_norm": 18598.826171875, + "learning_rate": 9.39557212725374e-05, + "loss": 0.4725, + "step": 44000 + }, + { + "epoch": 0.22727155468189722, + "grad_norm": 20325.51171875, + "learning_rate": 9.393829888114188e-05, + "loss": 0.4789, + "step": 44050 + }, + { + "epoch": 0.22752952466451004, + "grad_norm": 17499.228515625, + "learning_rate": 9.392085303633323e-05, + "loss": 0.4738, + "step": 44100 + }, + { + "epoch": 0.22778749464712286, + "grad_norm": 21283.970703125, + "learning_rate": 9.39033837474237e-05, + "loss": 0.4743, + "step": 44150 + }, + { + "epoch": 0.22804546462973568, + "grad_norm": 19672.765625, + "learning_rate": 9.388589102373807e-05, + "loss": 0.4751, + "step": 44200 + }, + { + "epoch": 0.2283034346123485, + "grad_norm": 19722.314453125, + "learning_rate": 9.386837487461361e-05, + "loss": 0.4767, + "step": 44250 + }, + { + "epoch": 0.22856140459496133, + "grad_norm": 19948.154296875, + "learning_rate": 9.38508353094001e-05, + "loss": 0.4765, + "step": 44300 + }, + { + "epoch": 0.22881937457757415, + "grad_norm": 19880.611328125, + "learning_rate": 9.383327233745984e-05, + "loss": 0.4754, + "step": 44350 + }, + { + "epoch": 0.22907734456018697, + "grad_norm": 20052.91796875, + "learning_rate": 9.381568596816757e-05, + "loss": 0.4801, + "step": 44400 + }, + { + "epoch": 0.2293353145427998, + "grad_norm": 23129.869140625, + "learning_rate": 9.379807621091057e-05, + "loss": 0.4713, + "step": 44450 + }, + { + "epoch": 0.2295932845254126, + "grad_norm": 19922.0703125, + "learning_rate": 9.37804430750886e-05, + "loss": 0.4736, + "step": 44500 + }, + { + "epoch": 0.22985125450802546, + "grad_norm": 19704.24609375, + "learning_rate": 9.376278657011388e-05, + "loss": 0.4682, + "step": 44550 + }, + { + "epoch": 0.23010922449063828, + "grad_norm": 19080.125, + "learning_rate": 9.374510670541109e-05, + "loss": 0.4751, + "step": 44600 + }, + { + "epoch": 0.2303671944732511, + "grad_norm": 20858.388671875, + "learning_rate": 9.372740349041742e-05, + "loss": 0.4734, + "step": 44650 + }, + { + "epoch": 0.23062516445586392, + "grad_norm": 22074.056640625, + "learning_rate": 9.37096769345825e-05, + "loss": 0.4699, + "step": 44700 + }, + { + "epoch": 0.23088313443847674, + "grad_norm": 21852.623046875, + "learning_rate": 9.369192704736842e-05, + "loss": 0.47, + "step": 44750 + }, + { + "epoch": 0.23114110442108957, + "grad_norm": 20904.033203125, + "learning_rate": 9.367415383824974e-05, + "loss": 0.4736, + "step": 44800 + }, + { + "epoch": 0.2313990744037024, + "grad_norm": 18965.021484375, + "learning_rate": 9.365635731671343e-05, + "loss": 0.4687, + "step": 44850 + }, + { + "epoch": 0.2316570443863152, + "grad_norm": 16994.271484375, + "learning_rate": 9.363853749225894e-05, + "loss": 0.4747, + "step": 44900 + }, + { + "epoch": 0.23191501436892803, + "grad_norm": 19191.794921875, + "learning_rate": 9.362069437439814e-05, + "loss": 0.4689, + "step": 44950 + }, + { + "epoch": 0.23217298435154085, + "grad_norm": 19691.982421875, + "learning_rate": 9.360282797265537e-05, + "loss": 0.4683, + "step": 45000 + }, + { + "epoch": 0.23217298435154085, + "eval_loss": 0.4633353352546692, + "eval_runtime": 3256.5731, + "eval_samples_per_second": 952.265, + "eval_steps_per_second": 1.86, + "step": 45000 + }, + { + "epoch": 0.23243095433415367, + "grad_norm": 21778.20703125, + "learning_rate": 9.358493829656732e-05, + "loss": 0.4726, + "step": 45050 + }, + { + "epoch": 0.2326889243167665, + "grad_norm": 20281.802734375, + "learning_rate": 9.35670253556832e-05, + "loss": 0.4752, + "step": 45100 + }, + { + "epoch": 0.23294689429937931, + "grad_norm": 20620.580078125, + "learning_rate": 9.354908915956456e-05, + "loss": 0.474, + "step": 45150 + }, + { + "epoch": 0.23320486428199214, + "grad_norm": 21115.86328125, + "learning_rate": 9.353112971778542e-05, + "loss": 0.4763, + "step": 45200 + }, + { + "epoch": 0.23346283426460496, + "grad_norm": 19746.30859375, + "learning_rate": 9.351314703993215e-05, + "loss": 0.4792, + "step": 45250 + }, + { + "epoch": 0.2337208042472178, + "grad_norm": 21270.26171875, + "learning_rate": 9.349514113560358e-05, + "loss": 0.4726, + "step": 45300 + }, + { + "epoch": 0.23397877422983063, + "grad_norm": 20273.658203125, + "learning_rate": 9.347711201441092e-05, + "loss": 0.4683, + "step": 45350 + }, + { + "epoch": 0.23423674421244345, + "grad_norm": 19746.9609375, + "learning_rate": 9.345905968597773e-05, + "loss": 0.4778, + "step": 45400 + }, + { + "epoch": 0.23449471419505627, + "grad_norm": 22999.52734375, + "learning_rate": 9.344098415994003e-05, + "loss": 0.4799, + "step": 45450 + }, + { + "epoch": 0.2347526841776691, + "grad_norm": 19922.41015625, + "learning_rate": 9.342288544594617e-05, + "loss": 0.4773, + "step": 45500 + }, + { + "epoch": 0.2350106541602819, + "grad_norm": 19793.73828125, + "learning_rate": 9.340476355365688e-05, + "loss": 0.4743, + "step": 45550 + }, + { + "epoch": 0.23526862414289473, + "grad_norm": 19525.74609375, + "learning_rate": 9.33866184927453e-05, + "loss": 0.4729, + "step": 45600 + }, + { + "epoch": 0.23552659412550755, + "grad_norm": 26093.65625, + "learning_rate": 9.336845027289691e-05, + "loss": 0.4767, + "step": 45650 + }, + { + "epoch": 0.23578456410812038, + "grad_norm": 20045.16796875, + "learning_rate": 9.335025890380953e-05, + "loss": 0.4768, + "step": 45700 + }, + { + "epoch": 0.2360425340907332, + "grad_norm": 21272.36328125, + "learning_rate": 9.333204439519338e-05, + "loss": 0.4738, + "step": 45750 + }, + { + "epoch": 0.23630050407334602, + "grad_norm": 19174.44921875, + "learning_rate": 9.3313806756771e-05, + "loss": 0.4752, + "step": 45800 + }, + { + "epoch": 0.23655847405595884, + "grad_norm": 18446.640625, + "learning_rate": 9.32955459982773e-05, + "loss": 0.4747, + "step": 45850 + }, + { + "epoch": 0.23681644403857166, + "grad_norm": 23397.7109375, + "learning_rate": 9.327726212945953e-05, + "loss": 0.4723, + "step": 45900 + }, + { + "epoch": 0.23707441402118448, + "grad_norm": 20350.755859375, + "learning_rate": 9.325895516007725e-05, + "loss": 0.4671, + "step": 45950 + }, + { + "epoch": 0.23733238400379733, + "grad_norm": 21147.5546875, + "learning_rate": 9.324062509990235e-05, + "loss": 0.4689, + "step": 46000 + }, + { + "epoch": 0.23759035398641015, + "grad_norm": 19813.130859375, + "learning_rate": 9.322227195871909e-05, + "loss": 0.4723, + "step": 46050 + }, + { + "epoch": 0.23784832396902297, + "grad_norm": 22310.037109375, + "learning_rate": 9.320389574632399e-05, + "loss": 0.4727, + "step": 46100 + }, + { + "epoch": 0.2381062939516358, + "grad_norm": 19646.509765625, + "learning_rate": 9.318549647252596e-05, + "loss": 0.4723, + "step": 46150 + }, + { + "epoch": 0.23836426393424862, + "grad_norm": 20145.29296875, + "learning_rate": 9.316707414714614e-05, + "loss": 0.4652, + "step": 46200 + }, + { + "epoch": 0.23862223391686144, + "grad_norm": 19513.466796875, + "learning_rate": 9.314862878001803e-05, + "loss": 0.4774, + "step": 46250 + }, + { + "epoch": 0.23888020389947426, + "grad_norm": 20701.25390625, + "learning_rate": 9.313016038098739e-05, + "loss": 0.4721, + "step": 46300 + }, + { + "epoch": 0.23913817388208708, + "grad_norm": 18766.328125, + "learning_rate": 9.31116689599123e-05, + "loss": 0.4691, + "step": 46350 + }, + { + "epoch": 0.2393961438646999, + "grad_norm": 20925.5, + "learning_rate": 9.309315452666314e-05, + "loss": 0.4743, + "step": 46400 + }, + { + "epoch": 0.23965411384731272, + "grad_norm": 19413.0703125, + "learning_rate": 9.307461709112253e-05, + "loss": 0.469, + "step": 46450 + }, + { + "epoch": 0.23991208382992554, + "grad_norm": 18517.669921875, + "learning_rate": 9.305605666318543e-05, + "loss": 0.4769, + "step": 46500 + }, + { + "epoch": 0.24017005381253836, + "grad_norm": 20222.50390625, + "learning_rate": 9.3037473252759e-05, + "loss": 0.4701, + "step": 46550 + }, + { + "epoch": 0.24042802379515119, + "grad_norm": 21650.63671875, + "learning_rate": 9.301886686976272e-05, + "loss": 0.4693, + "step": 46600 + }, + { + "epoch": 0.240685993777764, + "grad_norm": 18923.498046875, + "learning_rate": 9.300023752412832e-05, + "loss": 0.4749, + "step": 46650 + }, + { + "epoch": 0.24094396376037686, + "grad_norm": 21353.748046875, + "learning_rate": 9.298158522579978e-05, + "loss": 0.4735, + "step": 46700 + }, + { + "epoch": 0.24120193374298968, + "grad_norm": 19405.5234375, + "learning_rate": 9.296290998473334e-05, + "loss": 0.4708, + "step": 46750 + }, + { + "epoch": 0.2414599037256025, + "grad_norm": 21692.3203125, + "learning_rate": 9.294421181089747e-05, + "loss": 0.4644, + "step": 46800 + }, + { + "epoch": 0.24171787370821532, + "grad_norm": 18488.671875, + "learning_rate": 9.292549071427291e-05, + "loss": 0.4668, + "step": 46850 + }, + { + "epoch": 0.24197584369082814, + "grad_norm": 21951.712890625, + "learning_rate": 9.29067467048526e-05, + "loss": 0.4749, + "step": 46900 + }, + { + "epoch": 0.24223381367344096, + "grad_norm": 20673.82421875, + "learning_rate": 9.288797979264176e-05, + "loss": 0.4687, + "step": 46950 + }, + { + "epoch": 0.24249178365605378, + "grad_norm": 18687.69140625, + "learning_rate": 9.286918998765776e-05, + "loss": 0.4731, + "step": 47000 + }, + { + "epoch": 0.2427497536386666, + "grad_norm": 18882.009765625, + "learning_rate": 9.285037729993027e-05, + "loss": 0.4699, + "step": 47050 + }, + { + "epoch": 0.24300772362127943, + "grad_norm": 22378.685546875, + "learning_rate": 9.283154173950112e-05, + "loss": 0.4678, + "step": 47100 + }, + { + "epoch": 0.24326569360389225, + "grad_norm": 19457.736328125, + "learning_rate": 9.281268331642439e-05, + "loss": 0.4665, + "step": 47150 + }, + { + "epoch": 0.24352366358650507, + "grad_norm": 19794.4296875, + "learning_rate": 9.279380204076631e-05, + "loss": 0.4683, + "step": 47200 + }, + { + "epoch": 0.2437816335691179, + "grad_norm": 18910.41796875, + "learning_rate": 9.277489792260536e-05, + "loss": 0.4683, + "step": 47250 + }, + { + "epoch": 0.2440396035517307, + "grad_norm": 21774.009765625, + "learning_rate": 9.275597097203216e-05, + "loss": 0.4729, + "step": 47300 + }, + { + "epoch": 0.24429757353434353, + "grad_norm": 21403.1796875, + "learning_rate": 9.273702119914962e-05, + "loss": 0.4681, + "step": 47350 + }, + { + "epoch": 0.24455554351695638, + "grad_norm": 20333.400390625, + "learning_rate": 9.271804861407269e-05, + "loss": 0.4713, + "step": 47400 + }, + { + "epoch": 0.2448135134995692, + "grad_norm": 22196.32421875, + "learning_rate": 9.269905322692862e-05, + "loss": 0.468, + "step": 47450 + }, + { + "epoch": 0.24507148348218202, + "grad_norm": 18356.623046875, + "learning_rate": 9.268003504785673e-05, + "loss": 0.4663, + "step": 47500 + }, + { + "epoch": 0.24532945346479484, + "grad_norm": 20337.546875, + "learning_rate": 9.266099408700859e-05, + "loss": 0.4657, + "step": 47550 + }, + { + "epoch": 0.24558742344740767, + "grad_norm": 20426.03515625, + "learning_rate": 9.264193035454789e-05, + "loss": 0.4677, + "step": 47600 + }, + { + "epoch": 0.2458453934300205, + "grad_norm": 20962.81640625, + "learning_rate": 9.262284386065047e-05, + "loss": 0.4759, + "step": 47650 + }, + { + "epoch": 0.2461033634126333, + "grad_norm": 20498.919921875, + "learning_rate": 9.260373461550435e-05, + "loss": 0.4647, + "step": 47700 + }, + { + "epoch": 0.24636133339524613, + "grad_norm": 21223.171875, + "learning_rate": 9.258460262930967e-05, + "loss": 0.4698, + "step": 47750 + }, + { + "epoch": 0.24661930337785895, + "grad_norm": 21146.671875, + "learning_rate": 9.256544791227871e-05, + "loss": 0.4727, + "step": 47800 + }, + { + "epoch": 0.24687727336047177, + "grad_norm": 19261.603515625, + "learning_rate": 9.254627047463588e-05, + "loss": 0.4734, + "step": 47850 + }, + { + "epoch": 0.2471352433430846, + "grad_norm": 21131.298828125, + "learning_rate": 9.252707032661774e-05, + "loss": 0.4686, + "step": 47900 + }, + { + "epoch": 0.24739321332569741, + "grad_norm": 22491.212890625, + "learning_rate": 9.250784747847294e-05, + "loss": 0.4701, + "step": 47950 + }, + { + "epoch": 0.24765118330831024, + "grad_norm": 20198.486328125, + "learning_rate": 9.248860194046228e-05, + "loss": 0.4657, + "step": 48000 + }, + { + "epoch": 0.24790915329092306, + "grad_norm": 21754.078125, + "learning_rate": 9.246933372285863e-05, + "loss": 0.4674, + "step": 48050 + }, + { + "epoch": 0.24816712327353588, + "grad_norm": 20948.244140625, + "learning_rate": 9.245004283594703e-05, + "loss": 0.4604, + "step": 48100 + }, + { + "epoch": 0.24842509325614873, + "grad_norm": 20916.3671875, + "learning_rate": 9.243072929002454e-05, + "loss": 0.4656, + "step": 48150 + }, + { + "epoch": 0.24868306323876155, + "grad_norm": 19935.021484375, + "learning_rate": 9.24113930954004e-05, + "loss": 0.4735, + "step": 48200 + }, + { + "epoch": 0.24894103322137437, + "grad_norm": 20075.96875, + "learning_rate": 9.239203426239585e-05, + "loss": 0.4679, + "step": 48250 + }, + { + "epoch": 0.2491990032039872, + "grad_norm": 20107.943359375, + "learning_rate": 9.23726528013443e-05, + "loss": 0.4773, + "step": 48300 + }, + { + "epoch": 0.2494569731866, + "grad_norm": 20341.1171875, + "learning_rate": 9.235324872259119e-05, + "loss": 0.4699, + "step": 48350 + }, + { + "epoch": 0.24971494316921283, + "grad_norm": 21787.4296875, + "learning_rate": 9.233382203649401e-05, + "loss": 0.4665, + "step": 48400 + }, + { + "epoch": 0.24997291315182565, + "grad_norm": 17707.583984375, + "learning_rate": 9.231437275342239e-05, + "loss": 0.4678, + "step": 48450 + }, + { + "epoch": 0.2502308831344385, + "grad_norm": 24467.810546875, + "learning_rate": 9.229490088375797e-05, + "loss": 0.466, + "step": 48500 + }, + { + "epoch": 0.2504888531170513, + "grad_norm": 20794.73828125, + "learning_rate": 9.227540643789446e-05, + "loss": 0.4711, + "step": 48550 + }, + { + "epoch": 0.2507468230996641, + "grad_norm": 20147.099609375, + "learning_rate": 9.225588942623758e-05, + "loss": 0.4689, + "step": 48600 + }, + { + "epoch": 0.25100479308227697, + "grad_norm": 20704.037109375, + "learning_rate": 9.223634985920517e-05, + "loss": 0.4687, + "step": 48650 + }, + { + "epoch": 0.25126276306488976, + "grad_norm": 19472.21875, + "learning_rate": 9.221678774722707e-05, + "loss": 0.4636, + "step": 48700 + }, + { + "epoch": 0.2515207330475026, + "grad_norm": 21352.755859375, + "learning_rate": 9.219720310074515e-05, + "loss": 0.4671, + "step": 48750 + }, + { + "epoch": 0.2517787030301154, + "grad_norm": 20956.146484375, + "learning_rate": 9.21775959302133e-05, + "loss": 0.4703, + "step": 48800 + }, + { + "epoch": 0.25203667301272825, + "grad_norm": 26295.541015625, + "learning_rate": 9.215796624609749e-05, + "loss": 0.4742, + "step": 48850 + }, + { + "epoch": 0.25229464299534105, + "grad_norm": 19862.15625, + "learning_rate": 9.213831405887564e-05, + "loss": 0.468, + "step": 48900 + }, + { + "epoch": 0.2525526129779539, + "grad_norm": 21760.404296875, + "learning_rate": 9.211863937903769e-05, + "loss": 0.4728, + "step": 48950 + }, + { + "epoch": 0.2528105829605667, + "grad_norm": 22488.1484375, + "learning_rate": 9.209894221708564e-05, + "loss": 0.4627, + "step": 49000 + }, + { + "epoch": 0.25306855294317954, + "grad_norm": 20244.5, + "learning_rate": 9.20792225835334e-05, + "loss": 0.4706, + "step": 49050 + }, + { + "epoch": 0.25332652292579233, + "grad_norm": 22642.44140625, + "learning_rate": 9.205948048890698e-05, + "loss": 0.4708, + "step": 49100 + }, + { + "epoch": 0.2535844929084052, + "grad_norm": 23121.501953125, + "learning_rate": 9.203971594374432e-05, + "loss": 0.4723, + "step": 49150 + }, + { + "epoch": 0.25384246289101803, + "grad_norm": 19514.916015625, + "learning_rate": 9.201992895859532e-05, + "loss": 0.4692, + "step": 49200 + }, + { + "epoch": 0.2541004328736308, + "grad_norm": 19467.662109375, + "learning_rate": 9.200011954402193e-05, + "loss": 0.4719, + "step": 49250 + }, + { + "epoch": 0.25435840285624367, + "grad_norm": 20737.7578125, + "learning_rate": 9.198028771059799e-05, + "loss": 0.4643, + "step": 49300 + }, + { + "epoch": 0.25461637283885646, + "grad_norm": 20229.341796875, + "learning_rate": 9.196043346890939e-05, + "loss": 0.462, + "step": 49350 + }, + { + "epoch": 0.2548743428214693, + "grad_norm": 23094.35546875, + "learning_rate": 9.194055682955392e-05, + "loss": 0.4701, + "step": 49400 + }, + { + "epoch": 0.2551323128040821, + "grad_norm": 21099.541015625, + "learning_rate": 9.192065780314132e-05, + "loss": 0.466, + "step": 49450 + }, + { + "epoch": 0.25539028278669496, + "grad_norm": 21500.302734375, + "learning_rate": 9.190073640029335e-05, + "loss": 0.4703, + "step": 49500 + }, + { + "epoch": 0.25564825276930775, + "grad_norm": 24272.228515625, + "learning_rate": 9.188079263164366e-05, + "loss": 0.4672, + "step": 49550 + }, + { + "epoch": 0.2559062227519206, + "grad_norm": 21129.013671875, + "learning_rate": 9.186082650783783e-05, + "loss": 0.4715, + "step": 49600 + }, + { + "epoch": 0.2561641927345334, + "grad_norm": 20696.32421875, + "learning_rate": 9.184083803953339e-05, + "loss": 0.4646, + "step": 49650 + }, + { + "epoch": 0.25642216271714624, + "grad_norm": 20142.7890625, + "learning_rate": 9.18208272373998e-05, + "loss": 0.4627, + "step": 49700 + }, + { + "epoch": 0.25668013269975903, + "grad_norm": 18810.43359375, + "learning_rate": 9.180079411211847e-05, + "loss": 0.4659, + "step": 49750 + }, + { + "epoch": 0.2569381026823719, + "grad_norm": 23121.84765625, + "learning_rate": 9.178073867438264e-05, + "loss": 0.4683, + "step": 49800 + }, + { + "epoch": 0.2571960726649847, + "grad_norm": 20432.021484375, + "learning_rate": 9.176066093489755e-05, + "loss": 0.4704, + "step": 49850 + }, + { + "epoch": 0.2574540426475975, + "grad_norm": 22056.09765625, + "learning_rate": 9.17405609043803e-05, + "loss": 0.4753, + "step": 49900 + }, + { + "epoch": 0.2577120126302104, + "grad_norm": 21094.931640625, + "learning_rate": 9.17204385935599e-05, + "loss": 0.4648, + "step": 49950 + }, + { + "epoch": 0.25796998261282317, + "grad_norm": 20127.525390625, + "learning_rate": 9.170029401317725e-05, + "loss": 0.4646, + "step": 50000 + }, + { + "epoch": 0.25796998261282317, + "eval_loss": 0.4567689299583435, + "eval_runtime": 3268.0543, + "eval_samples_per_second": 948.919, + "eval_steps_per_second": 1.853, + "step": 50000 + }, + { + "epoch": 0.258227952595436, + "grad_norm": 20947.306640625, + "learning_rate": 9.168012717398516e-05, + "loss": 0.4688, + "step": 50050 + }, + { + "epoch": 0.2584859225780488, + "grad_norm": 23591.646484375, + "learning_rate": 9.165993808674823e-05, + "loss": 0.4683, + "step": 50100 + }, + { + "epoch": 0.25874389256066166, + "grad_norm": 21227.677734375, + "learning_rate": 9.163972676224306e-05, + "loss": 0.4671, + "step": 50150 + }, + { + "epoch": 0.25900186254327445, + "grad_norm": 20084.953125, + "learning_rate": 9.161949321125807e-05, + "loss": 0.4598, + "step": 50200 + }, + { + "epoch": 0.2592598325258873, + "grad_norm": 21139.5, + "learning_rate": 9.159923744459349e-05, + "loss": 0.4707, + "step": 50250 + }, + { + "epoch": 0.2595178025085001, + "grad_norm": 20410.794921875, + "learning_rate": 9.15789594730615e-05, + "loss": 0.4675, + "step": 50300 + }, + { + "epoch": 0.25977577249111294, + "grad_norm": 20010.328125, + "learning_rate": 9.155865930748608e-05, + "loss": 0.4599, + "step": 50350 + }, + { + "epoch": 0.26003374247372574, + "grad_norm": 23502.890625, + "learning_rate": 9.153833695870304e-05, + "loss": 0.4664, + "step": 50400 + }, + { + "epoch": 0.2602917124563386, + "grad_norm": 20373.498046875, + "learning_rate": 9.151799243756008e-05, + "loss": 0.4655, + "step": 50450 + }, + { + "epoch": 0.2605496824389514, + "grad_norm": 21093.669921875, + "learning_rate": 9.149762575491671e-05, + "loss": 0.4623, + "step": 50500 + }, + { + "epoch": 0.26080765242156423, + "grad_norm": 22206.87890625, + "learning_rate": 9.147723692164427e-05, + "loss": 0.4687, + "step": 50550 + }, + { + "epoch": 0.261065622404177, + "grad_norm": 23264.875, + "learning_rate": 9.145682594862593e-05, + "loss": 0.4705, + "step": 50600 + }, + { + "epoch": 0.26132359238678987, + "grad_norm": 22029.849609375, + "learning_rate": 9.143639284675664e-05, + "loss": 0.4673, + "step": 50650 + }, + { + "epoch": 0.2615815623694027, + "grad_norm": 23016.955078125, + "learning_rate": 9.141593762694323e-05, + "loss": 0.4663, + "step": 50700 + }, + { + "epoch": 0.2618395323520155, + "grad_norm": 21590.80859375, + "learning_rate": 9.139546030010427e-05, + "loss": 0.4684, + "step": 50750 + }, + { + "epoch": 0.26209750233462836, + "grad_norm": 19839.986328125, + "learning_rate": 9.13749608771702e-05, + "loss": 0.4682, + "step": 50800 + }, + { + "epoch": 0.26235547231724116, + "grad_norm": 17922.802734375, + "learning_rate": 9.135443936908318e-05, + "loss": 0.4601, + "step": 50850 + }, + { + "epoch": 0.262613442299854, + "grad_norm": 21141.119140625, + "learning_rate": 9.133389578679723e-05, + "loss": 0.467, + "step": 50900 + }, + { + "epoch": 0.2628714122824668, + "grad_norm": 21858.158203125, + "learning_rate": 9.131333014127806e-05, + "loss": 0.4663, + "step": 50950 + }, + { + "epoch": 0.26312938226507965, + "grad_norm": 21516.46875, + "learning_rate": 9.129274244350326e-05, + "loss": 0.4656, + "step": 51000 + }, + { + "epoch": 0.26338735224769244, + "grad_norm": 21403.263671875, + "learning_rate": 9.127213270446213e-05, + "loss": 0.4717, + "step": 51050 + }, + { + "epoch": 0.2636453222303053, + "grad_norm": 20405.4296875, + "learning_rate": 9.125150093515575e-05, + "loss": 0.4656, + "step": 51100 + }, + { + "epoch": 0.2639032922129181, + "grad_norm": 21057.57421875, + "learning_rate": 9.123084714659698e-05, + "loss": 0.4655, + "step": 51150 + }, + { + "epoch": 0.26416126219553093, + "grad_norm": 19891.15234375, + "learning_rate": 9.121017134981036e-05, + "loss": 0.4706, + "step": 51200 + }, + { + "epoch": 0.2644192321781437, + "grad_norm": 20441.30078125, + "learning_rate": 9.118947355583228e-05, + "loss": 0.4707, + "step": 51250 + }, + { + "epoch": 0.2646772021607566, + "grad_norm": 22182.67578125, + "learning_rate": 9.11687537757108e-05, + "loss": 0.4633, + "step": 51300 + }, + { + "epoch": 0.2649351721433694, + "grad_norm": 18211.728515625, + "learning_rate": 9.114801202050574e-05, + "loss": 0.4677, + "step": 51350 + }, + { + "epoch": 0.2651931421259822, + "grad_norm": 20691.697265625, + "learning_rate": 9.112724830128865e-05, + "loss": 0.4634, + "step": 51400 + }, + { + "epoch": 0.26545111210859507, + "grad_norm": 19717.75390625, + "learning_rate": 9.110646262914279e-05, + "loss": 0.4647, + "step": 51450 + }, + { + "epoch": 0.26570908209120786, + "grad_norm": 19860.55078125, + "learning_rate": 9.108565501516318e-05, + "loss": 0.4665, + "step": 51500 + }, + { + "epoch": 0.2659670520738207, + "grad_norm": 20122.984375, + "learning_rate": 9.106482547045648e-05, + "loss": 0.4663, + "step": 51550 + }, + { + "epoch": 0.2662250220564335, + "grad_norm": 21214.724609375, + "learning_rate": 9.104397400614112e-05, + "loss": 0.4676, + "step": 51600 + }, + { + "epoch": 0.26648299203904635, + "grad_norm": 24545.041015625, + "learning_rate": 9.102310063334722e-05, + "loss": 0.4705, + "step": 51650 + }, + { + "epoch": 0.26674096202165914, + "grad_norm": 22479.380859375, + "learning_rate": 9.100220536321655e-05, + "loss": 0.4616, + "step": 51700 + }, + { + "epoch": 0.266998932004272, + "grad_norm": 20262.27734375, + "learning_rate": 9.098128820690264e-05, + "loss": 0.4569, + "step": 51750 + }, + { + "epoch": 0.2672569019868848, + "grad_norm": 20906.880859375, + "learning_rate": 9.096034917557062e-05, + "loss": 0.468, + "step": 51800 + }, + { + "epoch": 0.26751487196949764, + "grad_norm": 20986.455078125, + "learning_rate": 9.093938828039737e-05, + "loss": 0.4697, + "step": 51850 + }, + { + "epoch": 0.26777284195211043, + "grad_norm": 22425.681640625, + "learning_rate": 9.09184055325714e-05, + "loss": 0.4692, + "step": 51900 + }, + { + "epoch": 0.2680308119347233, + "grad_norm": 21817.744140625, + "learning_rate": 9.089740094329288e-05, + "loss": 0.4726, + "step": 51950 + }, + { + "epoch": 0.26828878191733607, + "grad_norm": 20527.017578125, + "learning_rate": 9.087637452377369e-05, + "loss": 0.459, + "step": 52000 + }, + { + "epoch": 0.2685467518999489, + "grad_norm": 24486.521484375, + "learning_rate": 9.08553262852373e-05, + "loss": 0.4624, + "step": 52050 + }, + { + "epoch": 0.26880472188256177, + "grad_norm": 20964.537109375, + "learning_rate": 9.083425623891885e-05, + "loss": 0.4657, + "step": 52100 + }, + { + "epoch": 0.26906269186517456, + "grad_norm": 20966.478515625, + "learning_rate": 9.081316439606513e-05, + "loss": 0.4723, + "step": 52150 + }, + { + "epoch": 0.2693206618477874, + "grad_norm": 20067.330078125, + "learning_rate": 9.079205076793457e-05, + "loss": 0.4644, + "step": 52200 + }, + { + "epoch": 0.2695786318304002, + "grad_norm": 21526.298828125, + "learning_rate": 9.077091536579719e-05, + "loss": 0.4602, + "step": 52250 + }, + { + "epoch": 0.26983660181301306, + "grad_norm": 20446.767578125, + "learning_rate": 9.074975820093468e-05, + "loss": 0.4671, + "step": 52300 + }, + { + "epoch": 0.27009457179562585, + "grad_norm": 19936.599609375, + "learning_rate": 9.072857928464029e-05, + "loss": 0.4626, + "step": 52350 + }, + { + "epoch": 0.2703525417782387, + "grad_norm": 21716.60546875, + "learning_rate": 9.070737862821896e-05, + "loss": 0.4642, + "step": 52400 + }, + { + "epoch": 0.2706105117608515, + "grad_norm": 17588.40625, + "learning_rate": 9.068615624298717e-05, + "loss": 0.4595, + "step": 52450 + }, + { + "epoch": 0.27086848174346434, + "grad_norm": 21721.138671875, + "learning_rate": 9.066491214027302e-05, + "loss": 0.4639, + "step": 52500 + }, + { + "epoch": 0.27112645172607713, + "grad_norm": 19480.875, + "learning_rate": 9.06436463314162e-05, + "loss": 0.4654, + "step": 52550 + }, + { + "epoch": 0.27138442170869, + "grad_norm": 22658.076171875, + "learning_rate": 9.062235882776797e-05, + "loss": 0.4653, + "step": 52600 + }, + { + "epoch": 0.2716423916913028, + "grad_norm": 22396.4140625, + "learning_rate": 9.060104964069121e-05, + "loss": 0.4634, + "step": 52650 + }, + { + "epoch": 0.2719003616739156, + "grad_norm": 22354.28125, + "learning_rate": 9.057971878156036e-05, + "loss": 0.4626, + "step": 52700 + }, + { + "epoch": 0.2721583316565285, + "grad_norm": 19845.22265625, + "learning_rate": 9.05583662617614e-05, + "loss": 0.4666, + "step": 52750 + }, + { + "epoch": 0.27241630163914127, + "grad_norm": 19933.978515625, + "learning_rate": 9.053699209269188e-05, + "loss": 0.4601, + "step": 52800 + }, + { + "epoch": 0.2726742716217541, + "grad_norm": 21288.86328125, + "learning_rate": 9.051559628576094e-05, + "loss": 0.4622, + "step": 52850 + }, + { + "epoch": 0.2729322416043669, + "grad_norm": 20604.05078125, + "learning_rate": 9.049417885238927e-05, + "loss": 0.4618, + "step": 52900 + }, + { + "epoch": 0.27319021158697976, + "grad_norm": 18641.544921875, + "learning_rate": 9.047273980400903e-05, + "loss": 0.46, + "step": 52950 + }, + { + "epoch": 0.27344818156959255, + "grad_norm": 22482.8125, + "learning_rate": 9.045127915206398e-05, + "loss": 0.4673, + "step": 53000 + }, + { + "epoch": 0.2737061515522054, + "grad_norm": 20967.9375, + "learning_rate": 9.042979690800943e-05, + "loss": 0.4607, + "step": 53050 + }, + { + "epoch": 0.2739641215348182, + "grad_norm": 22371.90234375, + "learning_rate": 9.040829308331216e-05, + "loss": 0.4624, + "step": 53100 + }, + { + "epoch": 0.27422209151743104, + "grad_norm": 19802.947265625, + "learning_rate": 9.03867676894505e-05, + "loss": 0.4542, + "step": 53150 + }, + { + "epoch": 0.27448006150004384, + "grad_norm": 21255.974609375, + "learning_rate": 9.03652207379143e-05, + "loss": 0.4636, + "step": 53200 + }, + { + "epoch": 0.2747380314826567, + "grad_norm": 21687.16796875, + "learning_rate": 9.034365224020489e-05, + "loss": 0.4626, + "step": 53250 + }, + { + "epoch": 0.2749960014652695, + "grad_norm": 21386.275390625, + "learning_rate": 9.032206220783512e-05, + "loss": 0.4659, + "step": 53300 + }, + { + "epoch": 0.27525397144788233, + "grad_norm": 19433.888671875, + "learning_rate": 9.030045065232935e-05, + "loss": 0.4585, + "step": 53350 + }, + { + "epoch": 0.2755119414304951, + "grad_norm": 20615.021484375, + "learning_rate": 9.027881758522339e-05, + "loss": 0.4619, + "step": 53400 + }, + { + "epoch": 0.27576991141310797, + "grad_norm": 20498.369140625, + "learning_rate": 9.025716301806454e-05, + "loss": 0.4658, + "step": 53450 + }, + { + "epoch": 0.2760278813957208, + "grad_norm": 20348.955078125, + "learning_rate": 9.023548696241162e-05, + "loss": 0.4637, + "step": 53500 + }, + { + "epoch": 0.2762858513783336, + "grad_norm": 18524.3203125, + "learning_rate": 9.021378942983487e-05, + "loss": 0.4636, + "step": 53550 + }, + { + "epoch": 0.27654382136094646, + "grad_norm": 20778.064453125, + "learning_rate": 9.019207043191602e-05, + "loss": 0.4604, + "step": 53600 + }, + { + "epoch": 0.27680179134355926, + "grad_norm": 19481.369140625, + "learning_rate": 9.017032998024823e-05, + "loss": 0.4629, + "step": 53650 + }, + { + "epoch": 0.2770597613261721, + "grad_norm": 20873.8515625, + "learning_rate": 9.014856808643617e-05, + "loss": 0.4647, + "step": 53700 + }, + { + "epoch": 0.2773177313087849, + "grad_norm": 21859.05078125, + "learning_rate": 9.012678476209591e-05, + "loss": 0.4621, + "step": 53750 + }, + { + "epoch": 0.27757570129139775, + "grad_norm": 20832.587890625, + "learning_rate": 9.010498001885492e-05, + "loss": 0.463, + "step": 53800 + }, + { + "epoch": 0.27783367127401054, + "grad_norm": 18435.703125, + "learning_rate": 9.00831538683522e-05, + "loss": 0.466, + "step": 53850 + }, + { + "epoch": 0.2780916412566234, + "grad_norm": 21496.61328125, + "learning_rate": 9.006130632223811e-05, + "loss": 0.4611, + "step": 53900 + }, + { + "epoch": 0.2783496112392362, + "grad_norm": 21796.873046875, + "learning_rate": 9.003943739217444e-05, + "loss": 0.4587, + "step": 53950 + }, + { + "epoch": 0.27860758122184903, + "grad_norm": 21053.099609375, + "learning_rate": 9.001754708983443e-05, + "loss": 0.4659, + "step": 54000 + }, + { + "epoch": 0.2788655512044618, + "grad_norm": 20332.98828125, + "learning_rate": 8.999563542690266e-05, + "loss": 0.4586, + "step": 54050 + }, + { + "epoch": 0.2791235211870747, + "grad_norm": 19829.93359375, + "learning_rate": 8.997370241507516e-05, + "loss": 0.4608, + "step": 54100 + }, + { + "epoch": 0.27938149116968747, + "grad_norm": 21215.3515625, + "learning_rate": 8.995174806605937e-05, + "loss": 0.4672, + "step": 54150 + }, + { + "epoch": 0.2796394611523003, + "grad_norm": 19068.890625, + "learning_rate": 8.992977239157408e-05, + "loss": 0.4637, + "step": 54200 + }, + { + "epoch": 0.27989743113491317, + "grad_norm": 20632.857421875, + "learning_rate": 8.99077754033495e-05, + "loss": 0.4615, + "step": 54250 + }, + { + "epoch": 0.28015540111752596, + "grad_norm": 20244.943359375, + "learning_rate": 8.988575711312714e-05, + "loss": 0.4665, + "step": 54300 + }, + { + "epoch": 0.2804133711001388, + "grad_norm": 21873.34375, + "learning_rate": 8.986371753266001e-05, + "loss": 0.4636, + "step": 54350 + }, + { + "epoch": 0.2806713410827516, + "grad_norm": 18075.001953125, + "learning_rate": 8.984165667371236e-05, + "loss": 0.4626, + "step": 54400 + }, + { + "epoch": 0.28092931106536445, + "grad_norm": 19815.0546875, + "learning_rate": 8.981957454805987e-05, + "loss": 0.4535, + "step": 54450 + }, + { + "epoch": 0.28118728104797724, + "grad_norm": 22713.48046875, + "learning_rate": 8.979747116748955e-05, + "loss": 0.4592, + "step": 54500 + }, + { + "epoch": 0.2814452510305901, + "grad_norm": 23360.1953125, + "learning_rate": 8.977534654379976e-05, + "loss": 0.4646, + "step": 54550 + }, + { + "epoch": 0.2817032210132029, + "grad_norm": 21626.36328125, + "learning_rate": 8.975320068880018e-05, + "loss": 0.4644, + "step": 54600 + }, + { + "epoch": 0.28196119099581574, + "grad_norm": 20061.873046875, + "learning_rate": 8.973103361431184e-05, + "loss": 0.4674, + "step": 54650 + }, + { + "epoch": 0.28221916097842853, + "grad_norm": 21295.0625, + "learning_rate": 8.970884533216713e-05, + "loss": 0.4674, + "step": 54700 + }, + { + "epoch": 0.2824771309610414, + "grad_norm": 19434.23828125, + "learning_rate": 8.968663585420967e-05, + "loss": 0.46, + "step": 54750 + }, + { + "epoch": 0.28273510094365417, + "grad_norm": 23654.849609375, + "learning_rate": 8.966440519229449e-05, + "loss": 0.4649, + "step": 54800 + }, + { + "epoch": 0.282993070926267, + "grad_norm": 22763.603515625, + "learning_rate": 8.964215335828787e-05, + "loss": 0.4578, + "step": 54850 + }, + { + "epoch": 0.28325104090887987, + "grad_norm": 23262.849609375, + "learning_rate": 8.961988036406741e-05, + "loss": 0.4674, + "step": 54900 + }, + { + "epoch": 0.28350901089149266, + "grad_norm": 20148.380859375, + "learning_rate": 8.959758622152201e-05, + "loss": 0.4642, + "step": 54950 + }, + { + "epoch": 0.2837669808741055, + "grad_norm": 22515.548828125, + "learning_rate": 8.957527094255186e-05, + "loss": 0.4697, + "step": 55000 + }, + { + "epoch": 0.2837669808741055, + "eval_loss": 0.4508056044578552, + "eval_runtime": 3347.9938, + "eval_samples_per_second": 926.262, + "eval_steps_per_second": 1.809, + "step": 55000 + }, + { + "epoch": 0.2840249508567183, + "grad_norm": 21158.09375, + "learning_rate": 8.95529345390684e-05, + "loss": 0.4617, + "step": 55050 + }, + { + "epoch": 0.28428292083933115, + "grad_norm": 20892.517578125, + "learning_rate": 8.953057702299437e-05, + "loss": 0.4612, + "step": 55100 + }, + { + "epoch": 0.28454089082194395, + "grad_norm": 21489.740234375, + "learning_rate": 8.950819840626381e-05, + "loss": 0.4578, + "step": 55150 + }, + { + "epoch": 0.2847988608045568, + "grad_norm": 20703.072265625, + "learning_rate": 8.948579870082197e-05, + "loss": 0.4632, + "step": 55200 + }, + { + "epoch": 0.2850568307871696, + "grad_norm": 21731.775390625, + "learning_rate": 8.946337791862537e-05, + "loss": 0.4621, + "step": 55250 + }, + { + "epoch": 0.28531480076978244, + "grad_norm": 24507.076171875, + "learning_rate": 8.94409360716418e-05, + "loss": 0.4542, + "step": 55300 + }, + { + "epoch": 0.28557277075239523, + "grad_norm": 20686.79296875, + "learning_rate": 8.94184731718503e-05, + "loss": 0.4575, + "step": 55350 + }, + { + "epoch": 0.2858307407350081, + "grad_norm": 20055.396484375, + "learning_rate": 8.93959892312411e-05, + "loss": 0.4595, + "step": 55400 + }, + { + "epoch": 0.2860887107176209, + "grad_norm": 21203.28515625, + "learning_rate": 8.93734842618157e-05, + "loss": 0.457, + "step": 55450 + }, + { + "epoch": 0.2863466807002337, + "grad_norm": 21738.6328125, + "learning_rate": 8.935095827558684e-05, + "loss": 0.4639, + "step": 55500 + }, + { + "epoch": 0.2866046506828465, + "grad_norm": 21593.056640625, + "learning_rate": 8.932841128457844e-05, + "loss": 0.4566, + "step": 55550 + }, + { + "epoch": 0.28686262066545937, + "grad_norm": 20362.564453125, + "learning_rate": 8.930584330082564e-05, + "loss": 0.4613, + "step": 55600 + }, + { + "epoch": 0.2871205906480722, + "grad_norm": 20415.390625, + "learning_rate": 8.928325433637482e-05, + "loss": 0.4591, + "step": 55650 + }, + { + "epoch": 0.287378560630685, + "grad_norm": 21615.1953125, + "learning_rate": 8.926064440328348e-05, + "loss": 0.4645, + "step": 55700 + }, + { + "epoch": 0.28763653061329786, + "grad_norm": 19537.873046875, + "learning_rate": 8.92380135136204e-05, + "loss": 0.4595, + "step": 55750 + }, + { + "epoch": 0.28789450059591065, + "grad_norm": 21288.21484375, + "learning_rate": 8.921536167946552e-05, + "loss": 0.4565, + "step": 55800 + }, + { + "epoch": 0.2881524705785235, + "grad_norm": 25019.783203125, + "learning_rate": 8.919268891290992e-05, + "loss": 0.4635, + "step": 55850 + }, + { + "epoch": 0.2884104405611363, + "grad_norm": 23099.5625, + "learning_rate": 8.916999522605592e-05, + "loss": 0.4561, + "step": 55900 + }, + { + "epoch": 0.28866841054374914, + "grad_norm": 22477.849609375, + "learning_rate": 8.914728063101694e-05, + "loss": 0.458, + "step": 55950 + }, + { + "epoch": 0.28892638052636194, + "grad_norm": 19823.103515625, + "learning_rate": 8.91245451399176e-05, + "loss": 0.457, + "step": 56000 + }, + { + "epoch": 0.2891843505089748, + "grad_norm": 20293.353515625, + "learning_rate": 8.910178876489368e-05, + "loss": 0.4614, + "step": 56050 + }, + { + "epoch": 0.2894423204915876, + "grad_norm": 19020.892578125, + "learning_rate": 8.907901151809205e-05, + "loss": 0.4597, + "step": 56100 + }, + { + "epoch": 0.28970029047420043, + "grad_norm": 20133.603515625, + "learning_rate": 8.905621341167082e-05, + "loss": 0.4577, + "step": 56150 + }, + { + "epoch": 0.2899582604568132, + "grad_norm": 21008.95703125, + "learning_rate": 8.903339445779915e-05, + "loss": 0.4596, + "step": 56200 + }, + { + "epoch": 0.29021623043942607, + "grad_norm": 21339.892578125, + "learning_rate": 8.901055466865735e-05, + "loss": 0.4631, + "step": 56250 + }, + { + "epoch": 0.29047420042203886, + "grad_norm": 20088.455078125, + "learning_rate": 8.898769405643686e-05, + "loss": 0.4571, + "step": 56300 + }, + { + "epoch": 0.2907321704046517, + "grad_norm": 21779.341796875, + "learning_rate": 8.896481263334023e-05, + "loss": 0.4541, + "step": 56350 + }, + { + "epoch": 0.29099014038726456, + "grad_norm": 24433.103515625, + "learning_rate": 8.894191041158113e-05, + "loss": 0.4627, + "step": 56400 + }, + { + "epoch": 0.29124811036987736, + "grad_norm": 22214.70703125, + "learning_rate": 8.891898740338432e-05, + "loss": 0.4585, + "step": 56450 + }, + { + "epoch": 0.2915060803524902, + "grad_norm": 20558.955078125, + "learning_rate": 8.889604362098567e-05, + "loss": 0.4547, + "step": 56500 + }, + { + "epoch": 0.291764050335103, + "grad_norm": 22438.3828125, + "learning_rate": 8.88730790766321e-05, + "loss": 0.4581, + "step": 56550 + }, + { + "epoch": 0.29202202031771585, + "grad_norm": 22429.658203125, + "learning_rate": 8.885009378258164e-05, + "loss": 0.4556, + "step": 56600 + }, + { + "epoch": 0.29227999030032864, + "grad_norm": 18076.814453125, + "learning_rate": 8.882708775110342e-05, + "loss": 0.4571, + "step": 56650 + }, + { + "epoch": 0.2925379602829415, + "grad_norm": 19816.873046875, + "learning_rate": 8.88040609944776e-05, + "loss": 0.4584, + "step": 56700 + }, + { + "epoch": 0.2927959302655543, + "grad_norm": 20448.5234375, + "learning_rate": 8.878101352499542e-05, + "loss": 0.4575, + "step": 56750 + }, + { + "epoch": 0.29305390024816713, + "grad_norm": 19950.4609375, + "learning_rate": 8.875794535495915e-05, + "loss": 0.4558, + "step": 56800 + }, + { + "epoch": 0.2933118702307799, + "grad_norm": 20185.0625, + "learning_rate": 8.873485649668218e-05, + "loss": 0.4523, + "step": 56850 + }, + { + "epoch": 0.2935698402133928, + "grad_norm": 22338.080078125, + "learning_rate": 8.871174696248888e-05, + "loss": 0.4648, + "step": 56900 + }, + { + "epoch": 0.29382781019600557, + "grad_norm": 22531.541015625, + "learning_rate": 8.868861676471463e-05, + "loss": 0.4628, + "step": 56950 + }, + { + "epoch": 0.2940857801786184, + "grad_norm": 19558.10546875, + "learning_rate": 8.866546591570592e-05, + "loss": 0.4565, + "step": 57000 + }, + { + "epoch": 0.29434375016123127, + "grad_norm": 20166.33203125, + "learning_rate": 8.864229442782023e-05, + "loss": 0.4527, + "step": 57050 + }, + { + "epoch": 0.29460172014384406, + "grad_norm": 20262.185546875, + "learning_rate": 8.861910231342603e-05, + "loss": 0.4575, + "step": 57100 + }, + { + "epoch": 0.2948596901264569, + "grad_norm": 19107.080078125, + "learning_rate": 8.859588958490283e-05, + "loss": 0.4564, + "step": 57150 + }, + { + "epoch": 0.2951176601090697, + "grad_norm": 19690.37109375, + "learning_rate": 8.857265625464113e-05, + "loss": 0.4576, + "step": 57200 + }, + { + "epoch": 0.29537563009168255, + "grad_norm": 21793.189453125, + "learning_rate": 8.854940233504245e-05, + "loss": 0.4616, + "step": 57250 + }, + { + "epoch": 0.29563360007429534, + "grad_norm": 21543.033203125, + "learning_rate": 8.852612783851926e-05, + "loss": 0.4559, + "step": 57300 + }, + { + "epoch": 0.2958915700569082, + "grad_norm": 21455.56640625, + "learning_rate": 8.850283277749504e-05, + "loss": 0.4583, + "step": 57350 + }, + { + "epoch": 0.296149540039521, + "grad_norm": 21236.935546875, + "learning_rate": 8.847951716440426e-05, + "loss": 0.46, + "step": 57400 + }, + { + "epoch": 0.29640751002213384, + "grad_norm": 22411.130859375, + "learning_rate": 8.845618101169232e-05, + "loss": 0.4563, + "step": 57450 + }, + { + "epoch": 0.29666548000474663, + "grad_norm": 19269.26171875, + "learning_rate": 8.843282433181561e-05, + "loss": 0.4634, + "step": 57500 + }, + { + "epoch": 0.2969234499873595, + "grad_norm": 22179.669921875, + "learning_rate": 8.840944713724149e-05, + "loss": 0.4582, + "step": 57550 + }, + { + "epoch": 0.29718141996997227, + "grad_norm": 19867.076171875, + "learning_rate": 8.838604944044825e-05, + "loss": 0.4591, + "step": 57600 + }, + { + "epoch": 0.2974393899525851, + "grad_norm": 19806.09375, + "learning_rate": 8.836263125392511e-05, + "loss": 0.4571, + "step": 57650 + }, + { + "epoch": 0.2976973599351979, + "grad_norm": 21762.22265625, + "learning_rate": 8.833919259017225e-05, + "loss": 0.4526, + "step": 57700 + }, + { + "epoch": 0.29795532991781076, + "grad_norm": 21031.263671875, + "learning_rate": 8.83157334617008e-05, + "loss": 0.4577, + "step": 57750 + }, + { + "epoch": 0.2982132999004236, + "grad_norm": 22886.556640625, + "learning_rate": 8.829225388103276e-05, + "loss": 0.4553, + "step": 57800 + }, + { + "epoch": 0.2984712698830364, + "grad_norm": 19710.173828125, + "learning_rate": 8.826875386070108e-05, + "loss": 0.4556, + "step": 57850 + }, + { + "epoch": 0.29872923986564925, + "grad_norm": 20607.244140625, + "learning_rate": 8.824523341324963e-05, + "loss": 0.458, + "step": 57900 + }, + { + "epoch": 0.29898720984826205, + "grad_norm": 20672.05859375, + "learning_rate": 8.822169255123317e-05, + "loss": 0.4531, + "step": 57950 + }, + { + "epoch": 0.2992451798308749, + "grad_norm": 21375.76953125, + "learning_rate": 8.819813128721732e-05, + "loss": 0.4602, + "step": 58000 + }, + { + "epoch": 0.2995031498134877, + "grad_norm": 20848.328125, + "learning_rate": 8.817454963377865e-05, + "loss": 0.4557, + "step": 58050 + }, + { + "epoch": 0.29976111979610054, + "grad_norm": 20778.619140625, + "learning_rate": 8.81509476035046e-05, + "loss": 0.4588, + "step": 58100 + }, + { + "epoch": 0.30001908977871333, + "grad_norm": 19791.296875, + "learning_rate": 8.812732520899347e-05, + "loss": 0.4609, + "step": 58150 + }, + { + "epoch": 0.3002770597613262, + "grad_norm": 21814.482421875, + "learning_rate": 8.810368246285445e-05, + "loss": 0.4597, + "step": 58200 + }, + { + "epoch": 0.300535029743939, + "grad_norm": 22417.65625, + "learning_rate": 8.808001937770755e-05, + "loss": 0.461, + "step": 58250 + }, + { + "epoch": 0.3007929997265518, + "grad_norm": 21347.53515625, + "learning_rate": 8.80563359661837e-05, + "loss": 0.4523, + "step": 58300 + }, + { + "epoch": 0.3010509697091646, + "grad_norm": 21612.689453125, + "learning_rate": 8.803263224092461e-05, + "loss": 0.4588, + "step": 58350 + }, + { + "epoch": 0.30130893969177747, + "grad_norm": 19139.7109375, + "learning_rate": 8.80089082145829e-05, + "loss": 0.4576, + "step": 58400 + }, + { + "epoch": 0.3015669096743903, + "grad_norm": 21629.78125, + "learning_rate": 8.798516389982197e-05, + "loss": 0.4514, + "step": 58450 + }, + { + "epoch": 0.3018248796570031, + "grad_norm": 20307.630859375, + "learning_rate": 8.79613993093161e-05, + "loss": 0.4606, + "step": 58500 + }, + { + "epoch": 0.30208284963961596, + "grad_norm": 17832.3359375, + "learning_rate": 8.793761445575037e-05, + "loss": 0.4654, + "step": 58550 + }, + { + "epoch": 0.30234081962222875, + "grad_norm": 19975.20703125, + "learning_rate": 8.791380935182065e-05, + "loss": 0.4519, + "step": 58600 + }, + { + "epoch": 0.3025987896048416, + "grad_norm": 23387.681640625, + "learning_rate": 8.788998401023365e-05, + "loss": 0.4576, + "step": 58650 + }, + { + "epoch": 0.3028567595874544, + "grad_norm": 18704.669921875, + "learning_rate": 8.78661384437069e-05, + "loss": 0.4634, + "step": 58700 + }, + { + "epoch": 0.30311472957006724, + "grad_norm": 21739.806640625, + "learning_rate": 8.784227266496868e-05, + "loss": 0.4471, + "step": 58750 + }, + { + "epoch": 0.30337269955268004, + "grad_norm": 22190.74609375, + "learning_rate": 8.781838668675806e-05, + "loss": 0.4508, + "step": 58800 + }, + { + "epoch": 0.3036306695352929, + "grad_norm": 19186.9609375, + "learning_rate": 8.779448052182495e-05, + "loss": 0.4575, + "step": 58850 + }, + { + "epoch": 0.3038886395179057, + "grad_norm": 21925.8984375, + "learning_rate": 8.777055418293e-05, + "loss": 0.4614, + "step": 58900 + }, + { + "epoch": 0.3041466095005185, + "grad_norm": 21280.16796875, + "learning_rate": 8.774660768284459e-05, + "loss": 0.4621, + "step": 58950 + }, + { + "epoch": 0.3044045794831313, + "grad_norm": 19872.3828125, + "learning_rate": 8.772264103435094e-05, + "loss": 0.4617, + "step": 59000 + }, + { + "epoch": 0.30466254946574417, + "grad_norm": 17518.58984375, + "learning_rate": 8.769865425024195e-05, + "loss": 0.4548, + "step": 59050 + }, + { + "epoch": 0.30492051944835696, + "grad_norm": 25605.537109375, + "learning_rate": 8.767464734332131e-05, + "loss": 0.4532, + "step": 59100 + }, + { + "epoch": 0.3051784894309698, + "grad_norm": 20151.53515625, + "learning_rate": 8.765062032640346e-05, + "loss": 0.4558, + "step": 59150 + }, + { + "epoch": 0.30543645941358266, + "grad_norm": 19346.048828125, + "learning_rate": 8.762657321231353e-05, + "loss": 0.4624, + "step": 59200 + }, + { + "epoch": 0.30569442939619546, + "grad_norm": 21447.115234375, + "learning_rate": 8.760250601388741e-05, + "loss": 0.4632, + "step": 59250 + }, + { + "epoch": 0.3059523993788083, + "grad_norm": 19053.896484375, + "learning_rate": 8.757841874397172e-05, + "loss": 0.454, + "step": 59300 + }, + { + "epoch": 0.3062103693614211, + "grad_norm": 20928.8515625, + "learning_rate": 8.755431141542376e-05, + "loss": 0.4509, + "step": 59350 + }, + { + "epoch": 0.30646833934403395, + "grad_norm": 20900.40234375, + "learning_rate": 8.753018404111157e-05, + "loss": 0.4523, + "step": 59400 + }, + { + "epoch": 0.30672630932664674, + "grad_norm": 19776.572265625, + "learning_rate": 8.750603663391385e-05, + "loss": 0.458, + "step": 59450 + }, + { + "epoch": 0.3069842793092596, + "grad_norm": 21503.505859375, + "learning_rate": 8.748186920672005e-05, + "loss": 0.4496, + "step": 59500 + }, + { + "epoch": 0.3072422492918724, + "grad_norm": 20588.5078125, + "learning_rate": 8.745768177243027e-05, + "loss": 0.4578, + "step": 59550 + }, + { + "epoch": 0.30750021927448523, + "grad_norm": 20516.150390625, + "learning_rate": 8.743347434395528e-05, + "loss": 0.46, + "step": 59600 + }, + { + "epoch": 0.307758189257098, + "grad_norm": 20487.498046875, + "learning_rate": 8.740924693421655e-05, + "loss": 0.4574, + "step": 59650 + }, + { + "epoch": 0.3080161592397109, + "grad_norm": 21070.3671875, + "learning_rate": 8.738499955614619e-05, + "loss": 0.4564, + "step": 59700 + }, + { + "epoch": 0.30827412922232367, + "grad_norm": 19067.427734375, + "learning_rate": 8.736073222268697e-05, + "loss": 0.4523, + "step": 59750 + }, + { + "epoch": 0.3085320992049365, + "grad_norm": 22084.68359375, + "learning_rate": 8.733644494679236e-05, + "loss": 0.4558, + "step": 59800 + }, + { + "epoch": 0.3087900691875493, + "grad_norm": 22324.9140625, + "learning_rate": 8.731213774142639e-05, + "loss": 0.4585, + "step": 59850 + }, + { + "epoch": 0.30904803917016216, + "grad_norm": 19219.47265625, + "learning_rate": 8.728781061956383e-05, + "loss": 0.4571, + "step": 59900 + }, + { + "epoch": 0.309306009152775, + "grad_norm": 20598.125, + "learning_rate": 8.726346359418998e-05, + "loss": 0.4581, + "step": 59950 + }, + { + "epoch": 0.3095639791353878, + "grad_norm": 22155.720703125, + "learning_rate": 8.723909667830082e-05, + "loss": 0.4578, + "step": 60000 + }, + { + "epoch": 0.3095639791353878, + "eval_loss": 0.44494956731796265, + "eval_runtime": 3261.5111, + "eval_samples_per_second": 950.823, + "eval_steps_per_second": 1.857, + "step": 60000 + }, + { + "epoch": 0.30982194911800065, + "grad_norm": 22012.822265625, + "learning_rate": 8.721470988490297e-05, + "loss": 0.4533, + "step": 60050 + }, + { + "epoch": 0.31007991910061344, + "grad_norm": 20934.453125, + "learning_rate": 8.719030322701358e-05, + "loss": 0.4538, + "step": 60100 + }, + { + "epoch": 0.3103378890832263, + "grad_norm": 20173.20703125, + "learning_rate": 8.716587671766049e-05, + "loss": 0.4559, + "step": 60150 + }, + { + "epoch": 0.3105958590658391, + "grad_norm": 19343.833984375, + "learning_rate": 8.714143036988208e-05, + "loss": 0.4579, + "step": 60200 + }, + { + "epoch": 0.31085382904845194, + "grad_norm": 20720.435546875, + "learning_rate": 8.711696419672734e-05, + "loss": 0.4529, + "step": 60250 + }, + { + "epoch": 0.31111179903106473, + "grad_norm": 22050.85546875, + "learning_rate": 8.709247821125583e-05, + "loss": 0.4505, + "step": 60300 + }, + { + "epoch": 0.3113697690136776, + "grad_norm": 22470.55078125, + "learning_rate": 8.706797242653773e-05, + "loss": 0.4616, + "step": 60350 + }, + { + "epoch": 0.31162773899629037, + "grad_norm": 21057.978515625, + "learning_rate": 8.70434468556537e-05, + "loss": 0.4568, + "step": 60400 + }, + { + "epoch": 0.3118857089789032, + "grad_norm": 21035.34375, + "learning_rate": 8.701890151169507e-05, + "loss": 0.4551, + "step": 60450 + }, + { + "epoch": 0.312143678961516, + "grad_norm": 20412.056640625, + "learning_rate": 8.699433640776363e-05, + "loss": 0.4521, + "step": 60500 + }, + { + "epoch": 0.31240164894412886, + "grad_norm": 19888.26953125, + "learning_rate": 8.696975155697175e-05, + "loss": 0.4565, + "step": 60550 + }, + { + "epoch": 0.3126596189267417, + "grad_norm": 22491.900390625, + "learning_rate": 8.694514697244238e-05, + "loss": 0.4578, + "step": 60600 + }, + { + "epoch": 0.3129175889093545, + "grad_norm": 20026.357421875, + "learning_rate": 8.692052266730897e-05, + "loss": 0.4554, + "step": 60650 + }, + { + "epoch": 0.31317555889196735, + "grad_norm": 22979.109375, + "learning_rate": 8.689587865471547e-05, + "loss": 0.461, + "step": 60700 + }, + { + "epoch": 0.31343352887458015, + "grad_norm": 21558.291015625, + "learning_rate": 8.68712149478164e-05, + "loss": 0.4546, + "step": 60750 + }, + { + "epoch": 0.313691498857193, + "grad_norm": 22115.384765625, + "learning_rate": 8.684653155977676e-05, + "loss": 0.4518, + "step": 60800 + }, + { + "epoch": 0.3139494688398058, + "grad_norm": 21422.41015625, + "learning_rate": 8.682182850377205e-05, + "loss": 0.4602, + "step": 60850 + }, + { + "epoch": 0.31420743882241864, + "grad_norm": 21101.02734375, + "learning_rate": 8.679710579298832e-05, + "loss": 0.4579, + "step": 60900 + }, + { + "epoch": 0.31446540880503143, + "grad_norm": 18844.361328125, + "learning_rate": 8.677236344062203e-05, + "loss": 0.4569, + "step": 60950 + }, + { + "epoch": 0.3147233787876443, + "grad_norm": 20492.796875, + "learning_rate": 8.67476014598802e-05, + "loss": 0.4542, + "step": 61000 + }, + { + "epoch": 0.3149813487702571, + "grad_norm": 28102.55078125, + "learning_rate": 8.67228198639803e-05, + "loss": 0.4516, + "step": 61050 + }, + { + "epoch": 0.3152393187528699, + "grad_norm": 20697.494140625, + "learning_rate": 8.669801866615024e-05, + "loss": 0.4551, + "step": 61100 + }, + { + "epoch": 0.3154972887354827, + "grad_norm": 20726.90625, + "learning_rate": 8.667319787962842e-05, + "loss": 0.4576, + "step": 61150 + }, + { + "epoch": 0.31575525871809557, + "grad_norm": 20007.04296875, + "learning_rate": 8.664835751766371e-05, + "loss": 0.4544, + "step": 61200 + }, + { + "epoch": 0.31601322870070836, + "grad_norm": 23061.224609375, + "learning_rate": 8.662349759351542e-05, + "loss": 0.458, + "step": 61250 + }, + { + "epoch": 0.3162711986833212, + "grad_norm": 19895.3125, + "learning_rate": 8.65986181204533e-05, + "loss": 0.4555, + "step": 61300 + }, + { + "epoch": 0.31652916866593406, + "grad_norm": 22702.5234375, + "learning_rate": 8.65737191117575e-05, + "loss": 0.4586, + "step": 61350 + }, + { + "epoch": 0.31678713864854685, + "grad_norm": 20045.404296875, + "learning_rate": 8.654880058071866e-05, + "loss": 0.4583, + "step": 61400 + }, + { + "epoch": 0.3170451086311597, + "grad_norm": 21180.455078125, + "learning_rate": 8.652386254063778e-05, + "loss": 0.4594, + "step": 61450 + }, + { + "epoch": 0.3173030786137725, + "grad_norm": 19104.767578125, + "learning_rate": 8.649890500482633e-05, + "loss": 0.4532, + "step": 61500 + }, + { + "epoch": 0.31756104859638534, + "grad_norm": 23137.869140625, + "learning_rate": 8.647392798660613e-05, + "loss": 0.4535, + "step": 61550 + }, + { + "epoch": 0.31781901857899814, + "grad_norm": 21784.001953125, + "learning_rate": 8.644893149930949e-05, + "loss": 0.4518, + "step": 61600 + }, + { + "epoch": 0.318076988561611, + "grad_norm": 20489.796875, + "learning_rate": 8.642391555627897e-05, + "loss": 0.4572, + "step": 61650 + }, + { + "epoch": 0.3183349585442238, + "grad_norm": 21743.728515625, + "learning_rate": 8.639888017086764e-05, + "loss": 0.4601, + "step": 61700 + }, + { + "epoch": 0.3185929285268366, + "grad_norm": 21714.6171875, + "learning_rate": 8.63738253564389e-05, + "loss": 0.4597, + "step": 61750 + }, + { + "epoch": 0.3188508985094494, + "grad_norm": 19896.208984375, + "learning_rate": 8.634875112636653e-05, + "loss": 0.4532, + "step": 61800 + }, + { + "epoch": 0.31910886849206227, + "grad_norm": 22215.173828125, + "learning_rate": 8.632365749403465e-05, + "loss": 0.4532, + "step": 61850 + }, + { + "epoch": 0.31936683847467506, + "grad_norm": 22466.958984375, + "learning_rate": 8.629854447283778e-05, + "loss": 0.4539, + "step": 61900 + }, + { + "epoch": 0.3196248084572879, + "grad_norm": 21345.197265625, + "learning_rate": 8.627341207618073e-05, + "loss": 0.4551, + "step": 61950 + }, + { + "epoch": 0.3198827784399007, + "grad_norm": 20988.8203125, + "learning_rate": 8.624826031747872e-05, + "loss": 0.4593, + "step": 62000 + }, + { + "epoch": 0.32014074842251355, + "grad_norm": 23295.70703125, + "learning_rate": 8.622308921015726e-05, + "loss": 0.4547, + "step": 62050 + }, + { + "epoch": 0.3203987184051264, + "grad_norm": 22620.431640625, + "learning_rate": 8.619789876765221e-05, + "loss": 0.4601, + "step": 62100 + }, + { + "epoch": 0.3206566883877392, + "grad_norm": 21914.44140625, + "learning_rate": 8.61726890034097e-05, + "loss": 0.4474, + "step": 62150 + }, + { + "epoch": 0.32091465837035205, + "grad_norm": 20521.265625, + "learning_rate": 8.614745993088626e-05, + "loss": 0.4565, + "step": 62200 + }, + { + "epoch": 0.32117262835296484, + "grad_norm": 22810.072265625, + "learning_rate": 8.612221156354868e-05, + "loss": 0.453, + "step": 62250 + }, + { + "epoch": 0.3214305983355777, + "grad_norm": 20862.349609375, + "learning_rate": 8.609694391487402e-05, + "loss": 0.4543, + "step": 62300 + }, + { + "epoch": 0.3216885683181905, + "grad_norm": 22115.298828125, + "learning_rate": 8.607165699834967e-05, + "loss": 0.453, + "step": 62350 + }, + { + "epoch": 0.32194653830080333, + "grad_norm": 22504.859375, + "learning_rate": 8.60463508274733e-05, + "loss": 0.4552, + "step": 62400 + }, + { + "epoch": 0.3222045082834161, + "grad_norm": 21758.9453125, + "learning_rate": 8.602102541575286e-05, + "loss": 0.4526, + "step": 62450 + }, + { + "epoch": 0.322462478266029, + "grad_norm": 20388.23828125, + "learning_rate": 8.599568077670654e-05, + "loss": 0.4522, + "step": 62500 + }, + { + "epoch": 0.32272044824864177, + "grad_norm": 22393.857421875, + "learning_rate": 8.597031692386286e-05, + "loss": 0.4457, + "step": 62550 + }, + { + "epoch": 0.3229784182312546, + "grad_norm": 22233.978515625, + "learning_rate": 8.594493387076052e-05, + "loss": 0.449, + "step": 62600 + }, + { + "epoch": 0.3232363882138674, + "grad_norm": 19831.12109375, + "learning_rate": 8.591953163094852e-05, + "loss": 0.4556, + "step": 62650 + }, + { + "epoch": 0.32349435819648026, + "grad_norm": 19109.783203125, + "learning_rate": 8.589411021798608e-05, + "loss": 0.4552, + "step": 62700 + }, + { + "epoch": 0.3237523281790931, + "grad_norm": 23053.642578125, + "learning_rate": 8.586866964544265e-05, + "loss": 0.4552, + "step": 62750 + }, + { + "epoch": 0.3240102981617059, + "grad_norm": 17938.240234375, + "learning_rate": 8.584320992689791e-05, + "loss": 0.4512, + "step": 62800 + }, + { + "epoch": 0.32426826814431875, + "grad_norm": 19569.431640625, + "learning_rate": 8.581773107594179e-05, + "loss": 0.4557, + "step": 62850 + }, + { + "epoch": 0.32452623812693154, + "grad_norm": 19247.82421875, + "learning_rate": 8.579223310617439e-05, + "loss": 0.4599, + "step": 62900 + }, + { + "epoch": 0.3247842081095444, + "grad_norm": 21565.8671875, + "learning_rate": 8.576671603120603e-05, + "loss": 0.4573, + "step": 62950 + }, + { + "epoch": 0.3250421780921572, + "grad_norm": 19029.005859375, + "learning_rate": 8.574117986465723e-05, + "loss": 0.455, + "step": 63000 + }, + { + "epoch": 0.32530014807477003, + "grad_norm": 21574.626953125, + "learning_rate": 8.57156246201587e-05, + "loss": 0.4512, + "step": 63050 + }, + { + "epoch": 0.32555811805738283, + "grad_norm": 21181.8203125, + "learning_rate": 8.569005031135136e-05, + "loss": 0.4513, + "step": 63100 + }, + { + "epoch": 0.3258160880399957, + "grad_norm": 22689.93359375, + "learning_rate": 8.566445695188624e-05, + "loss": 0.4515, + "step": 63150 + }, + { + "epoch": 0.32607405802260847, + "grad_norm": 22001.9921875, + "learning_rate": 8.563884455542461e-05, + "loss": 0.4459, + "step": 63200 + }, + { + "epoch": 0.3263320280052213, + "grad_norm": 20342.96875, + "learning_rate": 8.561321313563786e-05, + "loss": 0.4526, + "step": 63250 + }, + { + "epoch": 0.3265899979878341, + "grad_norm": 20673.75390625, + "learning_rate": 8.558756270620756e-05, + "loss": 0.4581, + "step": 63300 + }, + { + "epoch": 0.32684796797044696, + "grad_norm": 23113.490234375, + "learning_rate": 8.556189328082538e-05, + "loss": 0.4525, + "step": 63350 + }, + { + "epoch": 0.32710593795305976, + "grad_norm": 21878.384765625, + "learning_rate": 8.55362048731932e-05, + "loss": 0.4536, + "step": 63400 + }, + { + "epoch": 0.3273639079356726, + "grad_norm": 22787.79296875, + "learning_rate": 8.551049749702297e-05, + "loss": 0.4586, + "step": 63450 + }, + { + "epoch": 0.32762187791828545, + "grad_norm": 20422.0625, + "learning_rate": 8.548477116603679e-05, + "loss": 0.4496, + "step": 63500 + }, + { + "epoch": 0.32787984790089825, + "grad_norm": 21936.8828125, + "learning_rate": 8.54590258939669e-05, + "loss": 0.4509, + "step": 63550 + }, + { + "epoch": 0.3281378178835111, + "grad_norm": 21049.275390625, + "learning_rate": 8.54332616945556e-05, + "loss": 0.4514, + "step": 63600 + }, + { + "epoch": 0.3283957878661239, + "grad_norm": 22976.1015625, + "learning_rate": 8.540747858155533e-05, + "loss": 0.4611, + "step": 63650 + }, + { + "epoch": 0.32865375784873674, + "grad_norm": 21968.18359375, + "learning_rate": 8.538167656872861e-05, + "loss": 0.4557, + "step": 63700 + }, + { + "epoch": 0.32891172783134953, + "grad_norm": 22231.755859375, + "learning_rate": 8.53558556698481e-05, + "loss": 0.4556, + "step": 63750 + }, + { + "epoch": 0.3291696978139624, + "grad_norm": 21183.978515625, + "learning_rate": 8.533001589869643e-05, + "loss": 0.4479, + "step": 63800 + }, + { + "epoch": 0.3294276677965752, + "grad_norm": 23931.5234375, + "learning_rate": 8.530415726906642e-05, + "loss": 0.4533, + "step": 63850 + }, + { + "epoch": 0.329685637779188, + "grad_norm": 21073.62890625, + "learning_rate": 8.527827979476087e-05, + "loss": 0.4577, + "step": 63900 + }, + { + "epoch": 0.3299436077618008, + "grad_norm": 19957.09375, + "learning_rate": 8.525238348959268e-05, + "loss": 0.4486, + "step": 63950 + }, + { + "epoch": 0.33020157774441367, + "grad_norm": 18999.962890625, + "learning_rate": 8.522646836738482e-05, + "loss": 0.4525, + "step": 64000 + }, + { + "epoch": 0.33045954772702646, + "grad_norm": 24102.1640625, + "learning_rate": 8.520053444197026e-05, + "loss": 0.4545, + "step": 64050 + }, + { + "epoch": 0.3307175177096393, + "grad_norm": 20205.65234375, + "learning_rate": 8.517458172719203e-05, + "loss": 0.4539, + "step": 64100 + }, + { + "epoch": 0.33097548769225216, + "grad_norm": 24099.8203125, + "learning_rate": 8.514861023690321e-05, + "loss": 0.4465, + "step": 64150 + }, + { + "epoch": 0.33123345767486495, + "grad_norm": 19802.203125, + "learning_rate": 8.512261998496685e-05, + "loss": 0.4546, + "step": 64200 + }, + { + "epoch": 0.3314914276574778, + "grad_norm": 23137.609375, + "learning_rate": 8.509661098525603e-05, + "loss": 0.4539, + "step": 64250 + }, + { + "epoch": 0.3317493976400906, + "grad_norm": 23578.609375, + "learning_rate": 8.507058325165391e-05, + "loss": 0.4513, + "step": 64300 + }, + { + "epoch": 0.33200736762270344, + "grad_norm": 19172.0859375, + "learning_rate": 8.504453679805353e-05, + "loss": 0.456, + "step": 64350 + }, + { + "epoch": 0.33226533760531624, + "grad_norm": 19165.775390625, + "learning_rate": 8.5018471638358e-05, + "loss": 0.4578, + "step": 64400 + }, + { + "epoch": 0.3325233075879291, + "grad_norm": 18070.72265625, + "learning_rate": 8.49923877864804e-05, + "loss": 0.4608, + "step": 64450 + }, + { + "epoch": 0.3327812775705419, + "grad_norm": 20918.525390625, + "learning_rate": 8.49662852563438e-05, + "loss": 0.4526, + "step": 64500 + }, + { + "epoch": 0.3330392475531547, + "grad_norm": 21165.05078125, + "learning_rate": 8.494016406188121e-05, + "loss": 0.4503, + "step": 64550 + }, + { + "epoch": 0.3332972175357675, + "grad_norm": 19273.013671875, + "learning_rate": 8.491402421703562e-05, + "loss": 0.4572, + "step": 64600 + }, + { + "epoch": 0.33355518751838037, + "grad_norm": 21221.681640625, + "learning_rate": 8.488786573575998e-05, + "loss": 0.456, + "step": 64650 + }, + { + "epoch": 0.33381315750099316, + "grad_norm": 19485.8125, + "learning_rate": 8.486168863201716e-05, + "loss": 0.4423, + "step": 64700 + }, + { + "epoch": 0.334071127483606, + "grad_norm": 23241.580078125, + "learning_rate": 8.483549291978001e-05, + "loss": 0.4531, + "step": 64750 + }, + { + "epoch": 0.3343290974662188, + "grad_norm": 21281.111328125, + "learning_rate": 8.48092786130313e-05, + "loss": 0.452, + "step": 64800 + }, + { + "epoch": 0.33458706744883165, + "grad_norm": 21610.2578125, + "learning_rate": 8.47830457257637e-05, + "loss": 0.4488, + "step": 64850 + }, + { + "epoch": 0.3348450374314445, + "grad_norm": 19343.466796875, + "learning_rate": 8.475679427197982e-05, + "loss": 0.4514, + "step": 64900 + }, + { + "epoch": 0.3351030074140573, + "grad_norm": 19489.1875, + "learning_rate": 8.473052426569219e-05, + "loss": 0.447, + "step": 64950 + }, + { + "epoch": 0.33536097739667015, + "grad_norm": 24805.84765625, + "learning_rate": 8.470423572092323e-05, + "loss": 0.4594, + "step": 65000 + }, + { + "epoch": 0.33536097739667015, + "eval_loss": 0.440469890832901, + "eval_runtime": 3318.76, + "eval_samples_per_second": 934.421, + "eval_steps_per_second": 1.825, + "step": 65000 + }, + { + "epoch": 0.33561894737928294, + "grad_norm": 22912.732421875, + "learning_rate": 8.467792865170525e-05, + "loss": 0.4435, + "step": 65050 + }, + { + "epoch": 0.3358769173618958, + "grad_norm": 19958.994140625, + "learning_rate": 8.465160307208045e-05, + "loss": 0.4588, + "step": 65100 + }, + { + "epoch": 0.3361348873445086, + "grad_norm": 20914.193359375, + "learning_rate": 8.462525899610092e-05, + "loss": 0.4497, + "step": 65150 + }, + { + "epoch": 0.33639285732712143, + "grad_norm": 20505.814453125, + "learning_rate": 8.459889643782861e-05, + "loss": 0.4569, + "step": 65200 + }, + { + "epoch": 0.3366508273097342, + "grad_norm": 19486.068359375, + "learning_rate": 8.457251541133535e-05, + "loss": 0.4505, + "step": 65250 + }, + { + "epoch": 0.3369087972923471, + "grad_norm": 21967.84765625, + "learning_rate": 8.454611593070284e-05, + "loss": 0.4556, + "step": 65300 + }, + { + "epoch": 0.33716676727495987, + "grad_norm": 21949.767578125, + "learning_rate": 8.451969801002258e-05, + "loss": 0.4491, + "step": 65350 + }, + { + "epoch": 0.3374247372575727, + "grad_norm": 19765.14453125, + "learning_rate": 8.449326166339595e-05, + "loss": 0.4507, + "step": 65400 + }, + { + "epoch": 0.3376827072401855, + "grad_norm": 21396.982421875, + "learning_rate": 8.446680690493417e-05, + "loss": 0.4548, + "step": 65450 + }, + { + "epoch": 0.33794067722279836, + "grad_norm": 22511.8359375, + "learning_rate": 8.444033374875828e-05, + "loss": 0.454, + "step": 65500 + }, + { + "epoch": 0.33819864720541115, + "grad_norm": 21264.076171875, + "learning_rate": 8.441384220899912e-05, + "loss": 0.4486, + "step": 65550 + }, + { + "epoch": 0.338456617188024, + "grad_norm": 20736.046875, + "learning_rate": 8.438733229979741e-05, + "loss": 0.4505, + "step": 65600 + }, + { + "epoch": 0.33871458717063685, + "grad_norm": 20183.8359375, + "learning_rate": 8.436080403530356e-05, + "loss": 0.4485, + "step": 65650 + }, + { + "epoch": 0.33897255715324964, + "grad_norm": 21947.3671875, + "learning_rate": 8.433425742967787e-05, + "loss": 0.4499, + "step": 65700 + }, + { + "epoch": 0.3392305271358625, + "grad_norm": 22621.236328125, + "learning_rate": 8.430769249709042e-05, + "loss": 0.4503, + "step": 65750 + }, + { + "epoch": 0.3394884971184753, + "grad_norm": 21537.947265625, + "learning_rate": 8.428110925172103e-05, + "loss": 0.4634, + "step": 65800 + }, + { + "epoch": 0.33974646710108813, + "grad_norm": 20869.759765625, + "learning_rate": 8.425450770775936e-05, + "loss": 0.4504, + "step": 65850 + }, + { + "epoch": 0.34000443708370093, + "grad_norm": 20865.12109375, + "learning_rate": 8.422788787940477e-05, + "loss": 0.4509, + "step": 65900 + }, + { + "epoch": 0.3402624070663138, + "grad_norm": 23897.974609375, + "learning_rate": 8.42012497808664e-05, + "loss": 0.4512, + "step": 65950 + }, + { + "epoch": 0.34052037704892657, + "grad_norm": 23978.56640625, + "learning_rate": 8.417459342636318e-05, + "loss": 0.4513, + "step": 66000 + }, + { + "epoch": 0.3407783470315394, + "grad_norm": 22806.99609375, + "learning_rate": 8.414791883012374e-05, + "loss": 0.4468, + "step": 66050 + }, + { + "epoch": 0.3410363170141522, + "grad_norm": 20348.841796875, + "learning_rate": 8.412122600638646e-05, + "loss": 0.4484, + "step": 66100 + }, + { + "epoch": 0.34129428699676506, + "grad_norm": 21868.353515625, + "learning_rate": 8.409451496939945e-05, + "loss": 0.4601, + "step": 66150 + }, + { + "epoch": 0.34155225697937786, + "grad_norm": 20312.36328125, + "learning_rate": 8.406778573342055e-05, + "loss": 0.4485, + "step": 66200 + }, + { + "epoch": 0.3418102269619907, + "grad_norm": 25603.419921875, + "learning_rate": 8.404103831271733e-05, + "loss": 0.4487, + "step": 66250 + }, + { + "epoch": 0.34206819694460355, + "grad_norm": 21330.416015625, + "learning_rate": 8.4014272721567e-05, + "loss": 0.449, + "step": 66300 + }, + { + "epoch": 0.34232616692721635, + "grad_norm": 20045.4453125, + "learning_rate": 8.398748897425656e-05, + "loss": 0.447, + "step": 66350 + }, + { + "epoch": 0.3425841369098292, + "grad_norm": 21575.642578125, + "learning_rate": 8.396068708508262e-05, + "loss": 0.4495, + "step": 66400 + }, + { + "epoch": 0.342842106892442, + "grad_norm": 20396.5390625, + "learning_rate": 8.393386706835154e-05, + "loss": 0.4478, + "step": 66450 + }, + { + "epoch": 0.34310007687505484, + "grad_norm": 20366.8046875, + "learning_rate": 8.390702893837929e-05, + "loss": 0.4531, + "step": 66500 + }, + { + "epoch": 0.34335804685766763, + "grad_norm": 23514.521484375, + "learning_rate": 8.388017270949158e-05, + "loss": 0.4496, + "step": 66550 + }, + { + "epoch": 0.3436160168402805, + "grad_norm": 23656.869140625, + "learning_rate": 8.385329839602372e-05, + "loss": 0.448, + "step": 66600 + }, + { + "epoch": 0.3438739868228933, + "grad_norm": 23712.216796875, + "learning_rate": 8.382640601232071e-05, + "loss": 0.4502, + "step": 66650 + }, + { + "epoch": 0.3441319568055061, + "grad_norm": 23220.240234375, + "learning_rate": 8.379949557273717e-05, + "loss": 0.4469, + "step": 66700 + }, + { + "epoch": 0.3443899267881189, + "grad_norm": 21469.244140625, + "learning_rate": 8.37725670916374e-05, + "loss": 0.4506, + "step": 66750 + }, + { + "epoch": 0.34464789677073177, + "grad_norm": 19195.431640625, + "learning_rate": 8.374562058339528e-05, + "loss": 0.4494, + "step": 66800 + }, + { + "epoch": 0.34490586675334456, + "grad_norm": 21464.130859375, + "learning_rate": 8.371865606239433e-05, + "loss": 0.4552, + "step": 66850 + }, + { + "epoch": 0.3451638367359574, + "grad_norm": 23449.76953125, + "learning_rate": 8.36916735430277e-05, + "loss": 0.4513, + "step": 66900 + }, + { + "epoch": 0.3454218067185702, + "grad_norm": 20593.39453125, + "learning_rate": 8.366467303969814e-05, + "loss": 0.447, + "step": 66950 + }, + { + "epoch": 0.34567977670118305, + "grad_norm": 21341.72265625, + "learning_rate": 8.3637654566818e-05, + "loss": 0.4448, + "step": 67000 + }, + { + "epoch": 0.3459377466837959, + "grad_norm": 20746.919921875, + "learning_rate": 8.361061813880919e-05, + "loss": 0.4511, + "step": 67050 + }, + { + "epoch": 0.3461957166664087, + "grad_norm": 19786.162109375, + "learning_rate": 8.358356377010325e-05, + "loss": 0.452, + "step": 67100 + }, + { + "epoch": 0.34645368664902154, + "grad_norm": 20875.25, + "learning_rate": 8.355649147514128e-05, + "loss": 0.4491, + "step": 67150 + }, + { + "epoch": 0.34671165663163434, + "grad_norm": 22833.728515625, + "learning_rate": 8.352940126837394e-05, + "loss": 0.4545, + "step": 67200 + }, + { + "epoch": 0.3469696266142472, + "grad_norm": 21289.896484375, + "learning_rate": 8.350229316426146e-05, + "loss": 0.4451, + "step": 67250 + }, + { + "epoch": 0.34722759659686, + "grad_norm": 23276.080078125, + "learning_rate": 8.347516717727363e-05, + "loss": 0.4468, + "step": 67300 + }, + { + "epoch": 0.3474855665794728, + "grad_norm": 22568.234375, + "learning_rate": 8.344802332188977e-05, + "loss": 0.4455, + "step": 67350 + }, + { + "epoch": 0.3477435365620856, + "grad_norm": 19527.234375, + "learning_rate": 8.342086161259874e-05, + "loss": 0.4511, + "step": 67400 + }, + { + "epoch": 0.34800150654469847, + "grad_norm": 21764.56640625, + "learning_rate": 8.339368206389895e-05, + "loss": 0.4481, + "step": 67450 + }, + { + "epoch": 0.34825947652731126, + "grad_norm": 21142.33984375, + "learning_rate": 8.336648469029829e-05, + "loss": 0.4539, + "step": 67500 + }, + { + "epoch": 0.3485174465099241, + "grad_norm": 21612.60546875, + "learning_rate": 8.333926950631421e-05, + "loss": 0.4497, + "step": 67550 + }, + { + "epoch": 0.3487754164925369, + "grad_norm": 20772.0390625, + "learning_rate": 8.331203652647364e-05, + "loss": 0.458, + "step": 67600 + }, + { + "epoch": 0.34903338647514975, + "grad_norm": 22197.166015625, + "learning_rate": 8.328478576531303e-05, + "loss": 0.4499, + "step": 67650 + }, + { + "epoch": 0.34929135645776255, + "grad_norm": 20853.865234375, + "learning_rate": 8.32575172373783e-05, + "loss": 0.4473, + "step": 67700 + }, + { + "epoch": 0.3495493264403754, + "grad_norm": 19692.892578125, + "learning_rate": 8.323023095722486e-05, + "loss": 0.4516, + "step": 67750 + }, + { + "epoch": 0.34980729642298825, + "grad_norm": 22032.115234375, + "learning_rate": 8.32029269394176e-05, + "loss": 0.4452, + "step": 67800 + }, + { + "epoch": 0.35006526640560104, + "grad_norm": 23928.783203125, + "learning_rate": 8.317560519853089e-05, + "loss": 0.4489, + "step": 67850 + }, + { + "epoch": 0.3503232363882139, + "grad_norm": 20832.560546875, + "learning_rate": 8.314826574914853e-05, + "loss": 0.4493, + "step": 67900 + }, + { + "epoch": 0.3505812063708267, + "grad_norm": 23453.634765625, + "learning_rate": 8.31209086058638e-05, + "loss": 0.4487, + "step": 67950 + }, + { + "epoch": 0.35083917635343953, + "grad_norm": 23585.826171875, + "learning_rate": 8.309353378327938e-05, + "loss": 0.4473, + "step": 68000 + }, + { + "epoch": 0.3510971463360523, + "grad_norm": 21680.953125, + "learning_rate": 8.306614129600745e-05, + "loss": 0.4494, + "step": 68050 + }, + { + "epoch": 0.3513551163186652, + "grad_norm": 19228.56640625, + "learning_rate": 8.303873115866958e-05, + "loss": 0.4483, + "step": 68100 + }, + { + "epoch": 0.35161308630127797, + "grad_norm": 22056.6328125, + "learning_rate": 8.301130338589679e-05, + "loss": 0.4485, + "step": 68150 + }, + { + "epoch": 0.3518710562838908, + "grad_norm": 22030.484375, + "learning_rate": 8.298385799232947e-05, + "loss": 0.4462, + "step": 68200 + }, + { + "epoch": 0.3521290262665036, + "grad_norm": 19658.33984375, + "learning_rate": 8.295639499261745e-05, + "loss": 0.4444, + "step": 68250 + }, + { + "epoch": 0.35238699624911646, + "grad_norm": 19667.8125, + "learning_rate": 8.292891440141997e-05, + "loss": 0.4482, + "step": 68300 + }, + { + "epoch": 0.35264496623172925, + "grad_norm": 20248.193359375, + "learning_rate": 8.290141623340558e-05, + "loss": 0.454, + "step": 68350 + }, + { + "epoch": 0.3529029362143421, + "grad_norm": 21358.89453125, + "learning_rate": 8.287390050325232e-05, + "loss": 0.4485, + "step": 68400 + }, + { + "epoch": 0.35316090619695495, + "grad_norm": 19209.328125, + "learning_rate": 8.284636722564754e-05, + "loss": 0.4505, + "step": 68450 + }, + { + "epoch": 0.35341887617956774, + "grad_norm": 21890.7109375, + "learning_rate": 8.281881641528795e-05, + "loss": 0.4531, + "step": 68500 + }, + { + "epoch": 0.3536768461621806, + "grad_norm": 20904.052734375, + "learning_rate": 8.279124808687967e-05, + "loss": 0.4494, + "step": 68550 + }, + { + "epoch": 0.3539348161447934, + "grad_norm": 22519.888671875, + "learning_rate": 8.276366225513812e-05, + "loss": 0.4422, + "step": 68600 + }, + { + "epoch": 0.35419278612740623, + "grad_norm": 20027.009765625, + "learning_rate": 8.27360589347881e-05, + "loss": 0.4484, + "step": 68650 + }, + { + "epoch": 0.354450756110019, + "grad_norm": 22069.64453125, + "learning_rate": 8.27084381405637e-05, + "loss": 0.443, + "step": 68700 + }, + { + "epoch": 0.3547087260926319, + "grad_norm": 23096.74609375, + "learning_rate": 8.26807998872084e-05, + "loss": 0.4437, + "step": 68750 + }, + { + "epoch": 0.35496669607524467, + "grad_norm": 19204.626953125, + "learning_rate": 8.265314418947494e-05, + "loss": 0.4496, + "step": 68800 + }, + { + "epoch": 0.3552246660578575, + "grad_norm": 26871.888671875, + "learning_rate": 8.262547106212541e-05, + "loss": 0.446, + "step": 68850 + }, + { + "epoch": 0.3554826360404703, + "grad_norm": 21342.556640625, + "learning_rate": 8.259778051993118e-05, + "loss": 0.4525, + "step": 68900 + }, + { + "epoch": 0.35574060602308316, + "grad_norm": 23054.814453125, + "learning_rate": 8.25700725776729e-05, + "loss": 0.4427, + "step": 68950 + }, + { + "epoch": 0.35599857600569595, + "grad_norm": 20473.818359375, + "learning_rate": 8.254234725014061e-05, + "loss": 0.4452, + "step": 69000 + }, + { + "epoch": 0.3562565459883088, + "grad_norm": 22081.576171875, + "learning_rate": 8.251460455213347e-05, + "loss": 0.4533, + "step": 69050 + }, + { + "epoch": 0.3565145159709216, + "grad_norm": 21840.048828125, + "learning_rate": 8.248684449846004e-05, + "loss": 0.4503, + "step": 69100 + }, + { + "epoch": 0.35677248595353445, + "grad_norm": 21595.234375, + "learning_rate": 8.245906710393808e-05, + "loss": 0.4459, + "step": 69150 + }, + { + "epoch": 0.3570304559361473, + "grad_norm": 22540.302734375, + "learning_rate": 8.243127238339463e-05, + "loss": 0.4461, + "step": 69200 + }, + { + "epoch": 0.3572884259187601, + "grad_norm": 20646.5859375, + "learning_rate": 8.2403460351666e-05, + "loss": 0.4522, + "step": 69250 + }, + { + "epoch": 0.35754639590137294, + "grad_norm": 20219.978515625, + "learning_rate": 8.237563102359767e-05, + "loss": 0.4464, + "step": 69300 + }, + { + "epoch": 0.35780436588398573, + "grad_norm": 21399.888671875, + "learning_rate": 8.234778441404441e-05, + "loss": 0.451, + "step": 69350 + }, + { + "epoch": 0.3580623358665986, + "grad_norm": 23263.193359375, + "learning_rate": 8.231992053787024e-05, + "loss": 0.4491, + "step": 69400 + }, + { + "epoch": 0.3583203058492114, + "grad_norm": 20740.455078125, + "learning_rate": 8.229203940994829e-05, + "loss": 0.4456, + "step": 69450 + }, + { + "epoch": 0.3585782758318242, + "grad_norm": 21715.078125, + "learning_rate": 8.226414104516102e-05, + "loss": 0.4467, + "step": 69500 + }, + { + "epoch": 0.358836245814437, + "grad_norm": 19771.517578125, + "learning_rate": 8.223622545840001e-05, + "loss": 0.4505, + "step": 69550 + }, + { + "epoch": 0.35909421579704986, + "grad_norm": 20944.298828125, + "learning_rate": 8.220829266456608e-05, + "loss": 0.4481, + "step": 69600 + }, + { + "epoch": 0.35935218577966266, + "grad_norm": 22313.017578125, + "learning_rate": 8.21803426785692e-05, + "loss": 0.4503, + "step": 69650 + }, + { + "epoch": 0.3596101557622755, + "grad_norm": 22525.5859375, + "learning_rate": 8.215237551532853e-05, + "loss": 0.4488, + "step": 69700 + }, + { + "epoch": 0.3598681257448883, + "grad_norm": 22731.85546875, + "learning_rate": 8.21243911897724e-05, + "loss": 0.4476, + "step": 69750 + }, + { + "epoch": 0.36012609572750115, + "grad_norm": 20872.9375, + "learning_rate": 8.20963897168383e-05, + "loss": 0.4485, + "step": 69800 + }, + { + "epoch": 0.360384065710114, + "grad_norm": 21066.095703125, + "learning_rate": 8.206837111147289e-05, + "loss": 0.4511, + "step": 69850 + }, + { + "epoch": 0.3606420356927268, + "grad_norm": 21823.62890625, + "learning_rate": 8.204033538863197e-05, + "loss": 0.4415, + "step": 69900 + }, + { + "epoch": 0.36090000567533964, + "grad_norm": 19639.724609375, + "learning_rate": 8.201228256328042e-05, + "loss": 0.4456, + "step": 69950 + }, + { + "epoch": 0.36115797565795243, + "grad_norm": 25321.20703125, + "learning_rate": 8.198421265039231e-05, + "loss": 0.4506, + "step": 70000 + }, + { + "epoch": 0.36115797565795243, + "eval_loss": 0.43597322702407837, + "eval_runtime": 3285.9769, + "eval_samples_per_second": 943.744, + "eval_steps_per_second": 1.843, + "step": 70000 + }, + { + "epoch": 0.3614159456405653, + "grad_norm": 19558.943359375, + "learning_rate": 8.195612566495084e-05, + "loss": 0.4502, + "step": 70050 + }, + { + "epoch": 0.3616739156231781, + "grad_norm": 21766.482421875, + "learning_rate": 8.192802162194828e-05, + "loss": 0.4444, + "step": 70100 + }, + { + "epoch": 0.3619318856057909, + "grad_norm": 23117.017578125, + "learning_rate": 8.189990053638603e-05, + "loss": 0.4476, + "step": 70150 + }, + { + "epoch": 0.3621898555884037, + "grad_norm": 19175.60546875, + "learning_rate": 8.18717624232746e-05, + "loss": 0.4479, + "step": 70200 + }, + { + "epoch": 0.36244782557101657, + "grad_norm": 22124.80078125, + "learning_rate": 8.184360729763351e-05, + "loss": 0.449, + "step": 70250 + }, + { + "epoch": 0.36270579555362936, + "grad_norm": 21717.501953125, + "learning_rate": 8.181543517449147e-05, + "loss": 0.4488, + "step": 70300 + }, + { + "epoch": 0.3629637655362422, + "grad_norm": 20235.162109375, + "learning_rate": 8.178724606888621e-05, + "loss": 0.4496, + "step": 70350 + }, + { + "epoch": 0.363221735518855, + "grad_norm": 22513.677734375, + "learning_rate": 8.175903999586455e-05, + "loss": 0.4463, + "step": 70400 + }, + { + "epoch": 0.36347970550146785, + "grad_norm": 21388.1953125, + "learning_rate": 8.173081697048228e-05, + "loss": 0.4446, + "step": 70450 + }, + { + "epoch": 0.36373767548408065, + "grad_norm": 20549.271484375, + "learning_rate": 8.170257700780435e-05, + "loss": 0.4421, + "step": 70500 + }, + { + "epoch": 0.3639956454666935, + "grad_norm": 21219.158203125, + "learning_rate": 8.16743201229047e-05, + "loss": 0.4472, + "step": 70550 + }, + { + "epoch": 0.36425361544930634, + "grad_norm": 20570.34375, + "learning_rate": 8.164604633086632e-05, + "loss": 0.4487, + "step": 70600 + }, + { + "epoch": 0.36451158543191914, + "grad_norm": 17376.671875, + "learning_rate": 8.161775564678118e-05, + "loss": 0.4413, + "step": 70650 + }, + { + "epoch": 0.364769555414532, + "grad_norm": 21676.33984375, + "learning_rate": 8.158944808575032e-05, + "loss": 0.4433, + "step": 70700 + }, + { + "epoch": 0.3650275253971448, + "grad_norm": 21901.001953125, + "learning_rate": 8.156112366288378e-05, + "loss": 0.4465, + "step": 70750 + }, + { + "epoch": 0.36528549537975763, + "grad_norm": 20330.720703125, + "learning_rate": 8.153278239330056e-05, + "loss": 0.4456, + "step": 70800 + }, + { + "epoch": 0.3655434653623704, + "grad_norm": 22179.904296875, + "learning_rate": 8.15044242921287e-05, + "loss": 0.4465, + "step": 70850 + }, + { + "epoch": 0.3658014353449833, + "grad_norm": 21384.66015625, + "learning_rate": 8.14760493745052e-05, + "loss": 0.4476, + "step": 70900 + }, + { + "epoch": 0.36605940532759607, + "grad_norm": 21706.103515625, + "learning_rate": 8.144765765557604e-05, + "loss": 0.4475, + "step": 70950 + }, + { + "epoch": 0.3663173753102089, + "grad_norm": 20332.5, + "learning_rate": 8.141924915049617e-05, + "loss": 0.449, + "step": 71000 + }, + { + "epoch": 0.3665753452928217, + "grad_norm": 22648.640625, + "learning_rate": 8.139082387442951e-05, + "loss": 0.4566, + "step": 71050 + }, + { + "epoch": 0.36683331527543456, + "grad_norm": 21496.291015625, + "learning_rate": 8.136238184254892e-05, + "loss": 0.4493, + "step": 71100 + }, + { + "epoch": 0.36709128525804735, + "grad_norm": 22114.169921875, + "learning_rate": 8.133392307003618e-05, + "loss": 0.4441, + "step": 71150 + }, + { + "epoch": 0.3673492552406602, + "grad_norm": 22476.390625, + "learning_rate": 8.130544757208205e-05, + "loss": 0.4391, + "step": 71200 + }, + { + "epoch": 0.367607225223273, + "grad_norm": 22175.044921875, + "learning_rate": 8.127695536388623e-05, + "loss": 0.4439, + "step": 71250 + }, + { + "epoch": 0.36786519520588584, + "grad_norm": 19715.728515625, + "learning_rate": 8.124844646065724e-05, + "loss": 0.448, + "step": 71300 + }, + { + "epoch": 0.3681231651884987, + "grad_norm": 19609.146484375, + "learning_rate": 8.121992087761266e-05, + "loss": 0.4476, + "step": 71350 + }, + { + "epoch": 0.3683811351711115, + "grad_norm": 21872.12890625, + "learning_rate": 8.119137862997883e-05, + "loss": 0.4536, + "step": 71400 + }, + { + "epoch": 0.36863910515372433, + "grad_norm": 19710.619140625, + "learning_rate": 8.116281973299107e-05, + "loss": 0.4466, + "step": 71450 + }, + { + "epoch": 0.3688970751363371, + "grad_norm": 21783.138671875, + "learning_rate": 8.113424420189357e-05, + "loss": 0.4422, + "step": 71500 + }, + { + "epoch": 0.36915504511895, + "grad_norm": 20527.984375, + "learning_rate": 8.110565205193941e-05, + "loss": 0.4499, + "step": 71550 + }, + { + "epoch": 0.36941301510156277, + "grad_norm": 21693.171875, + "learning_rate": 8.10770432983905e-05, + "loss": 0.4465, + "step": 71600 + }, + { + "epoch": 0.3696709850841756, + "grad_norm": 19817.142578125, + "learning_rate": 8.104841795651765e-05, + "loss": 0.4471, + "step": 71650 + }, + { + "epoch": 0.3699289550667884, + "grad_norm": 20883.767578125, + "learning_rate": 8.101977604160052e-05, + "loss": 0.4507, + "step": 71700 + }, + { + "epoch": 0.37018692504940126, + "grad_norm": 21206.943359375, + "learning_rate": 8.099111756892759e-05, + "loss": 0.4415, + "step": 71750 + }, + { + "epoch": 0.37044489503201405, + "grad_norm": 21431.19140625, + "learning_rate": 8.096244255379621e-05, + "loss": 0.4542, + "step": 71800 + }, + { + "epoch": 0.3707028650146269, + "grad_norm": 23020.34375, + "learning_rate": 8.093375101151255e-05, + "loss": 0.4481, + "step": 71850 + }, + { + "epoch": 0.3709608349972397, + "grad_norm": 20704.1171875, + "learning_rate": 8.09050429573916e-05, + "loss": 0.4427, + "step": 71900 + }, + { + "epoch": 0.37121880497985255, + "grad_norm": 20195.037109375, + "learning_rate": 8.087631840675715e-05, + "loss": 0.4416, + "step": 71950 + }, + { + "epoch": 0.3714767749624654, + "grad_norm": 21187.99609375, + "learning_rate": 8.084757737494184e-05, + "loss": 0.452, + "step": 72000 + }, + { + "epoch": 0.3717347449450782, + "grad_norm": 20694.912109375, + "learning_rate": 8.081881987728703e-05, + "loss": 0.4416, + "step": 72050 + }, + { + "epoch": 0.37199271492769104, + "grad_norm": 23006.939453125, + "learning_rate": 8.079004592914297e-05, + "loss": 0.4426, + "step": 72100 + }, + { + "epoch": 0.37225068491030383, + "grad_norm": 21854.025390625, + "learning_rate": 8.076125554586859e-05, + "loss": 0.4453, + "step": 72150 + }, + { + "epoch": 0.3725086548929167, + "grad_norm": 19155.400390625, + "learning_rate": 8.073244874283166e-05, + "loss": 0.4539, + "step": 72200 + }, + { + "epoch": 0.3727666248755295, + "grad_norm": 22085.5625, + "learning_rate": 8.070362553540869e-05, + "loss": 0.4474, + "step": 72250 + }, + { + "epoch": 0.3730245948581423, + "grad_norm": 21225.626953125, + "learning_rate": 8.067478593898495e-05, + "loss": 0.4431, + "step": 72300 + }, + { + "epoch": 0.3732825648407551, + "grad_norm": 21605.546875, + "learning_rate": 8.064592996895446e-05, + "loss": 0.4534, + "step": 72350 + }, + { + "epoch": 0.37354053482336796, + "grad_norm": 20774.87109375, + "learning_rate": 8.061705764071999e-05, + "loss": 0.4462, + "step": 72400 + }, + { + "epoch": 0.37379850480598076, + "grad_norm": 21871.390625, + "learning_rate": 8.0588168969693e-05, + "loss": 0.4445, + "step": 72450 + }, + { + "epoch": 0.3740564747885936, + "grad_norm": 22102.560546875, + "learning_rate": 8.05592639712937e-05, + "loss": 0.4478, + "step": 72500 + }, + { + "epoch": 0.3743144447712064, + "grad_norm": 21172.283203125, + "learning_rate": 8.053034266095105e-05, + "loss": 0.4469, + "step": 72550 + }, + { + "epoch": 0.37457241475381925, + "grad_norm": 21827.390625, + "learning_rate": 8.050140505410268e-05, + "loss": 0.4485, + "step": 72600 + }, + { + "epoch": 0.37483038473643204, + "grad_norm": 21271.87890625, + "learning_rate": 8.047245116619492e-05, + "loss": 0.45, + "step": 72650 + }, + { + "epoch": 0.3750883547190449, + "grad_norm": 21192.6484375, + "learning_rate": 8.04434810126828e-05, + "loss": 0.442, + "step": 72700 + }, + { + "epoch": 0.37534632470165774, + "grad_norm": 21529.736328125, + "learning_rate": 8.041449460903001e-05, + "loss": 0.4462, + "step": 72750 + }, + { + "epoch": 0.37560429468427053, + "grad_norm": 18609.474609375, + "learning_rate": 8.038549197070893e-05, + "loss": 0.4436, + "step": 72800 + }, + { + "epoch": 0.3758622646668834, + "grad_norm": 21631.82421875, + "learning_rate": 8.035647311320062e-05, + "loss": 0.4507, + "step": 72850 + }, + { + "epoch": 0.3761202346494962, + "grad_norm": 22347.056640625, + "learning_rate": 8.03274380519948e-05, + "loss": 0.4472, + "step": 72900 + }, + { + "epoch": 0.376378204632109, + "grad_norm": 20416.37109375, + "learning_rate": 8.029838680258979e-05, + "loss": 0.4475, + "step": 72950 + }, + { + "epoch": 0.3766361746147218, + "grad_norm": 21952.27734375, + "learning_rate": 8.026931938049259e-05, + "loss": 0.4449, + "step": 73000 + }, + { + "epoch": 0.37689414459733467, + "grad_norm": 23068.12109375, + "learning_rate": 8.024023580121885e-05, + "loss": 0.4477, + "step": 73050 + }, + { + "epoch": 0.37715211457994746, + "grad_norm": 21956.462890625, + "learning_rate": 8.021113608029281e-05, + "loss": 0.4459, + "step": 73100 + }, + { + "epoch": 0.3774100845625603, + "grad_norm": 20933.28125, + "learning_rate": 8.018202023324733e-05, + "loss": 0.4481, + "step": 73150 + }, + { + "epoch": 0.3776680545451731, + "grad_norm": 23138.638671875, + "learning_rate": 8.015288827562389e-05, + "loss": 0.437, + "step": 73200 + }, + { + "epoch": 0.37792602452778595, + "grad_norm": 20973.119140625, + "learning_rate": 8.012374022297255e-05, + "loss": 0.4454, + "step": 73250 + }, + { + "epoch": 0.37818399451039875, + "grad_norm": 21328.29296875, + "learning_rate": 8.0094576090852e-05, + "loss": 0.4426, + "step": 73300 + }, + { + "epoch": 0.3784419644930116, + "grad_norm": 20653.591796875, + "learning_rate": 8.006539589482949e-05, + "loss": 0.4448, + "step": 73350 + }, + { + "epoch": 0.3786999344756244, + "grad_norm": 21520.181640625, + "learning_rate": 8.003619965048083e-05, + "loss": 0.4428, + "step": 73400 + }, + { + "epoch": 0.37895790445823724, + "grad_norm": 20736.89453125, + "learning_rate": 8.000698737339041e-05, + "loss": 0.4483, + "step": 73450 + }, + { + "epoch": 0.3792158744408501, + "grad_norm": 23887.587890625, + "learning_rate": 7.997775907915118e-05, + "loss": 0.4518, + "step": 73500 + }, + { + "epoch": 0.3794738444234629, + "grad_norm": 23771.8671875, + "learning_rate": 7.994851478336465e-05, + "loss": 0.4479, + "step": 73550 + }, + { + "epoch": 0.37973181440607573, + "grad_norm": 21563.27734375, + "learning_rate": 7.991925450164084e-05, + "loss": 0.4433, + "step": 73600 + }, + { + "epoch": 0.3799897843886885, + "grad_norm": 21403.751953125, + "learning_rate": 7.988997824959832e-05, + "loss": 0.4443, + "step": 73650 + }, + { + "epoch": 0.38024775437130137, + "grad_norm": 22136.51171875, + "learning_rate": 7.986068604286421e-05, + "loss": 0.446, + "step": 73700 + }, + { + "epoch": 0.38050572435391417, + "grad_norm": 22143.857421875, + "learning_rate": 7.98313778970741e-05, + "loss": 0.4416, + "step": 73750 + }, + { + "epoch": 0.380763694336527, + "grad_norm": 22035.1171875, + "learning_rate": 7.980205382787211e-05, + "loss": 0.4413, + "step": 73800 + }, + { + "epoch": 0.3810216643191398, + "grad_norm": 21744.25390625, + "learning_rate": 7.97727138509109e-05, + "loss": 0.4463, + "step": 73850 + }, + { + "epoch": 0.38127963430175266, + "grad_norm": 21739.26171875, + "learning_rate": 7.974335798185153e-05, + "loss": 0.4415, + "step": 73900 + }, + { + "epoch": 0.38153760428436545, + "grad_norm": 20974.59765625, + "learning_rate": 7.971398623636361e-05, + "loss": 0.4457, + "step": 73950 + }, + { + "epoch": 0.3817955742669783, + "grad_norm": 19807.79296875, + "learning_rate": 7.968459863012523e-05, + "loss": 0.4423, + "step": 74000 + }, + { + "epoch": 0.3820535442495911, + "grad_norm": 21711.158203125, + "learning_rate": 7.96551951788229e-05, + "loss": 0.4466, + "step": 74050 + }, + { + "epoch": 0.38231151423220394, + "grad_norm": 19187.47265625, + "learning_rate": 7.962577589815163e-05, + "loss": 0.4387, + "step": 74100 + }, + { + "epoch": 0.3825694842148168, + "grad_norm": 19402.611328125, + "learning_rate": 7.959634080381486e-05, + "loss": 0.444, + "step": 74150 + }, + { + "epoch": 0.3828274541974296, + "grad_norm": 21287.9765625, + "learning_rate": 7.956688991152445e-05, + "loss": 0.4386, + "step": 74200 + }, + { + "epoch": 0.38308542418004243, + "grad_norm": 20430.591796875, + "learning_rate": 7.953742323700075e-05, + "loss": 0.4453, + "step": 74250 + }, + { + "epoch": 0.3833433941626552, + "grad_norm": 23246.041015625, + "learning_rate": 7.950794079597248e-05, + "loss": 0.4448, + "step": 74300 + }, + { + "epoch": 0.3836013641452681, + "grad_norm": 23098.74609375, + "learning_rate": 7.94784426041768e-05, + "loss": 0.4449, + "step": 74350 + }, + { + "epoch": 0.38385933412788087, + "grad_norm": 21504.71484375, + "learning_rate": 7.944892867735929e-05, + "loss": 0.4423, + "step": 74400 + }, + { + "epoch": 0.3841173041104937, + "grad_norm": 20115.0859375, + "learning_rate": 7.941939903127386e-05, + "loss": 0.4462, + "step": 74450 + }, + { + "epoch": 0.3843752740931065, + "grad_norm": 20473.681640625, + "learning_rate": 7.938985368168293e-05, + "loss": 0.4541, + "step": 74500 + }, + { + "epoch": 0.38463324407571936, + "grad_norm": 19664.6640625, + "learning_rate": 7.93602926443572e-05, + "loss": 0.4439, + "step": 74550 + }, + { + "epoch": 0.38489121405833215, + "grad_norm": 20806.474609375, + "learning_rate": 7.933071593507579e-05, + "loss": 0.439, + "step": 74600 + }, + { + "epoch": 0.385149184040945, + "grad_norm": 20905.197265625, + "learning_rate": 7.930112356962618e-05, + "loss": 0.444, + "step": 74650 + }, + { + "epoch": 0.3854071540235578, + "grad_norm": 26333.470703125, + "learning_rate": 7.927151556380417e-05, + "loss": 0.4462, + "step": 74700 + }, + { + "epoch": 0.38566512400617065, + "grad_norm": 20478.18359375, + "learning_rate": 7.924189193341396e-05, + "loss": 0.4456, + "step": 74750 + }, + { + "epoch": 0.38592309398878344, + "grad_norm": 20605.662109375, + "learning_rate": 7.921225269426808e-05, + "loss": 0.4412, + "step": 74800 + }, + { + "epoch": 0.3861810639713963, + "grad_norm": 23029.943359375, + "learning_rate": 7.918259786218738e-05, + "loss": 0.4427, + "step": 74850 + }, + { + "epoch": 0.38643903395400914, + "grad_norm": 23275.130859375, + "learning_rate": 7.915292745300103e-05, + "loss": 0.4436, + "step": 74900 + }, + { + "epoch": 0.38669700393662193, + "grad_norm": 22123.671875, + "learning_rate": 7.91232414825465e-05, + "loss": 0.4456, + "step": 74950 + }, + { + "epoch": 0.3869549739192348, + "grad_norm": 22476.365234375, + "learning_rate": 7.909353996666961e-05, + "loss": 0.4424, + "step": 75000 + }, + { + "epoch": 0.3869549739192348, + "eval_loss": 0.43277591466903687, + "eval_runtime": 3260.4686, + "eval_samples_per_second": 951.127, + "eval_steps_per_second": 1.858, + "step": 75000 + }, + { + "epoch": 0.3872129439018476, + "grad_norm": 22150.966796875, + "learning_rate": 7.906382292122448e-05, + "loss": 0.4407, + "step": 75050 + }, + { + "epoch": 0.3874709138844604, + "grad_norm": 20100.5625, + "learning_rate": 7.903409036207343e-05, + "loss": 0.4443, + "step": 75100 + }, + { + "epoch": 0.3877288838670732, + "grad_norm": 22078.353515625, + "learning_rate": 7.900434230508715e-05, + "loss": 0.4468, + "step": 75150 + }, + { + "epoch": 0.38798685384968606, + "grad_norm": 20395.498046875, + "learning_rate": 7.897457876614461e-05, + "loss": 0.4424, + "step": 75200 + }, + { + "epoch": 0.38824482383229886, + "grad_norm": 23190.4140625, + "learning_rate": 7.894479976113298e-05, + "loss": 0.4394, + "step": 75250 + }, + { + "epoch": 0.3885027938149117, + "grad_norm": 21523.7265625, + "learning_rate": 7.891500530594771e-05, + "loss": 0.4441, + "step": 75300 + }, + { + "epoch": 0.3887607637975245, + "grad_norm": 22941.23828125, + "learning_rate": 7.888519541649253e-05, + "loss": 0.443, + "step": 75350 + }, + { + "epoch": 0.38901873378013735, + "grad_norm": 21467.90234375, + "learning_rate": 7.885537010867936e-05, + "loss": 0.4478, + "step": 75400 + }, + { + "epoch": 0.38927670376275014, + "grad_norm": 22635.732421875, + "learning_rate": 7.882552939842837e-05, + "loss": 0.4415, + "step": 75450 + }, + { + "epoch": 0.389534673745363, + "grad_norm": 21242.326171875, + "learning_rate": 7.879567330166797e-05, + "loss": 0.4352, + "step": 75500 + }, + { + "epoch": 0.38979264372797584, + "grad_norm": 20005.158203125, + "learning_rate": 7.876580183433475e-05, + "loss": 0.4393, + "step": 75550 + }, + { + "epoch": 0.39005061371058863, + "grad_norm": 23355.044921875, + "learning_rate": 7.873591501237351e-05, + "loss": 0.4465, + "step": 75600 + }, + { + "epoch": 0.3903085836932015, + "grad_norm": 21217.359375, + "learning_rate": 7.870601285173731e-05, + "loss": 0.4437, + "step": 75650 + }, + { + "epoch": 0.3905665536758143, + "grad_norm": 22424.580078125, + "learning_rate": 7.867609536838729e-05, + "loss": 0.4397, + "step": 75700 + }, + { + "epoch": 0.3908245236584271, + "grad_norm": 20943.65234375, + "learning_rate": 7.864616257829285e-05, + "loss": 0.4427, + "step": 75750 + }, + { + "epoch": 0.3910824936410399, + "grad_norm": 23246.5625, + "learning_rate": 7.861621449743152e-05, + "loss": 0.4479, + "step": 75800 + }, + { + "epoch": 0.39134046362365277, + "grad_norm": 21575.830078125, + "learning_rate": 7.858625114178902e-05, + "loss": 0.4384, + "step": 75850 + }, + { + "epoch": 0.39159843360626556, + "grad_norm": 22053.5546875, + "learning_rate": 7.855627252735918e-05, + "loss": 0.4364, + "step": 75900 + }, + { + "epoch": 0.3918564035888784, + "grad_norm": 21934.55078125, + "learning_rate": 7.852627867014406e-05, + "loss": 0.4466, + "step": 75950 + }, + { + "epoch": 0.3921143735714912, + "grad_norm": 20184.078125, + "learning_rate": 7.849626958615374e-05, + "loss": 0.4422, + "step": 76000 + }, + { + "epoch": 0.39237234355410405, + "grad_norm": 21770.923828125, + "learning_rate": 7.846624529140652e-05, + "loss": 0.4382, + "step": 76050 + }, + { + "epoch": 0.39263031353671685, + "grad_norm": 21592.16796875, + "learning_rate": 7.843620580192877e-05, + "loss": 0.4404, + "step": 76100 + }, + { + "epoch": 0.3928882835193297, + "grad_norm": 19634.1875, + "learning_rate": 7.8406151133755e-05, + "loss": 0.4443, + "step": 76150 + }, + { + "epoch": 0.3931462535019425, + "grad_norm": 24045.01171875, + "learning_rate": 7.837608130292782e-05, + "loss": 0.438, + "step": 76200 + }, + { + "epoch": 0.39340422348455534, + "grad_norm": 21739.921875, + "learning_rate": 7.83459963254979e-05, + "loss": 0.4474, + "step": 76250 + }, + { + "epoch": 0.3936621934671682, + "grad_norm": 20915.56640625, + "learning_rate": 7.831589621752405e-05, + "loss": 0.4463, + "step": 76300 + }, + { + "epoch": 0.393920163449781, + "grad_norm": 18799.80078125, + "learning_rate": 7.828578099507308e-05, + "loss": 0.4401, + "step": 76350 + }, + { + "epoch": 0.39417813343239383, + "grad_norm": 19029.51171875, + "learning_rate": 7.825565067421995e-05, + "loss": 0.4428, + "step": 76400 + }, + { + "epoch": 0.3944361034150066, + "grad_norm": 22817.376953125, + "learning_rate": 7.822550527104762e-05, + "loss": 0.4467, + "step": 76450 + }, + { + "epoch": 0.39469407339761947, + "grad_norm": 19165.529296875, + "learning_rate": 7.819534480164713e-05, + "loss": 0.4365, + "step": 76500 + }, + { + "epoch": 0.39495204338023226, + "grad_norm": 22980.056640625, + "learning_rate": 7.816516928211756e-05, + "loss": 0.4386, + "step": 76550 + }, + { + "epoch": 0.3952100133628451, + "grad_norm": 21261.7109375, + "learning_rate": 7.813497872856603e-05, + "loss": 0.4358, + "step": 76600 + }, + { + "epoch": 0.3954679833454579, + "grad_norm": 21533.779296875, + "learning_rate": 7.810477315710763e-05, + "loss": 0.4444, + "step": 76650 + }, + { + "epoch": 0.39572595332807076, + "grad_norm": 20503.556640625, + "learning_rate": 7.807455258386556e-05, + "loss": 0.4446, + "step": 76700 + }, + { + "epoch": 0.39598392331068355, + "grad_norm": 21180.939453125, + "learning_rate": 7.804431702497093e-05, + "loss": 0.4486, + "step": 76750 + }, + { + "epoch": 0.3962418932932964, + "grad_norm": 24126.484375, + "learning_rate": 7.801406649656294e-05, + "loss": 0.4419, + "step": 76800 + }, + { + "epoch": 0.3964998632759092, + "grad_norm": 19791.345703125, + "learning_rate": 7.79838010147887e-05, + "loss": 0.4499, + "step": 76850 + }, + { + "epoch": 0.39675783325852204, + "grad_norm": 21118.822265625, + "learning_rate": 7.795352059580334e-05, + "loss": 0.4403, + "step": 76900 + }, + { + "epoch": 0.39701580324113483, + "grad_norm": 20787.6015625, + "learning_rate": 7.792322525577e-05, + "loss": 0.4394, + "step": 76950 + }, + { + "epoch": 0.3972737732237477, + "grad_norm": 21575.86328125, + "learning_rate": 7.789291501085972e-05, + "loss": 0.4482, + "step": 77000 + }, + { + "epoch": 0.39753174320636053, + "grad_norm": 21271.287109375, + "learning_rate": 7.78625898772515e-05, + "loss": 0.4413, + "step": 77050 + }, + { + "epoch": 0.3977897131889733, + "grad_norm": 21294.7890625, + "learning_rate": 7.783224987113235e-05, + "loss": 0.4393, + "step": 77100 + }, + { + "epoch": 0.3980476831715862, + "grad_norm": 21880.341796875, + "learning_rate": 7.780189500869716e-05, + "loss": 0.4464, + "step": 77150 + }, + { + "epoch": 0.39830565315419897, + "grad_norm": 22501.482421875, + "learning_rate": 7.777152530614876e-05, + "loss": 0.4384, + "step": 77200 + }, + { + "epoch": 0.3985636231368118, + "grad_norm": 20404.89453125, + "learning_rate": 7.774114077969792e-05, + "loss": 0.4355, + "step": 77250 + }, + { + "epoch": 0.3988215931194246, + "grad_norm": 21435.66015625, + "learning_rate": 7.77107414455633e-05, + "loss": 0.4468, + "step": 77300 + }, + { + "epoch": 0.39907956310203746, + "grad_norm": 20239.091796875, + "learning_rate": 7.768032731997148e-05, + "loss": 0.4453, + "step": 77350 + }, + { + "epoch": 0.39933753308465025, + "grad_norm": 19040.37109375, + "learning_rate": 7.764989841915694e-05, + "loss": 0.4487, + "step": 77400 + }, + { + "epoch": 0.3995955030672631, + "grad_norm": 22501.13671875, + "learning_rate": 7.761945475936203e-05, + "loss": 0.4488, + "step": 77450 + }, + { + "epoch": 0.3998534730498759, + "grad_norm": 20773.27734375, + "learning_rate": 7.7588996356837e-05, + "loss": 0.4384, + "step": 77500 + }, + { + "epoch": 0.40011144303248874, + "grad_norm": 22598.4140625, + "learning_rate": 7.755852322783994e-05, + "loss": 0.4358, + "step": 77550 + }, + { + "epoch": 0.40036941301510154, + "grad_norm": 20656.033203125, + "learning_rate": 7.752803538863683e-05, + "loss": 0.4434, + "step": 77600 + }, + { + "epoch": 0.4006273829977144, + "grad_norm": 20882.3125, + "learning_rate": 7.749753285550146e-05, + "loss": 0.4408, + "step": 77650 + }, + { + "epoch": 0.40088535298032724, + "grad_norm": 19519.408203125, + "learning_rate": 7.746701564471553e-05, + "loss": 0.439, + "step": 77700 + }, + { + "epoch": 0.40114332296294003, + "grad_norm": 21141.80859375, + "learning_rate": 7.74364837725685e-05, + "loss": 0.4422, + "step": 77750 + }, + { + "epoch": 0.4014012929455529, + "grad_norm": 21487.45703125, + "learning_rate": 7.74059372553577e-05, + "loss": 0.429, + "step": 77800 + }, + { + "epoch": 0.4016592629281657, + "grad_norm": 19889.447265625, + "learning_rate": 7.737537610938829e-05, + "loss": 0.4474, + "step": 77850 + }, + { + "epoch": 0.4019172329107785, + "grad_norm": 21914.947265625, + "learning_rate": 7.73448003509732e-05, + "loss": 0.4403, + "step": 77900 + }, + { + "epoch": 0.4021752028933913, + "grad_norm": 24025.521484375, + "learning_rate": 7.731420999643319e-05, + "loss": 0.4432, + "step": 77950 + }, + { + "epoch": 0.40243317287600416, + "grad_norm": 19703.50390625, + "learning_rate": 7.728360506209679e-05, + "loss": 0.443, + "step": 78000 + }, + { + "epoch": 0.40269114285861696, + "grad_norm": 21566.37890625, + "learning_rate": 7.725298556430034e-05, + "loss": 0.448, + "step": 78050 + }, + { + "epoch": 0.4029491128412298, + "grad_norm": 21902.564453125, + "learning_rate": 7.72223515193879e-05, + "loss": 0.438, + "step": 78100 + }, + { + "epoch": 0.4032070828238426, + "grad_norm": 20892.7578125, + "learning_rate": 7.719170294371136e-05, + "loss": 0.4382, + "step": 78150 + }, + { + "epoch": 0.40346505280645545, + "grad_norm": 21648.673828125, + "learning_rate": 7.716103985363033e-05, + "loss": 0.4378, + "step": 78200 + }, + { + "epoch": 0.40372302278906824, + "grad_norm": 23124.40625, + "learning_rate": 7.713036226551215e-05, + "loss": 0.442, + "step": 78250 + }, + { + "epoch": 0.4039809927716811, + "grad_norm": 25006.751953125, + "learning_rate": 7.709967019573195e-05, + "loss": 0.4397, + "step": 78300 + }, + { + "epoch": 0.4042389627542939, + "grad_norm": 20722.802734375, + "learning_rate": 7.706896366067256e-05, + "loss": 0.4388, + "step": 78350 + }, + { + "epoch": 0.40449693273690673, + "grad_norm": 20202.013671875, + "learning_rate": 7.703824267672452e-05, + "loss": 0.4404, + "step": 78400 + }, + { + "epoch": 0.4047549027195196, + "grad_norm": 21261.9375, + "learning_rate": 7.700750726028609e-05, + "loss": 0.4369, + "step": 78450 + }, + { + "epoch": 0.4050128727021324, + "grad_norm": 25343.57421875, + "learning_rate": 7.69767574277633e-05, + "loss": 0.4444, + "step": 78500 + }, + { + "epoch": 0.4052708426847452, + "grad_norm": 20222.767578125, + "learning_rate": 7.694599319556972e-05, + "loss": 0.4425, + "step": 78550 + }, + { + "epoch": 0.405528812667358, + "grad_norm": 22934.466796875, + "learning_rate": 7.691521458012678e-05, + "loss": 0.4411, + "step": 78600 + }, + { + "epoch": 0.40578678264997087, + "grad_norm": 22235.30078125, + "learning_rate": 7.688442159786346e-05, + "loss": 0.4445, + "step": 78650 + }, + { + "epoch": 0.40604475263258366, + "grad_norm": 21313.986328125, + "learning_rate": 7.68536142652165e-05, + "loss": 0.4341, + "step": 78700 + }, + { + "epoch": 0.4063027226151965, + "grad_norm": 20130.53515625, + "learning_rate": 7.68227925986302e-05, + "loss": 0.4395, + "step": 78750 + }, + { + "epoch": 0.4065606925978093, + "grad_norm": 19342.740234375, + "learning_rate": 7.679195661455664e-05, + "loss": 0.4424, + "step": 78800 + }, + { + "epoch": 0.40681866258042215, + "grad_norm": 21876.705078125, + "learning_rate": 7.676110632945543e-05, + "loss": 0.4415, + "step": 78850 + }, + { + "epoch": 0.40707663256303495, + "grad_norm": 23199.501953125, + "learning_rate": 7.673024175979384e-05, + "loss": 0.4423, + "step": 78900 + }, + { + "epoch": 0.4073346025456478, + "grad_norm": 22781.091796875, + "learning_rate": 7.669936292204683e-05, + "loss": 0.4398, + "step": 78950 + }, + { + "epoch": 0.4075925725282606, + "grad_norm": 24025.9375, + "learning_rate": 7.666846983269688e-05, + "loss": 0.4326, + "step": 79000 + }, + { + "epoch": 0.40785054251087344, + "grad_norm": 20797.056640625, + "learning_rate": 7.663756250823413e-05, + "loss": 0.4388, + "step": 79050 + }, + { + "epoch": 0.40810851249348623, + "grad_norm": 25106.67578125, + "learning_rate": 7.660664096515632e-05, + "loss": 0.4385, + "step": 79100 + }, + { + "epoch": 0.4083664824760991, + "grad_norm": 22217.36328125, + "learning_rate": 7.657570521996877e-05, + "loss": 0.4455, + "step": 79150 + }, + { + "epoch": 0.40862445245871193, + "grad_norm": 21679.291015625, + "learning_rate": 7.654475528918439e-05, + "loss": 0.4409, + "step": 79200 + }, + { + "epoch": 0.4088824224413247, + "grad_norm": 20133.583984375, + "learning_rate": 7.651379118932364e-05, + "loss": 0.4391, + "step": 79250 + }, + { + "epoch": 0.40914039242393757, + "grad_norm": 23019.171875, + "learning_rate": 7.648281293691457e-05, + "loss": 0.446, + "step": 79300 + }, + { + "epoch": 0.40939836240655036, + "grad_norm": 24098.38671875, + "learning_rate": 7.645182054849276e-05, + "loss": 0.4417, + "step": 79350 + }, + { + "epoch": 0.4096563323891632, + "grad_norm": 23057.240234375, + "learning_rate": 7.642081404060136e-05, + "loss": 0.4424, + "step": 79400 + }, + { + "epoch": 0.409914302371776, + "grad_norm": 20033.328125, + "learning_rate": 7.638979342979103e-05, + "loss": 0.4386, + "step": 79450 + }, + { + "epoch": 0.41017227235438886, + "grad_norm": 20978.68359375, + "learning_rate": 7.635875873261995e-05, + "loss": 0.4363, + "step": 79500 + }, + { + "epoch": 0.41043024233700165, + "grad_norm": 21347.068359375, + "learning_rate": 7.63277099656539e-05, + "loss": 0.4431, + "step": 79550 + }, + { + "epoch": 0.4106882123196145, + "grad_norm": 22031.8125, + "learning_rate": 7.629664714546604e-05, + "loss": 0.4313, + "step": 79600 + }, + { + "epoch": 0.4109461823022273, + "grad_norm": 23963.99609375, + "learning_rate": 7.626557028863717e-05, + "loss": 0.4363, + "step": 79650 + }, + { + "epoch": 0.41120415228484014, + "grad_norm": 20183.259765625, + "learning_rate": 7.623447941175548e-05, + "loss": 0.4419, + "step": 79700 + }, + { + "epoch": 0.41146212226745293, + "grad_norm": 23588.68359375, + "learning_rate": 7.620337453141667e-05, + "loss": 0.4388, + "step": 79750 + }, + { + "epoch": 0.4117200922500658, + "grad_norm": 22210.7265625, + "learning_rate": 7.617225566422395e-05, + "loss": 0.442, + "step": 79800 + }, + { + "epoch": 0.41197806223267863, + "grad_norm": 18647.93359375, + "learning_rate": 7.614112282678794e-05, + "loss": 0.4349, + "step": 79850 + }, + { + "epoch": 0.4122360322152914, + "grad_norm": 20993.388671875, + "learning_rate": 7.610997603572675e-05, + "loss": 0.4386, + "step": 79900 + }, + { + "epoch": 0.4124940021979043, + "grad_norm": 23693.26171875, + "learning_rate": 7.607881530766596e-05, + "loss": 0.4385, + "step": 79950 + }, + { + "epoch": 0.41275197218051707, + "grad_norm": 22608.26953125, + "learning_rate": 7.604764065923852e-05, + "loss": 0.4415, + "step": 80000 + }, + { + "epoch": 0.41275197218051707, + "eval_loss": 0.4290848970413208, + "eval_runtime": 3332.9887, + "eval_samples_per_second": 930.432, + "eval_steps_per_second": 1.817, + "step": 80000 + }, + { + "epoch": 0.4130099421631299, + "grad_norm": 23348.44921875, + "learning_rate": 7.60164521070849e-05, + "loss": 0.4392, + "step": 80050 + }, + { + "epoch": 0.4132679121457427, + "grad_norm": 19942.9921875, + "learning_rate": 7.598524966785293e-05, + "loss": 0.4362, + "step": 80100 + }, + { + "epoch": 0.41352588212835556, + "grad_norm": 22776.587890625, + "learning_rate": 7.595403335819786e-05, + "loss": 0.4402, + "step": 80150 + }, + { + "epoch": 0.41378385211096835, + "grad_norm": 22519.923828125, + "learning_rate": 7.592280319478233e-05, + "loss": 0.4412, + "step": 80200 + }, + { + "epoch": 0.4140418220935812, + "grad_norm": 22480.52734375, + "learning_rate": 7.589155919427645e-05, + "loss": 0.4393, + "step": 80250 + }, + { + "epoch": 0.414299792076194, + "grad_norm": 20900.625, + "learning_rate": 7.586030137335762e-05, + "loss": 0.4344, + "step": 80300 + }, + { + "epoch": 0.41455776205880684, + "grad_norm": 21272.306640625, + "learning_rate": 7.582902974871069e-05, + "loss": 0.4385, + "step": 80350 + }, + { + "epoch": 0.41481573204141964, + "grad_norm": 21448.478515625, + "learning_rate": 7.57977443370278e-05, + "loss": 0.4395, + "step": 80400 + }, + { + "epoch": 0.4150737020240325, + "grad_norm": 21854.537109375, + "learning_rate": 7.576644515500855e-05, + "loss": 0.4411, + "step": 80450 + }, + { + "epoch": 0.4153316720066453, + "grad_norm": 21458.689453125, + "learning_rate": 7.573513221935979e-05, + "loss": 0.4429, + "step": 80500 + }, + { + "epoch": 0.41558964198925813, + "grad_norm": 21895.71875, + "learning_rate": 7.57038055467958e-05, + "loss": 0.4391, + "step": 80550 + }, + { + "epoch": 0.415847611971871, + "grad_norm": 23495.921875, + "learning_rate": 7.567246515403812e-05, + "loss": 0.4398, + "step": 80600 + }, + { + "epoch": 0.41610558195448377, + "grad_norm": 26117.8671875, + "learning_rate": 7.564111105781568e-05, + "loss": 0.4407, + "step": 80650 + }, + { + "epoch": 0.4163635519370966, + "grad_norm": 21881.818359375, + "learning_rate": 7.560974327486466e-05, + "loss": 0.4336, + "step": 80700 + }, + { + "epoch": 0.4166215219197094, + "grad_norm": 21309.1015625, + "learning_rate": 7.557836182192859e-05, + "loss": 0.4371, + "step": 80750 + }, + { + "epoch": 0.41687949190232226, + "grad_norm": 21723.498046875, + "learning_rate": 7.554696671575826e-05, + "loss": 0.4384, + "step": 80800 + }, + { + "epoch": 0.41713746188493506, + "grad_norm": 19767.9609375, + "learning_rate": 7.55155579731118e-05, + "loss": 0.4375, + "step": 80850 + }, + { + "epoch": 0.4173954318675479, + "grad_norm": 18992.958984375, + "learning_rate": 7.548413561075456e-05, + "loss": 0.4419, + "step": 80900 + }, + { + "epoch": 0.4176534018501607, + "grad_norm": 21593.255859375, + "learning_rate": 7.545269964545921e-05, + "loss": 0.4372, + "step": 80950 + }, + { + "epoch": 0.41791137183277355, + "grad_norm": 19369.3125, + "learning_rate": 7.542125009400565e-05, + "loss": 0.4402, + "step": 81000 + }, + { + "epoch": 0.41816934181538634, + "grad_norm": 20552.06640625, + "learning_rate": 7.538978697318105e-05, + "loss": 0.4418, + "step": 81050 + }, + { + "epoch": 0.4184273117979992, + "grad_norm": 21554.94140625, + "learning_rate": 7.53583102997798e-05, + "loss": 0.4406, + "step": 81100 + }, + { + "epoch": 0.418685281780612, + "grad_norm": 21098.296875, + "learning_rate": 7.532682009060356e-05, + "loss": 0.443, + "step": 81150 + }, + { + "epoch": 0.41894325176322483, + "grad_norm": 24148.71484375, + "learning_rate": 7.529531636246116e-05, + "loss": 0.4345, + "step": 81200 + }, + { + "epoch": 0.4192012217458376, + "grad_norm": 20404.298828125, + "learning_rate": 7.526379913216872e-05, + "loss": 0.4335, + "step": 81250 + }, + { + "epoch": 0.4194591917284505, + "grad_norm": 22061.607421875, + "learning_rate": 7.52322684165495e-05, + "loss": 0.4385, + "step": 81300 + }, + { + "epoch": 0.4197171617110633, + "grad_norm": 18455.380859375, + "learning_rate": 7.520072423243398e-05, + "loss": 0.4337, + "step": 81350 + }, + { + "epoch": 0.4199751316936761, + "grad_norm": 23344.2734375, + "learning_rate": 7.516916659665987e-05, + "loss": 0.4401, + "step": 81400 + }, + { + "epoch": 0.42023310167628897, + "grad_norm": 20872.77734375, + "learning_rate": 7.5137595526072e-05, + "loss": 0.4394, + "step": 81450 + }, + { + "epoch": 0.42049107165890176, + "grad_norm": 21003.841796875, + "learning_rate": 7.51060110375224e-05, + "loss": 0.4402, + "step": 81500 + }, + { + "epoch": 0.4207490416415146, + "grad_norm": 22772.330078125, + "learning_rate": 7.507441314787025e-05, + "loss": 0.4438, + "step": 81550 + }, + { + "epoch": 0.4210070116241274, + "grad_norm": 19593.216796875, + "learning_rate": 7.504280187398189e-05, + "loss": 0.4375, + "step": 81600 + }, + { + "epoch": 0.42126498160674025, + "grad_norm": 20914.66796875, + "learning_rate": 7.501117723273084e-05, + "loss": 0.4397, + "step": 81650 + }, + { + "epoch": 0.42152295158935305, + "grad_norm": 20479.12109375, + "learning_rate": 7.497953924099768e-05, + "loss": 0.4365, + "step": 81700 + }, + { + "epoch": 0.4217809215719659, + "grad_norm": 20309.25, + "learning_rate": 7.494788791567017e-05, + "loss": 0.4461, + "step": 81750 + }, + { + "epoch": 0.4220388915545787, + "grad_norm": 21467.72265625, + "learning_rate": 7.491622327364318e-05, + "loss": 0.4354, + "step": 81800 + }, + { + "epoch": 0.42229686153719154, + "grad_norm": 20826.80859375, + "learning_rate": 7.488454533181871e-05, + "loss": 0.4398, + "step": 81850 + }, + { + "epoch": 0.42255483151980433, + "grad_norm": 20537.826171875, + "learning_rate": 7.485285410710577e-05, + "loss": 0.4443, + "step": 81900 + }, + { + "epoch": 0.4228128015024172, + "grad_norm": 19521.810546875, + "learning_rate": 7.482114961642057e-05, + "loss": 0.4379, + "step": 81950 + }, + { + "epoch": 0.42307077148503003, + "grad_norm": 19407.5234375, + "learning_rate": 7.478943187668633e-05, + "loss": 0.4429, + "step": 82000 + }, + { + "epoch": 0.4233287414676428, + "grad_norm": 23058.337890625, + "learning_rate": 7.475770090483338e-05, + "loss": 0.4362, + "step": 82050 + }, + { + "epoch": 0.42358671145025567, + "grad_norm": 27362.29296875, + "learning_rate": 7.472595671779907e-05, + "loss": 0.4413, + "step": 82100 + }, + { + "epoch": 0.42384468143286846, + "grad_norm": 20389.08203125, + "learning_rate": 7.469419933252789e-05, + "loss": 0.4386, + "step": 82150 + }, + { + "epoch": 0.4241026514154813, + "grad_norm": 21554.896484375, + "learning_rate": 7.466242876597125e-05, + "loss": 0.4387, + "step": 82200 + }, + { + "epoch": 0.4243606213980941, + "grad_norm": 23449.822265625, + "learning_rate": 7.463064503508772e-05, + "loss": 0.4402, + "step": 82250 + }, + { + "epoch": 0.42461859138070696, + "grad_norm": 23945.1328125, + "learning_rate": 7.459884815684279e-05, + "loss": 0.4393, + "step": 82300 + }, + { + "epoch": 0.42487656136331975, + "grad_norm": 21705.064453125, + "learning_rate": 7.456703814820904e-05, + "loss": 0.4374, + "step": 82350 + }, + { + "epoch": 0.4251345313459326, + "grad_norm": 20050.66796875, + "learning_rate": 7.453521502616607e-05, + "loss": 0.4433, + "step": 82400 + }, + { + "epoch": 0.4253925013285454, + "grad_norm": 24757.845703125, + "learning_rate": 7.45033788077004e-05, + "loss": 0.4362, + "step": 82450 + }, + { + "epoch": 0.42565047131115824, + "grad_norm": 21754.42578125, + "learning_rate": 7.44715295098056e-05, + "loss": 0.4386, + "step": 82500 + }, + { + "epoch": 0.42590844129377103, + "grad_norm": 22891.12890625, + "learning_rate": 7.443966714948222e-05, + "loss": 0.4438, + "step": 82550 + }, + { + "epoch": 0.4261664112763839, + "grad_norm": 22174.580078125, + "learning_rate": 7.440779174373776e-05, + "loss": 0.4388, + "step": 82600 + }, + { + "epoch": 0.4264243812589967, + "grad_norm": 20407.677734375, + "learning_rate": 7.43759033095867e-05, + "loss": 0.4412, + "step": 82650 + }, + { + "epoch": 0.4266823512416095, + "grad_norm": 21960.552734375, + "learning_rate": 7.434400186405045e-05, + "loss": 0.4394, + "step": 82700 + }, + { + "epoch": 0.4269403212242224, + "grad_norm": 20736.583984375, + "learning_rate": 7.431208742415741e-05, + "loss": 0.4382, + "step": 82750 + }, + { + "epoch": 0.42719829120683517, + "grad_norm": 21133.63671875, + "learning_rate": 7.428016000694286e-05, + "loss": 0.4379, + "step": 82800 + }, + { + "epoch": 0.427456261189448, + "grad_norm": 23741.525390625, + "learning_rate": 7.424821962944908e-05, + "loss": 0.4398, + "step": 82850 + }, + { + "epoch": 0.4277142311720608, + "grad_norm": 21936.802734375, + "learning_rate": 7.42162663087252e-05, + "loss": 0.4383, + "step": 82900 + }, + { + "epoch": 0.42797220115467366, + "grad_norm": 24459.85546875, + "learning_rate": 7.418430006182727e-05, + "loss": 0.4393, + "step": 82950 + }, + { + "epoch": 0.42823017113728645, + "grad_norm": 21729.9921875, + "learning_rate": 7.415232090581828e-05, + "loss": 0.4421, + "step": 83000 + }, + { + "epoch": 0.4284881411198993, + "grad_norm": 21081.5703125, + "learning_rate": 7.412032885776807e-05, + "loss": 0.4414, + "step": 83050 + }, + { + "epoch": 0.4287461111025121, + "grad_norm": 20296.740234375, + "learning_rate": 7.408832393475338e-05, + "loss": 0.4316, + "step": 83100 + }, + { + "epoch": 0.42900408108512494, + "grad_norm": 20874.30078125, + "learning_rate": 7.405630615385781e-05, + "loss": 0.433, + "step": 83150 + }, + { + "epoch": 0.42926205106773774, + "grad_norm": 20673.11328125, + "learning_rate": 7.402427553217183e-05, + "loss": 0.4386, + "step": 83200 + }, + { + "epoch": 0.4295200210503506, + "grad_norm": 22462.07421875, + "learning_rate": 7.39922320867928e-05, + "loss": 0.4464, + "step": 83250 + }, + { + "epoch": 0.4297779910329634, + "grad_norm": 20411.771484375, + "learning_rate": 7.396017583482487e-05, + "loss": 0.444, + "step": 83300 + }, + { + "epoch": 0.43003596101557623, + "grad_norm": 21137.6953125, + "learning_rate": 7.392810679337902e-05, + "loss": 0.4416, + "step": 83350 + }, + { + "epoch": 0.4302939309981891, + "grad_norm": 23059.064453125, + "learning_rate": 7.38960249795731e-05, + "loss": 0.4401, + "step": 83400 + }, + { + "epoch": 0.43055190098080187, + "grad_norm": 20305.22265625, + "learning_rate": 7.386393041053176e-05, + "loss": 0.4399, + "step": 83450 + }, + { + "epoch": 0.4308098709634147, + "grad_norm": 22247.779296875, + "learning_rate": 7.38318231033865e-05, + "loss": 0.4362, + "step": 83500 + }, + { + "epoch": 0.4310678409460275, + "grad_norm": 22231.337890625, + "learning_rate": 7.379970307527552e-05, + "loss": 0.4417, + "step": 83550 + }, + { + "epoch": 0.43132581092864036, + "grad_norm": 21788.875, + "learning_rate": 7.376757034334388e-05, + "loss": 0.4374, + "step": 83600 + }, + { + "epoch": 0.43158378091125316, + "grad_norm": 22237.51953125, + "learning_rate": 7.373542492474343e-05, + "loss": 0.4372, + "step": 83650 + }, + { + "epoch": 0.431841750893866, + "grad_norm": 21732.943359375, + "learning_rate": 7.370326683663278e-05, + "loss": 0.4395, + "step": 83700 + }, + { + "epoch": 0.4320997208764788, + "grad_norm": 19517.212890625, + "learning_rate": 7.367109609617729e-05, + "loss": 0.4371, + "step": 83750 + }, + { + "epoch": 0.43235769085909165, + "grad_norm": 23681.388671875, + "learning_rate": 7.363891272054903e-05, + "loss": 0.4383, + "step": 83800 + }, + { + "epoch": 0.43261566084170444, + "grad_norm": 23889.822265625, + "learning_rate": 7.360671672692691e-05, + "loss": 0.441, + "step": 83850 + }, + { + "epoch": 0.4328736308243173, + "grad_norm": 21159.45703125, + "learning_rate": 7.357450813249654e-05, + "loss": 0.4328, + "step": 83900 + }, + { + "epoch": 0.4331316008069301, + "grad_norm": 20617.83984375, + "learning_rate": 7.354228695445023e-05, + "loss": 0.4395, + "step": 83950 + }, + { + "epoch": 0.43338957078954293, + "grad_norm": 19741.568359375, + "learning_rate": 7.351005320998699e-05, + "loss": 0.4356, + "step": 84000 + }, + { + "epoch": 0.4336475407721557, + "grad_norm": 21407.771484375, + "learning_rate": 7.347780691631259e-05, + "loss": 0.4322, + "step": 84050 + }, + { + "epoch": 0.4339055107547686, + "grad_norm": 22396.5625, + "learning_rate": 7.344554809063947e-05, + "loss": 0.4379, + "step": 84100 + }, + { + "epoch": 0.4341634807373814, + "grad_norm": 23536.361328125, + "learning_rate": 7.34132767501868e-05, + "loss": 0.4372, + "step": 84150 + }, + { + "epoch": 0.4344214507199942, + "grad_norm": 23622.90234375, + "learning_rate": 7.338099291218036e-05, + "loss": 0.4361, + "step": 84200 + }, + { + "epoch": 0.43467942070260707, + "grad_norm": 24463.931640625, + "learning_rate": 7.334869659385264e-05, + "loss": 0.4478, + "step": 84250 + }, + { + "epoch": 0.43493739068521986, + "grad_norm": 21666.328125, + "learning_rate": 7.331638781244283e-05, + "loss": 0.4387, + "step": 84300 + }, + { + "epoch": 0.4351953606678327, + "grad_norm": 21145.6875, + "learning_rate": 7.328406658519669e-05, + "loss": 0.4362, + "step": 84350 + }, + { + "epoch": 0.4354533306504455, + "grad_norm": 21766.228515625, + "learning_rate": 7.325173292936667e-05, + "loss": 0.4433, + "step": 84400 + }, + { + "epoch": 0.43571130063305835, + "grad_norm": 23118.056640625, + "learning_rate": 7.321938686221185e-05, + "loss": 0.4317, + "step": 84450 + }, + { + "epoch": 0.43596927061567115, + "grad_norm": 20925.833984375, + "learning_rate": 7.318702840099793e-05, + "loss": 0.4348, + "step": 84500 + }, + { + "epoch": 0.436227240598284, + "grad_norm": 21725.630859375, + "learning_rate": 7.315465756299727e-05, + "loss": 0.4363, + "step": 84550 + }, + { + "epoch": 0.4364852105808968, + "grad_norm": 20223.537109375, + "learning_rate": 7.312227436548875e-05, + "loss": 0.4363, + "step": 84600 + }, + { + "epoch": 0.43674318056350964, + "grad_norm": 22766.71484375, + "learning_rate": 7.308987882575793e-05, + "loss": 0.442, + "step": 84650 + }, + { + "epoch": 0.43700115054612243, + "grad_norm": 20453.341796875, + "learning_rate": 7.305747096109688e-05, + "loss": 0.4362, + "step": 84700 + }, + { + "epoch": 0.4372591205287353, + "grad_norm": 20761.466796875, + "learning_rate": 7.302505078880431e-05, + "loss": 0.435, + "step": 84750 + }, + { + "epoch": 0.4375170905113481, + "grad_norm": 20815.27734375, + "learning_rate": 7.299261832618551e-05, + "loss": 0.4398, + "step": 84800 + }, + { + "epoch": 0.4377750604939609, + "grad_norm": 22528.06640625, + "learning_rate": 7.296017359055224e-05, + "loss": 0.44, + "step": 84850 + }, + { + "epoch": 0.43803303047657377, + "grad_norm": 21391.71484375, + "learning_rate": 7.292771659922293e-05, + "loss": 0.4376, + "step": 84900 + }, + { + "epoch": 0.43829100045918656, + "grad_norm": 21485.966796875, + "learning_rate": 7.289524736952245e-05, + "loss": 0.4424, + "step": 84950 + }, + { + "epoch": 0.4385489704417994, + "grad_norm": 21160.314453125, + "learning_rate": 7.286276591878228e-05, + "loss": 0.4473, + "step": 85000 + }, + { + "epoch": 0.4385489704417994, + "eval_loss": 0.4252757728099823, + "eval_runtime": 3252.991, + "eval_samples_per_second": 953.313, + "eval_steps_per_second": 1.862, + "step": 85000 + }, + { + "epoch": 0.4388069404244122, + "grad_norm": 29667.109375, + "learning_rate": 7.283027226434036e-05, + "loss": 0.4414, + "step": 85050 + }, + { + "epoch": 0.43906491040702506, + "grad_norm": 24990.86328125, + "learning_rate": 7.27977664235412e-05, + "loss": 0.4321, + "step": 85100 + }, + { + "epoch": 0.43932288038963785, + "grad_norm": 21708.86328125, + "learning_rate": 7.276524841373576e-05, + "loss": 0.4331, + "step": 85150 + }, + { + "epoch": 0.4395808503722507, + "grad_norm": 22323.1015625, + "learning_rate": 7.273271825228157e-05, + "loss": 0.4372, + "step": 85200 + }, + { + "epoch": 0.4398388203548635, + "grad_norm": 21696.2734375, + "learning_rate": 7.270017595654255e-05, + "loss": 0.4271, + "step": 85250 + }, + { + "epoch": 0.44009679033747634, + "grad_norm": 23364.560546875, + "learning_rate": 7.266762154388917e-05, + "loss": 0.4327, + "step": 85300 + }, + { + "epoch": 0.44035476032008913, + "grad_norm": 21834.607421875, + "learning_rate": 7.263505503169834e-05, + "loss": 0.4337, + "step": 85350 + }, + { + "epoch": 0.440612730302702, + "grad_norm": 18636.244140625, + "learning_rate": 7.260247643735343e-05, + "loss": 0.4393, + "step": 85400 + }, + { + "epoch": 0.4408707002853148, + "grad_norm": 20385.875, + "learning_rate": 7.256988577824427e-05, + "loss": 0.4398, + "step": 85450 + }, + { + "epoch": 0.4411286702679276, + "grad_norm": 21459.576171875, + "learning_rate": 7.253728307176713e-05, + "loss": 0.435, + "step": 85500 + }, + { + "epoch": 0.4413866402505405, + "grad_norm": 22838.716796875, + "learning_rate": 7.25046683353247e-05, + "loss": 0.4368, + "step": 85550 + }, + { + "epoch": 0.44164461023315327, + "grad_norm": 23016.4140625, + "learning_rate": 7.247204158632608e-05, + "loss": 0.4353, + "step": 85600 + }, + { + "epoch": 0.4419025802157661, + "grad_norm": 22318.193359375, + "learning_rate": 7.243940284218682e-05, + "loss": 0.4374, + "step": 85650 + }, + { + "epoch": 0.4421605501983789, + "grad_norm": 20475.376953125, + "learning_rate": 7.240675212032884e-05, + "loss": 0.4339, + "step": 85700 + }, + { + "epoch": 0.44241852018099176, + "grad_norm": 22276.287109375, + "learning_rate": 7.237408943818042e-05, + "loss": 0.4275, + "step": 85750 + }, + { + "epoch": 0.44267649016360455, + "grad_norm": 22131.654296875, + "learning_rate": 7.234141481317634e-05, + "loss": 0.4373, + "step": 85800 + }, + { + "epoch": 0.4429344601462174, + "grad_norm": 24779.14453125, + "learning_rate": 7.230872826275765e-05, + "loss": 0.4347, + "step": 85850 + }, + { + "epoch": 0.4431924301288302, + "grad_norm": 22474.443359375, + "learning_rate": 7.227602980437179e-05, + "loss": 0.4341, + "step": 85900 + }, + { + "epoch": 0.44345040011144304, + "grad_norm": 21620.056640625, + "learning_rate": 7.224331945547258e-05, + "loss": 0.4399, + "step": 85950 + }, + { + "epoch": 0.44370837009405584, + "grad_norm": 21546.8046875, + "learning_rate": 7.221059723352014e-05, + "loss": 0.4437, + "step": 86000 + }, + { + "epoch": 0.4439663400766687, + "grad_norm": 22283.0078125, + "learning_rate": 7.2177863155981e-05, + "loss": 0.4403, + "step": 86050 + }, + { + "epoch": 0.4442243100592815, + "grad_norm": 21332.576171875, + "learning_rate": 7.214511724032795e-05, + "loss": 0.4369, + "step": 86100 + }, + { + "epoch": 0.44448228004189433, + "grad_norm": 23106.01953125, + "learning_rate": 7.211235950404013e-05, + "loss": 0.4369, + "step": 86150 + }, + { + "epoch": 0.4447402500245071, + "grad_norm": 21826.2734375, + "learning_rate": 7.207958996460298e-05, + "loss": 0.4407, + "step": 86200 + }, + { + "epoch": 0.44499822000711997, + "grad_norm": 22308.90625, + "learning_rate": 7.204680863950825e-05, + "loss": 0.4349, + "step": 86250 + }, + { + "epoch": 0.4452561899897328, + "grad_norm": 24916.359375, + "learning_rate": 7.2014015546254e-05, + "loss": 0.436, + "step": 86300 + }, + { + "epoch": 0.4455141599723456, + "grad_norm": 22585.77734375, + "learning_rate": 7.198121070234453e-05, + "loss": 0.4311, + "step": 86350 + }, + { + "epoch": 0.44577212995495846, + "grad_norm": 22984.658203125, + "learning_rate": 7.194839412529042e-05, + "loss": 0.4324, + "step": 86400 + }, + { + "epoch": 0.44603009993757126, + "grad_norm": 22495.552734375, + "learning_rate": 7.191556583260853e-05, + "loss": 0.4306, + "step": 86450 + }, + { + "epoch": 0.4462880699201841, + "grad_norm": 21413.2578125, + "learning_rate": 7.188272584182196e-05, + "loss": 0.4404, + "step": 86500 + }, + { + "epoch": 0.4465460399027969, + "grad_norm": 23719.43359375, + "learning_rate": 7.184987417046007e-05, + "loss": 0.4321, + "step": 86550 + }, + { + "epoch": 0.44680400988540975, + "grad_norm": 22586.095703125, + "learning_rate": 7.181701083605846e-05, + "loss": 0.4349, + "step": 86600 + }, + { + "epoch": 0.44706197986802254, + "grad_norm": 20580.166015625, + "learning_rate": 7.178413585615891e-05, + "loss": 0.4323, + "step": 86650 + }, + { + "epoch": 0.4473199498506354, + "grad_norm": 21345.71875, + "learning_rate": 7.175124924830948e-05, + "loss": 0.4326, + "step": 86700 + }, + { + "epoch": 0.4475779198332482, + "grad_norm": 20615.333984375, + "learning_rate": 7.171835103006438e-05, + "loss": 0.4425, + "step": 86750 + }, + { + "epoch": 0.44783588981586103, + "grad_norm": 25518.546875, + "learning_rate": 7.168544121898407e-05, + "loss": 0.4307, + "step": 86800 + }, + { + "epoch": 0.4480938597984738, + "grad_norm": 23149.703125, + "learning_rate": 7.165251983263512e-05, + "loss": 0.4336, + "step": 86850 + }, + { + "epoch": 0.4483518297810867, + "grad_norm": 22026.19140625, + "learning_rate": 7.16195868885904e-05, + "loss": 0.4401, + "step": 86900 + }, + { + "epoch": 0.44860979976369947, + "grad_norm": 21140.90234375, + "learning_rate": 7.158664240442881e-05, + "loss": 0.436, + "step": 86950 + }, + { + "epoch": 0.4488677697463123, + "grad_norm": 25489.1796875, + "learning_rate": 7.155368639773552e-05, + "loss": 0.4379, + "step": 87000 + }, + { + "epoch": 0.44912573972892517, + "grad_norm": 21035.275390625, + "learning_rate": 7.152071888610176e-05, + "loss": 0.433, + "step": 87050 + }, + { + "epoch": 0.44938370971153796, + "grad_norm": 25905.03515625, + "learning_rate": 7.148773988712503e-05, + "loss": 0.4423, + "step": 87100 + }, + { + "epoch": 0.4496416796941508, + "grad_norm": 21237.857421875, + "learning_rate": 7.14547494184088e-05, + "loss": 0.4346, + "step": 87150 + }, + { + "epoch": 0.4498996496767636, + "grad_norm": 19255.748046875, + "learning_rate": 7.14217474975628e-05, + "loss": 0.4333, + "step": 87200 + }, + { + "epoch": 0.45015761965937645, + "grad_norm": 22115.05078125, + "learning_rate": 7.138873414220277e-05, + "loss": 0.4371, + "step": 87250 + }, + { + "epoch": 0.45041558964198924, + "grad_norm": 23271.462890625, + "learning_rate": 7.135570936995064e-05, + "loss": 0.4362, + "step": 87300 + }, + { + "epoch": 0.4506735596246021, + "grad_norm": 24245.02734375, + "learning_rate": 7.132267319843438e-05, + "loss": 0.4371, + "step": 87350 + }, + { + "epoch": 0.4509315296072149, + "grad_norm": 22234.224609375, + "learning_rate": 7.128962564528805e-05, + "loss": 0.4306, + "step": 87400 + }, + { + "epoch": 0.45118949958982774, + "grad_norm": 22704.115234375, + "learning_rate": 7.12565667281518e-05, + "loss": 0.4408, + "step": 87450 + }, + { + "epoch": 0.45144746957244053, + "grad_norm": 21906.650390625, + "learning_rate": 7.122349646467183e-05, + "loss": 0.4322, + "step": 87500 + }, + { + "epoch": 0.4517054395550534, + "grad_norm": 21960.501953125, + "learning_rate": 7.119041487250045e-05, + "loss": 0.4322, + "step": 87550 + }, + { + "epoch": 0.45196340953766617, + "grad_norm": 20264.14453125, + "learning_rate": 7.11573219692959e-05, + "loss": 0.4403, + "step": 87600 + }, + { + "epoch": 0.452221379520279, + "grad_norm": 20237.078125, + "learning_rate": 7.112421777272259e-05, + "loss": 0.4421, + "step": 87650 + }, + { + "epoch": 0.45247934950289187, + "grad_norm": 22111.3203125, + "learning_rate": 7.109110230045087e-05, + "loss": 0.4386, + "step": 87700 + }, + { + "epoch": 0.45273731948550466, + "grad_norm": 20690.015625, + "learning_rate": 7.105797557015715e-05, + "loss": 0.4315, + "step": 87750 + }, + { + "epoch": 0.4529952894681175, + "grad_norm": 23273.888671875, + "learning_rate": 7.102483759952384e-05, + "loss": 0.4397, + "step": 87800 + }, + { + "epoch": 0.4532532594507303, + "grad_norm": 20268.541015625, + "learning_rate": 7.099168840623935e-05, + "loss": 0.4381, + "step": 87850 + }, + { + "epoch": 0.45351122943334315, + "grad_norm": 21591.724609375, + "learning_rate": 7.095852800799806e-05, + "loss": 0.4368, + "step": 87900 + }, + { + "epoch": 0.45376919941595595, + "grad_norm": 20683.994140625, + "learning_rate": 7.092535642250035e-05, + "loss": 0.4315, + "step": 87950 + }, + { + "epoch": 0.4540271693985688, + "grad_norm": 22910.26953125, + "learning_rate": 7.089217366745258e-05, + "loss": 0.4415, + "step": 88000 + }, + { + "epoch": 0.4542851393811816, + "grad_norm": 22321.40234375, + "learning_rate": 7.085897976056706e-05, + "loss": 0.4386, + "step": 88050 + }, + { + "epoch": 0.45454310936379444, + "grad_norm": 20730.521484375, + "learning_rate": 7.082577471956206e-05, + "loss": 0.4335, + "step": 88100 + }, + { + "epoch": 0.45480107934640723, + "grad_norm": 23302.033203125, + "learning_rate": 7.079255856216177e-05, + "loss": 0.4366, + "step": 88150 + }, + { + "epoch": 0.4550590493290201, + "grad_norm": 21125.5625, + "learning_rate": 7.075933130609636e-05, + "loss": 0.4388, + "step": 88200 + }, + { + "epoch": 0.4553170193116329, + "grad_norm": 24245.548828125, + "learning_rate": 7.072609296910187e-05, + "loss": 0.4369, + "step": 88250 + }, + { + "epoch": 0.4555749892942457, + "grad_norm": 19609.1484375, + "learning_rate": 7.06928435689203e-05, + "loss": 0.4287, + "step": 88300 + }, + { + "epoch": 0.4558329592768585, + "grad_norm": 21653.08984375, + "learning_rate": 7.065958312329953e-05, + "loss": 0.4357, + "step": 88350 + }, + { + "epoch": 0.45609092925947137, + "grad_norm": 23725.236328125, + "learning_rate": 7.062631164999331e-05, + "loss": 0.4382, + "step": 88400 + }, + { + "epoch": 0.4563488992420842, + "grad_norm": 21436.92578125, + "learning_rate": 7.059302916676137e-05, + "loss": 0.4373, + "step": 88450 + }, + { + "epoch": 0.456606869224697, + "grad_norm": 20179.189453125, + "learning_rate": 7.05597356913692e-05, + "loss": 0.4304, + "step": 88500 + }, + { + "epoch": 0.45686483920730986, + "grad_norm": 22804.22265625, + "learning_rate": 7.052643124158824e-05, + "loss": 0.4343, + "step": 88550 + }, + { + "epoch": 0.45712280918992265, + "grad_norm": 21530.931640625, + "learning_rate": 7.049311583519574e-05, + "loss": 0.4364, + "step": 88600 + }, + { + "epoch": 0.4573807791725355, + "grad_norm": 21411.646484375, + "learning_rate": 7.045978948997486e-05, + "loss": 0.436, + "step": 88650 + }, + { + "epoch": 0.4576387491551483, + "grad_norm": 20853.962890625, + "learning_rate": 7.042645222371451e-05, + "loss": 0.436, + "step": 88700 + }, + { + "epoch": 0.45789671913776114, + "grad_norm": 20940.28125, + "learning_rate": 7.039310405420952e-05, + "loss": 0.4349, + "step": 88750 + }, + { + "epoch": 0.45815468912037394, + "grad_norm": 22368.05078125, + "learning_rate": 7.035974499926045e-05, + "loss": 0.4355, + "step": 88800 + }, + { + "epoch": 0.4584126591029868, + "grad_norm": 21155.3984375, + "learning_rate": 7.032637507667377e-05, + "loss": 0.4292, + "step": 88850 + }, + { + "epoch": 0.4586706290855996, + "grad_norm": 21627.353515625, + "learning_rate": 7.029299430426164e-05, + "loss": 0.4404, + "step": 88900 + }, + { + "epoch": 0.45892859906821243, + "grad_norm": 22008.23046875, + "learning_rate": 7.025960269984212e-05, + "loss": 0.431, + "step": 88950 + }, + { + "epoch": 0.4591865690508252, + "grad_norm": 21588.109375, + "learning_rate": 7.022620028123898e-05, + "loss": 0.4319, + "step": 89000 + }, + { + "epoch": 0.45944453903343807, + "grad_norm": 21680.646484375, + "learning_rate": 7.019278706628179e-05, + "loss": 0.4403, + "step": 89050 + }, + { + "epoch": 0.4597025090160509, + "grad_norm": 25427.423828125, + "learning_rate": 7.015936307280587e-05, + "loss": 0.435, + "step": 89100 + }, + { + "epoch": 0.4599604789986637, + "grad_norm": 22674.693359375, + "learning_rate": 7.01259283186523e-05, + "loss": 0.4377, + "step": 89150 + }, + { + "epoch": 0.46021844898127656, + "grad_norm": 24841.029296875, + "learning_rate": 7.009248282166793e-05, + "loss": 0.4387, + "step": 89200 + }, + { + "epoch": 0.46047641896388936, + "grad_norm": 21259.369140625, + "learning_rate": 7.005902659970528e-05, + "loss": 0.4355, + "step": 89250 + }, + { + "epoch": 0.4607343889465022, + "grad_norm": 19364.466796875, + "learning_rate": 7.002555967062265e-05, + "loss": 0.4353, + "step": 89300 + }, + { + "epoch": 0.460992358929115, + "grad_norm": 25116.47265625, + "learning_rate": 6.999208205228405e-05, + "loss": 0.4328, + "step": 89350 + }, + { + "epoch": 0.46125032891172785, + "grad_norm": 24426.4296875, + "learning_rate": 6.995859376255918e-05, + "loss": 0.4331, + "step": 89400 + }, + { + "epoch": 0.46150829889434064, + "grad_norm": 20802.759765625, + "learning_rate": 6.99250948193234e-05, + "loss": 0.4294, + "step": 89450 + }, + { + "epoch": 0.4617662688769535, + "grad_norm": 23164.2109375, + "learning_rate": 6.989158524045787e-05, + "loss": 0.4338, + "step": 89500 + }, + { + "epoch": 0.4620242388595663, + "grad_norm": 20543.28515625, + "learning_rate": 6.98580650438493e-05, + "loss": 0.4243, + "step": 89550 + }, + { + "epoch": 0.46228220884217913, + "grad_norm": 22468.732421875, + "learning_rate": 6.982453424739016e-05, + "loss": 0.4306, + "step": 89600 + }, + { + "epoch": 0.4625401788247919, + "grad_norm": 22903.12890625, + "learning_rate": 6.979099286897849e-05, + "loss": 0.4316, + "step": 89650 + }, + { + "epoch": 0.4627981488074048, + "grad_norm": 23074.068359375, + "learning_rate": 6.975744092651808e-05, + "loss": 0.4371, + "step": 89700 + }, + { + "epoch": 0.46305611879001757, + "grad_norm": 22003.00390625, + "learning_rate": 6.972387843791827e-05, + "loss": 0.4329, + "step": 89750 + }, + { + "epoch": 0.4633140887726304, + "grad_norm": 21524.93359375, + "learning_rate": 6.969030542109407e-05, + "loss": 0.4348, + "step": 89800 + }, + { + "epoch": 0.46357205875524327, + "grad_norm": 20501.130859375, + "learning_rate": 6.965672189396614e-05, + "loss": 0.4286, + "step": 89850 + }, + { + "epoch": 0.46383002873785606, + "grad_norm": 21559.396484375, + "learning_rate": 6.962312787446068e-05, + "loss": 0.434, + "step": 89900 + }, + { + "epoch": 0.4640879987204689, + "grad_norm": 21185.537109375, + "learning_rate": 6.958952338050955e-05, + "loss": 0.4326, + "step": 89950 + }, + { + "epoch": 0.4643459687030817, + "grad_norm": 23004.626953125, + "learning_rate": 6.955590843005016e-05, + "loss": 0.4272, + "step": 90000 + }, + { + "epoch": 0.4643459687030817, + "eval_loss": 0.4223860800266266, + "eval_runtime": 3251.8949, + "eval_samples_per_second": 953.635, + "eval_steps_per_second": 1.863, + "step": 90000 + }, + { + "epoch": 0.46460393868569455, + "grad_norm": 20333.259765625, + "learning_rate": 6.952228304102553e-05, + "loss": 0.4338, + "step": 90050 + }, + { + "epoch": 0.46486190866830734, + "grad_norm": 25967.029296875, + "learning_rate": 6.948864723138423e-05, + "loss": 0.4352, + "step": 90100 + }, + { + "epoch": 0.4651198786509202, + "grad_norm": 22849.9375, + "learning_rate": 6.945500101908043e-05, + "loss": 0.4358, + "step": 90150 + }, + { + "epoch": 0.465377848633533, + "grad_norm": 20628.9453125, + "learning_rate": 6.94213444220738e-05, + "loss": 0.4343, + "step": 90200 + }, + { + "epoch": 0.46563581861614584, + "grad_norm": 22179.84375, + "learning_rate": 6.938767745832959e-05, + "loss": 0.4314, + "step": 90250 + }, + { + "epoch": 0.46589378859875863, + "grad_norm": 24433.46484375, + "learning_rate": 6.935400014581858e-05, + "loss": 0.436, + "step": 90300 + }, + { + "epoch": 0.4661517585813715, + "grad_norm": 21914.666015625, + "learning_rate": 6.932031250251705e-05, + "loss": 0.431, + "step": 90350 + }, + { + "epoch": 0.46640972856398427, + "grad_norm": 19517.78125, + "learning_rate": 6.928661454640683e-05, + "loss": 0.4282, + "step": 90400 + }, + { + "epoch": 0.4666676985465971, + "grad_norm": 25924.5234375, + "learning_rate": 6.925290629547522e-05, + "loss": 0.4344, + "step": 90450 + }, + { + "epoch": 0.4669256685292099, + "grad_norm": 20866.927734375, + "learning_rate": 6.921918776771505e-05, + "loss": 0.4336, + "step": 90500 + }, + { + "epoch": 0.46718363851182276, + "grad_norm": 22734.5625, + "learning_rate": 6.91854589811246e-05, + "loss": 0.4375, + "step": 90550 + }, + { + "epoch": 0.4674416084944356, + "grad_norm": 21173.5703125, + "learning_rate": 6.915171995370766e-05, + "loss": 0.428, + "step": 90600 + }, + { + "epoch": 0.4676995784770484, + "grad_norm": 23864.681640625, + "learning_rate": 6.911797070347346e-05, + "loss": 0.4344, + "step": 90650 + }, + { + "epoch": 0.46795754845966125, + "grad_norm": 26236.091796875, + "learning_rate": 6.908421124843669e-05, + "loss": 0.4345, + "step": 90700 + }, + { + "epoch": 0.46821551844227405, + "grad_norm": 20788.6015625, + "learning_rate": 6.905044160661748e-05, + "loss": 0.4332, + "step": 90750 + }, + { + "epoch": 0.4684734884248869, + "grad_norm": 21382.2578125, + "learning_rate": 6.901666179604148e-05, + "loss": 0.4356, + "step": 90800 + }, + { + "epoch": 0.4687314584074997, + "grad_norm": 20230.220703125, + "learning_rate": 6.898287183473961e-05, + "loss": 0.4262, + "step": 90850 + }, + { + "epoch": 0.46898942839011254, + "grad_norm": 31838.697265625, + "learning_rate": 6.894907174074836e-05, + "loss": 0.4316, + "step": 90900 + }, + { + "epoch": 0.46924739837272533, + "grad_norm": 21029.5234375, + "learning_rate": 6.891526153210953e-05, + "loss": 0.4346, + "step": 90950 + }, + { + "epoch": 0.4695053683553382, + "grad_norm": 23617.826171875, + "learning_rate": 6.888144122687035e-05, + "loss": 0.4262, + "step": 91000 + }, + { + "epoch": 0.469763338337951, + "grad_norm": 23151.751953125, + "learning_rate": 6.884761084308349e-05, + "loss": 0.4296, + "step": 91050 + }, + { + "epoch": 0.4700213083205638, + "grad_norm": 19649.466796875, + "learning_rate": 6.881377039880692e-05, + "loss": 0.4325, + "step": 91100 + }, + { + "epoch": 0.4702792783031766, + "grad_norm": 20488.10546875, + "learning_rate": 6.8779919912104e-05, + "loss": 0.4352, + "step": 91150 + }, + { + "epoch": 0.47053724828578947, + "grad_norm": 21639.306640625, + "learning_rate": 6.874605940104349e-05, + "loss": 0.4319, + "step": 91200 + }, + { + "epoch": 0.4707952182684023, + "grad_norm": 21799.994140625, + "learning_rate": 6.871218888369947e-05, + "loss": 0.4315, + "step": 91250 + }, + { + "epoch": 0.4710531882510151, + "grad_norm": 22425.94140625, + "learning_rate": 6.867830837815137e-05, + "loss": 0.4381, + "step": 91300 + }, + { + "epoch": 0.47131115823362796, + "grad_norm": 22582.57421875, + "learning_rate": 6.864441790248396e-05, + "loss": 0.4297, + "step": 91350 + }, + { + "epoch": 0.47156912821624075, + "grad_norm": 21082.38671875, + "learning_rate": 6.861051747478726e-05, + "loss": 0.4292, + "step": 91400 + }, + { + "epoch": 0.4718270981988536, + "grad_norm": 23156.5546875, + "learning_rate": 6.857660711315672e-05, + "loss": 0.4276, + "step": 91450 + }, + { + "epoch": 0.4720850681814664, + "grad_norm": 21754.6796875, + "learning_rate": 6.854268683569302e-05, + "loss": 0.4369, + "step": 91500 + }, + { + "epoch": 0.47234303816407924, + "grad_norm": 22397.896484375, + "learning_rate": 6.850875666050216e-05, + "loss": 0.4312, + "step": 91550 + }, + { + "epoch": 0.47260100814669204, + "grad_norm": 21344.166015625, + "learning_rate": 6.847481660569537e-05, + "loss": 0.4291, + "step": 91600 + }, + { + "epoch": 0.4728589781293049, + "grad_norm": 23818.71484375, + "learning_rate": 6.844086668938923e-05, + "loss": 0.4352, + "step": 91650 + }, + { + "epoch": 0.4731169481119177, + "grad_norm": 21734.537109375, + "learning_rate": 6.840690692970554e-05, + "loss": 0.4326, + "step": 91700 + }, + { + "epoch": 0.47337491809453053, + "grad_norm": 22027.734375, + "learning_rate": 6.837293734477136e-05, + "loss": 0.4369, + "step": 91750 + }, + { + "epoch": 0.4736328880771433, + "grad_norm": 23111.103515625, + "learning_rate": 6.8338957952719e-05, + "loss": 0.4396, + "step": 91800 + }, + { + "epoch": 0.47389085805975617, + "grad_norm": 22521.767578125, + "learning_rate": 6.830496877168599e-05, + "loss": 0.4376, + "step": 91850 + }, + { + "epoch": 0.47414882804236896, + "grad_norm": 19730.158203125, + "learning_rate": 6.827096981981511e-05, + "loss": 0.4321, + "step": 91900 + }, + { + "epoch": 0.4744067980249818, + "grad_norm": 21871.134765625, + "learning_rate": 6.823696111525433e-05, + "loss": 0.4373, + "step": 91950 + }, + { + "epoch": 0.47466476800759466, + "grad_norm": 22332.384765625, + "learning_rate": 6.820294267615686e-05, + "loss": 0.4323, + "step": 92000 + }, + { + "epoch": 0.47492273799020746, + "grad_norm": 22426.59765625, + "learning_rate": 6.816891452068104e-05, + "loss": 0.4272, + "step": 92050 + }, + { + "epoch": 0.4751807079728203, + "grad_norm": 23286.05859375, + "learning_rate": 6.81348766669905e-05, + "loss": 0.4442, + "step": 92100 + }, + { + "epoch": 0.4754386779554331, + "grad_norm": 21696.1171875, + "learning_rate": 6.810082913325395e-05, + "loss": 0.4288, + "step": 92150 + }, + { + "epoch": 0.47569664793804595, + "grad_norm": 20548.908203125, + "learning_rate": 6.80667719376453e-05, + "loss": 0.4358, + "step": 92200 + }, + { + "epoch": 0.47595461792065874, + "grad_norm": 22605.1640625, + "learning_rate": 6.803270509834363e-05, + "loss": 0.4327, + "step": 92250 + }, + { + "epoch": 0.4762125879032716, + "grad_norm": 23604.30078125, + "learning_rate": 6.799862863353318e-05, + "loss": 0.441, + "step": 92300 + }, + { + "epoch": 0.4764705578858844, + "grad_norm": 22117.1796875, + "learning_rate": 6.796454256140328e-05, + "loss": 0.4289, + "step": 92350 + }, + { + "epoch": 0.47672852786849723, + "grad_norm": 22476.54296875, + "learning_rate": 6.793044690014842e-05, + "loss": 0.4319, + "step": 92400 + }, + { + "epoch": 0.47698649785111, + "grad_norm": 20855.140625, + "learning_rate": 6.789634166796821e-05, + "loss": 0.4326, + "step": 92450 + }, + { + "epoch": 0.4772444678337229, + "grad_norm": 23704.125, + "learning_rate": 6.786222688306734e-05, + "loss": 0.4374, + "step": 92500 + }, + { + "epoch": 0.47750243781633567, + "grad_norm": 20677.91015625, + "learning_rate": 6.782810256365568e-05, + "loss": 0.4261, + "step": 92550 + }, + { + "epoch": 0.4777604077989485, + "grad_norm": 21245.837890625, + "learning_rate": 6.779396872794807e-05, + "loss": 0.4309, + "step": 92600 + }, + { + "epoch": 0.4780183777815613, + "grad_norm": 25415.859375, + "learning_rate": 6.775982539416453e-05, + "loss": 0.437, + "step": 92650 + }, + { + "epoch": 0.47827634776417416, + "grad_norm": 20582.556640625, + "learning_rate": 6.772567258053007e-05, + "loss": 0.4349, + "step": 92700 + }, + { + "epoch": 0.478534317746787, + "grad_norm": 20002.013671875, + "learning_rate": 6.769151030527483e-05, + "loss": 0.4263, + "step": 92750 + }, + { + "epoch": 0.4787922877293998, + "grad_norm": 23287.6875, + "learning_rate": 6.765733858663397e-05, + "loss": 0.4332, + "step": 92800 + }, + { + "epoch": 0.47905025771201265, + "grad_norm": 22023.66796875, + "learning_rate": 6.76231574428477e-05, + "loss": 0.4339, + "step": 92850 + }, + { + "epoch": 0.47930822769462544, + "grad_norm": 21299.185546875, + "learning_rate": 6.758896689216122e-05, + "loss": 0.4293, + "step": 92900 + }, + { + "epoch": 0.4795661976772383, + "grad_norm": 21979.560546875, + "learning_rate": 6.755476695282479e-05, + "loss": 0.4314, + "step": 92950 + }, + { + "epoch": 0.4798241676598511, + "grad_norm": 21399.029296875, + "learning_rate": 6.752055764309372e-05, + "loss": 0.4374, + "step": 93000 + }, + { + "epoch": 0.48008213764246394, + "grad_norm": 23827.685546875, + "learning_rate": 6.748633898122823e-05, + "loss": 0.4348, + "step": 93050 + }, + { + "epoch": 0.48034010762507673, + "grad_norm": 21079.61328125, + "learning_rate": 6.74521109854936e-05, + "loss": 0.4312, + "step": 93100 + }, + { + "epoch": 0.4805980776076896, + "grad_norm": 20395.04296875, + "learning_rate": 6.741787367416006e-05, + "loss": 0.4246, + "step": 93150 + }, + { + "epoch": 0.48085604759030237, + "grad_norm": 21922.576171875, + "learning_rate": 6.738362706550284e-05, + "loss": 0.4355, + "step": 93200 + }, + { + "epoch": 0.4811140175729152, + "grad_norm": 21317.001953125, + "learning_rate": 6.734937117780211e-05, + "loss": 0.4302, + "step": 93250 + }, + { + "epoch": 0.481371987555528, + "grad_norm": 21387.46484375, + "learning_rate": 6.731510602934298e-05, + "loss": 0.434, + "step": 93300 + }, + { + "epoch": 0.48162995753814086, + "grad_norm": 24289.28515625, + "learning_rate": 6.728083163841554e-05, + "loss": 0.4338, + "step": 93350 + }, + { + "epoch": 0.4818879275207537, + "grad_norm": 23514.162109375, + "learning_rate": 6.72465480233148e-05, + "loss": 0.4357, + "step": 93400 + }, + { + "epoch": 0.4821458975033665, + "grad_norm": 21481.0859375, + "learning_rate": 6.721225520234068e-05, + "loss": 0.4307, + "step": 93450 + }, + { + "epoch": 0.48240386748597935, + "grad_norm": 25044.396484375, + "learning_rate": 6.717795319379805e-05, + "loss": 0.4335, + "step": 93500 + }, + { + "epoch": 0.48266183746859215, + "grad_norm": 21193.333984375, + "learning_rate": 6.714364201599662e-05, + "loss": 0.4243, + "step": 93550 + }, + { + "epoch": 0.482919807451205, + "grad_norm": 19113.275390625, + "learning_rate": 6.710932168725105e-05, + "loss": 0.4331, + "step": 93600 + }, + { + "epoch": 0.4831777774338178, + "grad_norm": 21924.162109375, + "learning_rate": 6.707499222588087e-05, + "loss": 0.4309, + "step": 93650 + }, + { + "epoch": 0.48343574741643064, + "grad_norm": 21123.498046875, + "learning_rate": 6.704065365021048e-05, + "loss": 0.4392, + "step": 93700 + }, + { + "epoch": 0.48369371739904343, + "grad_norm": 22201.29296875, + "learning_rate": 6.700630597856914e-05, + "loss": 0.4281, + "step": 93750 + }, + { + "epoch": 0.4839516873816563, + "grad_norm": 24237.494140625, + "learning_rate": 6.697194922929096e-05, + "loss": 0.4367, + "step": 93800 + }, + { + "epoch": 0.4842096573642691, + "grad_norm": 21306.8125, + "learning_rate": 6.693758342071495e-05, + "loss": 0.4374, + "step": 93850 + }, + { + "epoch": 0.4844676273468819, + "grad_norm": 22120.75, + "learning_rate": 6.690320857118488e-05, + "loss": 0.4309, + "step": 93900 + }, + { + "epoch": 0.4847255973294947, + "grad_norm": 20799.59765625, + "learning_rate": 6.686882469904939e-05, + "loss": 0.4262, + "step": 93950 + }, + { + "epoch": 0.48498356731210757, + "grad_norm": 22964.642578125, + "learning_rate": 6.683443182266192e-05, + "loss": 0.4338, + "step": 94000 + }, + { + "epoch": 0.48524153729472036, + "grad_norm": 22017.076171875, + "learning_rate": 6.68000299603807e-05, + "loss": 0.4317, + "step": 94050 + }, + { + "epoch": 0.4854995072773332, + "grad_norm": 21423.890625, + "learning_rate": 6.676561913056884e-05, + "loss": 0.4329, + "step": 94100 + }, + { + "epoch": 0.48575747725994606, + "grad_norm": 22123.390625, + "learning_rate": 6.67311993515941e-05, + "loss": 0.4309, + "step": 94150 + }, + { + "epoch": 0.48601544724255885, + "grad_norm": 23107.208984375, + "learning_rate": 6.669677064182915e-05, + "loss": 0.4316, + "step": 94200 + }, + { + "epoch": 0.4862734172251717, + "grad_norm": 21250.33203125, + "learning_rate": 6.666233301965132e-05, + "loss": 0.4289, + "step": 94250 + }, + { + "epoch": 0.4865313872077845, + "grad_norm": 21629.720703125, + "learning_rate": 6.66278865034428e-05, + "loss": 0.4301, + "step": 94300 + }, + { + "epoch": 0.48678935719039734, + "grad_norm": 23665.4609375, + "learning_rate": 6.659343111159043e-05, + "loss": 0.4267, + "step": 94350 + }, + { + "epoch": 0.48704732717301014, + "grad_norm": 23254.232421875, + "learning_rate": 6.655896686248583e-05, + "loss": 0.4266, + "step": 94400 + }, + { + "epoch": 0.487305297155623, + "grad_norm": 22491.404296875, + "learning_rate": 6.652449377452539e-05, + "loss": 0.4278, + "step": 94450 + }, + { + "epoch": 0.4875632671382358, + "grad_norm": 21071.74609375, + "learning_rate": 6.649001186611015e-05, + "loss": 0.4308, + "step": 94500 + }, + { + "epoch": 0.4878212371208486, + "grad_norm": 20860.861328125, + "learning_rate": 6.64555211556459e-05, + "loss": 0.4308, + "step": 94550 + }, + { + "epoch": 0.4880792071034614, + "grad_norm": 21733.033203125, + "learning_rate": 6.642102166154308e-05, + "loss": 0.4376, + "step": 94600 + }, + { + "epoch": 0.48833717708607427, + "grad_norm": 22799.3984375, + "learning_rate": 6.638651340221687e-05, + "loss": 0.4289, + "step": 94650 + }, + { + "epoch": 0.48859514706868706, + "grad_norm": 21678.296875, + "learning_rate": 6.635199639608709e-05, + "loss": 0.4301, + "step": 94700 + }, + { + "epoch": 0.4888531170512999, + "grad_norm": 20510.052734375, + "learning_rate": 6.631747066157831e-05, + "loss": 0.4276, + "step": 94750 + }, + { + "epoch": 0.48911108703391276, + "grad_norm": 21075.474609375, + "learning_rate": 6.628293621711964e-05, + "loss": 0.435, + "step": 94800 + }, + { + "epoch": 0.48936905701652555, + "grad_norm": 22063.083984375, + "learning_rate": 6.624839308114492e-05, + "loss": 0.434, + "step": 94850 + }, + { + "epoch": 0.4896270269991384, + "grad_norm": 20185.99609375, + "learning_rate": 6.621384127209261e-05, + "loss": 0.4246, + "step": 94900 + }, + { + "epoch": 0.4898849969817512, + "grad_norm": 22002.326171875, + "learning_rate": 6.61792808084058e-05, + "loss": 0.4272, + "step": 94950 + }, + { + "epoch": 0.49014296696436405, + "grad_norm": 22271.25, + "learning_rate": 6.614471170853218e-05, + "loss": 0.4323, + "step": 95000 + }, + { + "epoch": 0.49014296696436405, + "eval_loss": 0.4187907576560974, + "eval_runtime": 3274.3922, + "eval_samples_per_second": 947.083, + "eval_steps_per_second": 1.85, + "step": 95000 + }, + { + "epoch": 0.49040093694697684, + "grad_norm": 20668.224609375, + "learning_rate": 6.611013399092406e-05, + "loss": 0.4285, + "step": 95050 + }, + { + "epoch": 0.4906589069295897, + "grad_norm": 20890.05078125, + "learning_rate": 6.607554767403838e-05, + "loss": 0.4333, + "step": 95100 + }, + { + "epoch": 0.4909168769122025, + "grad_norm": 22767.6875, + "learning_rate": 6.604095277633664e-05, + "loss": 0.4284, + "step": 95150 + }, + { + "epoch": 0.49117484689481533, + "grad_norm": 22603.083984375, + "learning_rate": 6.600634931628493e-05, + "loss": 0.4332, + "step": 95200 + }, + { + "epoch": 0.4914328168774281, + "grad_norm": 25005.8984375, + "learning_rate": 6.597173731235388e-05, + "loss": 0.4284, + "step": 95250 + }, + { + "epoch": 0.491690786860041, + "grad_norm": 23687.4765625, + "learning_rate": 6.593711678301874e-05, + "loss": 0.4316, + "step": 95300 + }, + { + "epoch": 0.49194875684265377, + "grad_norm": 19670.087890625, + "learning_rate": 6.590248774675926e-05, + "loss": 0.4326, + "step": 95350 + }, + { + "epoch": 0.4922067268252666, + "grad_norm": 23065.818359375, + "learning_rate": 6.586785022205977e-05, + "loss": 0.4316, + "step": 95400 + }, + { + "epoch": 0.4924646968078794, + "grad_norm": 21279.01953125, + "learning_rate": 6.583320422740909e-05, + "loss": 0.4278, + "step": 95450 + }, + { + "epoch": 0.49272266679049226, + "grad_norm": 19707.6328125, + "learning_rate": 6.579854978130057e-05, + "loss": 0.4272, + "step": 95500 + }, + { + "epoch": 0.4929806367731051, + "grad_norm": 22938.3515625, + "learning_rate": 6.57638869022321e-05, + "loss": 0.4316, + "step": 95550 + }, + { + "epoch": 0.4932386067557179, + "grad_norm": 24812.65625, + "learning_rate": 6.572921560870607e-05, + "loss": 0.4315, + "step": 95600 + }, + { + "epoch": 0.49349657673833075, + "grad_norm": 21462.873046875, + "learning_rate": 6.569453591922931e-05, + "loss": 0.4299, + "step": 95650 + }, + { + "epoch": 0.49375454672094354, + "grad_norm": 22590.384765625, + "learning_rate": 6.565984785231318e-05, + "loss": 0.4294, + "step": 95700 + }, + { + "epoch": 0.4940125167035564, + "grad_norm": 23677.619140625, + "learning_rate": 6.56251514264735e-05, + "loss": 0.4379, + "step": 95750 + }, + { + "epoch": 0.4942704866861692, + "grad_norm": 22078.87109375, + "learning_rate": 6.559044666023057e-05, + "loss": 0.4276, + "step": 95800 + }, + { + "epoch": 0.49452845666878203, + "grad_norm": 22440.369140625, + "learning_rate": 6.55557335721091e-05, + "loss": 0.4279, + "step": 95850 + }, + { + "epoch": 0.49478642665139483, + "grad_norm": 24544.12109375, + "learning_rate": 6.552101218063826e-05, + "loss": 0.4305, + "step": 95900 + }, + { + "epoch": 0.4950443966340077, + "grad_norm": 21647.107421875, + "learning_rate": 6.548628250435167e-05, + "loss": 0.4328, + "step": 95950 + }, + { + "epoch": 0.49530236661662047, + "grad_norm": 21392.28125, + "learning_rate": 6.545154456178735e-05, + "loss": 0.4299, + "step": 96000 + }, + { + "epoch": 0.4955603365992333, + "grad_norm": 19458.55078125, + "learning_rate": 6.541679837148775e-05, + "loss": 0.4375, + "step": 96050 + }, + { + "epoch": 0.4958183065818461, + "grad_norm": 21774.14453125, + "learning_rate": 6.53820439519997e-05, + "loss": 0.4348, + "step": 96100 + }, + { + "epoch": 0.49607627656445896, + "grad_norm": 22902.63671875, + "learning_rate": 6.534728132187444e-05, + "loss": 0.4297, + "step": 96150 + }, + { + "epoch": 0.49633424654707176, + "grad_norm": 20869.306640625, + "learning_rate": 6.531251049966762e-05, + "loss": 0.4313, + "step": 96200 + }, + { + "epoch": 0.4965922165296846, + "grad_norm": 23554.537109375, + "learning_rate": 6.527773150393919e-05, + "loss": 0.4313, + "step": 96250 + }, + { + "epoch": 0.49685018651229745, + "grad_norm": 23000.92578125, + "learning_rate": 6.524294435325351e-05, + "loss": 0.4266, + "step": 96300 + }, + { + "epoch": 0.49710815649491025, + "grad_norm": 21331.72265625, + "learning_rate": 6.52081490661793e-05, + "loss": 0.4261, + "step": 96350 + }, + { + "epoch": 0.4973661264775231, + "grad_norm": 22540.75, + "learning_rate": 6.517334566128961e-05, + "loss": 0.4282, + "step": 96400 + }, + { + "epoch": 0.4976240964601359, + "grad_norm": 21733.560546875, + "learning_rate": 6.51385341571618e-05, + "loss": 0.43, + "step": 96450 + }, + { + "epoch": 0.49788206644274874, + "grad_norm": 23288.21875, + "learning_rate": 6.510371457237765e-05, + "loss": 0.4306, + "step": 96500 + }, + { + "epoch": 0.49814003642536153, + "grad_norm": 24475.9453125, + "learning_rate": 6.506888692552309e-05, + "loss": 0.4299, + "step": 96550 + }, + { + "epoch": 0.4983980064079744, + "grad_norm": 20756.5078125, + "learning_rate": 6.503405123518847e-05, + "loss": 0.4292, + "step": 96600 + }, + { + "epoch": 0.4986559763905872, + "grad_norm": 21059.365234375, + "learning_rate": 6.499920751996845e-05, + "loss": 0.4261, + "step": 96650 + }, + { + "epoch": 0.4989139463732, + "grad_norm": 22173.65625, + "learning_rate": 6.496435579846188e-05, + "loss": 0.4309, + "step": 96700 + }, + { + "epoch": 0.4991719163558128, + "grad_norm": 23941.49609375, + "learning_rate": 6.492949608927196e-05, + "loss": 0.4355, + "step": 96750 + }, + { + "epoch": 0.49942988633842567, + "grad_norm": 22027.400390625, + "learning_rate": 6.489462841100611e-05, + "loss": 0.433, + "step": 96800 + }, + { + "epoch": 0.49968785632103846, + "grad_norm": 21414.77734375, + "learning_rate": 6.485975278227605e-05, + "loss": 0.4291, + "step": 96850 + }, + { + "epoch": 0.4999458263036513, + "grad_norm": 23023.60546875, + "learning_rate": 6.482486922169767e-05, + "loss": 0.4309, + "step": 96900 + }, + { + "epoch": 0.5002037962862641, + "grad_norm": 23856.318359375, + "learning_rate": 6.478997774789119e-05, + "loss": 0.4314, + "step": 96950 + }, + { + "epoch": 0.500461766268877, + "grad_norm": 21834.822265625, + "learning_rate": 6.475507837948096e-05, + "loss": 0.4319, + "step": 97000 + }, + { + "epoch": 0.5007197362514898, + "grad_norm": 22487.779296875, + "learning_rate": 6.472017113509561e-05, + "loss": 0.4281, + "step": 97050 + }, + { + "epoch": 0.5009777062341026, + "grad_norm": 23955.73046875, + "learning_rate": 6.468525603336796e-05, + "loss": 0.4324, + "step": 97100 + }, + { + "epoch": 0.5012356762167154, + "grad_norm": 23631.203125, + "learning_rate": 6.4650333092935e-05, + "loss": 0.4333, + "step": 97150 + }, + { + "epoch": 0.5014936461993282, + "grad_norm": 21347.26953125, + "learning_rate": 6.461540233243792e-05, + "loss": 0.421, + "step": 97200 + }, + { + "epoch": 0.5017516161819411, + "grad_norm": 23590.9140625, + "learning_rate": 6.458046377052209e-05, + "loss": 0.4347, + "step": 97250 + }, + { + "epoch": 0.5020095861645539, + "grad_norm": 23192.708984375, + "learning_rate": 6.454551742583703e-05, + "loss": 0.4363, + "step": 97300 + }, + { + "epoch": 0.5022675561471667, + "grad_norm": 23588.974609375, + "learning_rate": 6.451056331703643e-05, + "loss": 0.4268, + "step": 97350 + }, + { + "epoch": 0.5025255261297795, + "grad_norm": 19536.3046875, + "learning_rate": 6.44756014627781e-05, + "loss": 0.4268, + "step": 97400 + }, + { + "epoch": 0.5027834961123924, + "grad_norm": 20248.345703125, + "learning_rate": 6.444063188172401e-05, + "loss": 0.4286, + "step": 97450 + }, + { + "epoch": 0.5030414660950052, + "grad_norm": 21598.1171875, + "learning_rate": 6.440565459254027e-05, + "loss": 0.4302, + "step": 97500 + }, + { + "epoch": 0.503299436077618, + "grad_norm": 25492.541015625, + "learning_rate": 6.437066961389704e-05, + "loss": 0.4223, + "step": 97550 + }, + { + "epoch": 0.5035574060602308, + "grad_norm": 22227.8125, + "learning_rate": 6.433567696446865e-05, + "loss": 0.4194, + "step": 97600 + }, + { + "epoch": 0.5038153760428437, + "grad_norm": 23799.134765625, + "learning_rate": 6.430067666293348e-05, + "loss": 0.4239, + "step": 97650 + }, + { + "epoch": 0.5040733460254565, + "grad_norm": 25147.080078125, + "learning_rate": 6.426566872797403e-05, + "loss": 0.4369, + "step": 97700 + }, + { + "epoch": 0.5043313160080694, + "grad_norm": 22497.68359375, + "learning_rate": 6.423065317827686e-05, + "loss": 0.4332, + "step": 97750 + }, + { + "epoch": 0.5045892859906821, + "grad_norm": 23273.966796875, + "learning_rate": 6.419563003253258e-05, + "loss": 0.4331, + "step": 97800 + }, + { + "epoch": 0.5048472559732949, + "grad_norm": 21943.7734375, + "learning_rate": 6.416059930943585e-05, + "loss": 0.4331, + "step": 97850 + }, + { + "epoch": 0.5051052259559078, + "grad_norm": 23134.685546875, + "learning_rate": 6.412556102768544e-05, + "loss": 0.4283, + "step": 97900 + }, + { + "epoch": 0.5053631959385206, + "grad_norm": 21504.177734375, + "learning_rate": 6.409051520598405e-05, + "loss": 0.4319, + "step": 97950 + }, + { + "epoch": 0.5056211659211334, + "grad_norm": 25481.029296875, + "learning_rate": 6.405546186303852e-05, + "loss": 0.4268, + "step": 98000 + }, + { + "epoch": 0.5058791359037462, + "grad_norm": 21170.70703125, + "learning_rate": 6.402040101755961e-05, + "loss": 0.4253, + "step": 98050 + }, + { + "epoch": 0.5061371058863591, + "grad_norm": 20005.333984375, + "learning_rate": 6.398533268826212e-05, + "loss": 0.4267, + "step": 98100 + }, + { + "epoch": 0.5063950758689719, + "grad_norm": 20913.32421875, + "learning_rate": 6.395025689386485e-05, + "loss": 0.4245, + "step": 98150 + }, + { + "epoch": 0.5066530458515847, + "grad_norm": 24310.720703125, + "learning_rate": 6.391517365309059e-05, + "loss": 0.4246, + "step": 98200 + }, + { + "epoch": 0.5069110158341975, + "grad_norm": 21981.455078125, + "learning_rate": 6.388008298466607e-05, + "loss": 0.4286, + "step": 98250 + }, + { + "epoch": 0.5071689858168104, + "grad_norm": 23764.30078125, + "learning_rate": 6.384498490732202e-05, + "loss": 0.4282, + "step": 98300 + }, + { + "epoch": 0.5074269557994232, + "grad_norm": 20518.447265625, + "learning_rate": 6.380987943979314e-05, + "loss": 0.4333, + "step": 98350 + }, + { + "epoch": 0.5076849257820361, + "grad_norm": 23327.80859375, + "learning_rate": 6.377476660081803e-05, + "loss": 0.4255, + "step": 98400 + }, + { + "epoch": 0.5079428957646488, + "grad_norm": 19600.84375, + "learning_rate": 6.373964640913924e-05, + "loss": 0.4277, + "step": 98450 + }, + { + "epoch": 0.5082008657472616, + "grad_norm": 23252.146484375, + "learning_rate": 6.370451888350322e-05, + "loss": 0.4311, + "step": 98500 + }, + { + "epoch": 0.5084588357298745, + "grad_norm": 21930.736328125, + "learning_rate": 6.366938404266041e-05, + "loss": 0.4329, + "step": 98550 + }, + { + "epoch": 0.5087168057124873, + "grad_norm": 21249.69140625, + "learning_rate": 6.36342419053651e-05, + "loss": 0.4257, + "step": 98600 + }, + { + "epoch": 0.5089747756951001, + "grad_norm": 21809.4609375, + "learning_rate": 6.359909249037548e-05, + "loss": 0.431, + "step": 98650 + }, + { + "epoch": 0.5092327456777129, + "grad_norm": 23142.6796875, + "learning_rate": 6.356393581645359e-05, + "loss": 0.4329, + "step": 98700 + }, + { + "epoch": 0.5094907156603258, + "grad_norm": 21783.541015625, + "learning_rate": 6.352877190236542e-05, + "loss": 0.4362, + "step": 98750 + }, + { + "epoch": 0.5097486856429386, + "grad_norm": 22534.080078125, + "learning_rate": 6.349360076688079e-05, + "loss": 0.4302, + "step": 98800 + }, + { + "epoch": 0.5100066556255514, + "grad_norm": 22630.03515625, + "learning_rate": 6.345842242877336e-05, + "loss": 0.4314, + "step": 98850 + }, + { + "epoch": 0.5102646256081642, + "grad_norm": 23446.0390625, + "learning_rate": 6.342323690682064e-05, + "loss": 0.428, + "step": 98900 + }, + { + "epoch": 0.5105225955907771, + "grad_norm": 25644.2734375, + "learning_rate": 6.338804421980398e-05, + "loss": 0.4219, + "step": 98950 + }, + { + "epoch": 0.5107805655733899, + "grad_norm": 23159.580078125, + "learning_rate": 6.335284438650856e-05, + "loss": 0.434, + "step": 99000 + }, + { + "epoch": 0.5110385355560026, + "grad_norm": 23536.556640625, + "learning_rate": 6.331763742572337e-05, + "loss": 0.4293, + "step": 99050 + }, + { + "epoch": 0.5112965055386155, + "grad_norm": 23240.662109375, + "learning_rate": 6.328242335624121e-05, + "loss": 0.434, + "step": 99100 + }, + { + "epoch": 0.5115544755212283, + "grad_norm": 22368.94921875, + "learning_rate": 6.324720219685866e-05, + "loss": 0.4295, + "step": 99150 + }, + { + "epoch": 0.5118124455038412, + "grad_norm": 23257.068359375, + "learning_rate": 6.321197396637608e-05, + "loss": 0.4198, + "step": 99200 + }, + { + "epoch": 0.512070415486454, + "grad_norm": 21806.6953125, + "learning_rate": 6.317673868359765e-05, + "loss": 0.4241, + "step": 99250 + }, + { + "epoch": 0.5123283854690668, + "grad_norm": 24117.416015625, + "learning_rate": 6.314149636733125e-05, + "loss": 0.4261, + "step": 99300 + }, + { + "epoch": 0.5125863554516796, + "grad_norm": 25262.626953125, + "learning_rate": 6.310624703638858e-05, + "loss": 0.4234, + "step": 99350 + }, + { + "epoch": 0.5128443254342925, + "grad_norm": 22739.923828125, + "learning_rate": 6.3070990709585e-05, + "loss": 0.4299, + "step": 99400 + }, + { + "epoch": 0.5131022954169053, + "grad_norm": 20651.646484375, + "learning_rate": 6.303572740573971e-05, + "loss": 0.4307, + "step": 99450 + }, + { + "epoch": 0.5133602653995181, + "grad_norm": 22125.037109375, + "learning_rate": 6.300045714367555e-05, + "loss": 0.4216, + "step": 99500 + }, + { + "epoch": 0.5136182353821309, + "grad_norm": 22210.080078125, + "learning_rate": 6.29651799422191e-05, + "loss": 0.429, + "step": 99550 + }, + { + "epoch": 0.5138762053647438, + "grad_norm": 23850.673828125, + "learning_rate": 6.292989582020063e-05, + "loss": 0.4337, + "step": 99600 + }, + { + "epoch": 0.5141341753473566, + "grad_norm": 21346.251953125, + "learning_rate": 6.289460479645417e-05, + "loss": 0.4352, + "step": 99650 + }, + { + "epoch": 0.5143921453299694, + "grad_norm": 22687.080078125, + "learning_rate": 6.285930688981735e-05, + "loss": 0.433, + "step": 99700 + }, + { + "epoch": 0.5146501153125822, + "grad_norm": 20447.666015625, + "learning_rate": 6.282400211913154e-05, + "loss": 0.4288, + "step": 99750 + }, + { + "epoch": 0.514908085295195, + "grad_norm": 21768.51953125, + "learning_rate": 6.278869050324168e-05, + "loss": 0.4363, + "step": 99800 + }, + { + "epoch": 0.5151660552778079, + "grad_norm": 21896.47265625, + "learning_rate": 6.27533720609965e-05, + "loss": 0.4307, + "step": 99850 + }, + { + "epoch": 0.5154240252604207, + "grad_norm": 22967.384765625, + "learning_rate": 6.271804681124827e-05, + "loss": 0.4295, + "step": 99900 + }, + { + "epoch": 0.5156819952430335, + "grad_norm": 20233.869140625, + "learning_rate": 6.268271477285292e-05, + "loss": 0.4329, + "step": 99950 + }, + { + "epoch": 0.5159399652256463, + "grad_norm": 20550.060546875, + "learning_rate": 6.264737596466998e-05, + "loss": 0.4267, + "step": 100000 + }, + { + "epoch": 0.5159399652256463, + "eval_loss": 0.4161209166049957, + "eval_runtime": 2887.0736, + "eval_samples_per_second": 1074.14, + "eval_steps_per_second": 2.098, + "step": 100000 + }, + { + "epoch": 0.5161979352082592, + "grad_norm": 22327.767578125, + "learning_rate": 6.261203040556267e-05, + "loss": 0.4272, + "step": 100050 + }, + { + "epoch": 0.516455905190872, + "grad_norm": 22512.1640625, + "learning_rate": 6.257667811439776e-05, + "loss": 0.4267, + "step": 100100 + }, + { + "epoch": 0.5167138751734848, + "grad_norm": 22710.8828125, + "learning_rate": 6.254131911004561e-05, + "loss": 0.42, + "step": 100150 + }, + { + "epoch": 0.5169718451560976, + "grad_norm": 21731.365234375, + "learning_rate": 6.250595341138014e-05, + "loss": 0.4259, + "step": 100200 + }, + { + "epoch": 0.5172298151387105, + "grad_norm": 21478.970703125, + "learning_rate": 6.247058103727892e-05, + "loss": 0.4217, + "step": 100250 + }, + { + "epoch": 0.5174877851213233, + "grad_norm": 22431.939453125, + "learning_rate": 6.243520200662303e-05, + "loss": 0.4272, + "step": 100300 + }, + { + "epoch": 0.5177457551039361, + "grad_norm": 22137.5078125, + "learning_rate": 6.239981633829709e-05, + "loss": 0.4301, + "step": 100350 + }, + { + "epoch": 0.5180037250865489, + "grad_norm": 22802.220703125, + "learning_rate": 6.23644240511893e-05, + "loss": 0.4346, + "step": 100400 + }, + { + "epoch": 0.5182616950691618, + "grad_norm": 20567.640625, + "learning_rate": 6.232902516419137e-05, + "loss": 0.4271, + "step": 100450 + }, + { + "epoch": 0.5185196650517746, + "grad_norm": 20855.70703125, + "learning_rate": 6.229361969619855e-05, + "loss": 0.4237, + "step": 100500 + }, + { + "epoch": 0.5187776350343875, + "grad_norm": 22052.44921875, + "learning_rate": 6.225820766610958e-05, + "loss": 0.4324, + "step": 100550 + }, + { + "epoch": 0.5190356050170002, + "grad_norm": 21984.818359375, + "learning_rate": 6.222278909282674e-05, + "loss": 0.4315, + "step": 100600 + }, + { + "epoch": 0.519293574999613, + "grad_norm": 22044.8359375, + "learning_rate": 6.218736399525575e-05, + "loss": 0.4324, + "step": 100650 + }, + { + "epoch": 0.5195515449822259, + "grad_norm": 22661.78515625, + "learning_rate": 6.215193239230586e-05, + "loss": 0.4273, + "step": 100700 + }, + { + "epoch": 0.5198095149648387, + "grad_norm": 22091.01171875, + "learning_rate": 6.211649430288976e-05, + "loss": 0.4252, + "step": 100750 + }, + { + "epoch": 0.5200674849474515, + "grad_norm": 22164.376953125, + "learning_rate": 6.208104974592364e-05, + "loss": 0.4272, + "step": 100800 + }, + { + "epoch": 0.5203254549300643, + "grad_norm": 23387.287109375, + "learning_rate": 6.20455987403271e-05, + "loss": 0.4281, + "step": 100850 + }, + { + "epoch": 0.5205834249126772, + "grad_norm": 22505.326171875, + "learning_rate": 6.201014130502317e-05, + "loss": 0.4285, + "step": 100900 + }, + { + "epoch": 0.52084139489529, + "grad_norm": 21150.341796875, + "learning_rate": 6.19746774589384e-05, + "loss": 0.4274, + "step": 100950 + }, + { + "epoch": 0.5210993648779028, + "grad_norm": 23076.650390625, + "learning_rate": 6.193920722100268e-05, + "loss": 0.4289, + "step": 101000 + }, + { + "epoch": 0.5213573348605156, + "grad_norm": 20890.41796875, + "learning_rate": 6.190373061014932e-05, + "loss": 0.4305, + "step": 101050 + }, + { + "epoch": 0.5216153048431285, + "grad_norm": 22231.6328125, + "learning_rate": 6.186824764531507e-05, + "loss": 0.4304, + "step": 101100 + }, + { + "epoch": 0.5218732748257413, + "grad_norm": 22094.197265625, + "learning_rate": 6.183275834544005e-05, + "loss": 0.4279, + "step": 101150 + }, + { + "epoch": 0.522131244808354, + "grad_norm": 23188.353515625, + "learning_rate": 6.179726272946774e-05, + "loss": 0.4272, + "step": 101200 + }, + { + "epoch": 0.5223892147909669, + "grad_norm": 22908.5, + "learning_rate": 6.176176081634504e-05, + "loss": 0.4229, + "step": 101250 + }, + { + "epoch": 0.5226471847735797, + "grad_norm": 21536.37109375, + "learning_rate": 6.172625262502215e-05, + "loss": 0.4267, + "step": 101300 + }, + { + "epoch": 0.5229051547561926, + "grad_norm": 22923.38671875, + "learning_rate": 6.169073817445268e-05, + "loss": 0.4256, + "step": 101350 + }, + { + "epoch": 0.5231631247388054, + "grad_norm": 22802.669921875, + "learning_rate": 6.165521748359356e-05, + "loss": 0.4241, + "step": 101400 + }, + { + "epoch": 0.5234210947214182, + "grad_norm": 22852.59765625, + "learning_rate": 6.161969057140504e-05, + "loss": 0.4275, + "step": 101450 + }, + { + "epoch": 0.523679064704031, + "grad_norm": 27410.056640625, + "learning_rate": 6.158415745685068e-05, + "loss": 0.4316, + "step": 101500 + }, + { + "epoch": 0.5239370346866439, + "grad_norm": 21783.482421875, + "learning_rate": 6.15486181588974e-05, + "loss": 0.4235, + "step": 101550 + }, + { + "epoch": 0.5241950046692567, + "grad_norm": 21013.259765625, + "learning_rate": 6.151307269651536e-05, + "loss": 0.426, + "step": 101600 + }, + { + "epoch": 0.5244529746518695, + "grad_norm": 23852.673828125, + "learning_rate": 6.147752108867807e-05, + "loss": 0.4226, + "step": 101650 + }, + { + "epoch": 0.5247109446344823, + "grad_norm": 24846.427734375, + "learning_rate": 6.144196335436225e-05, + "loss": 0.4277, + "step": 101700 + }, + { + "epoch": 0.5249689146170952, + "grad_norm": 21197.177734375, + "learning_rate": 6.140639951254796e-05, + "loss": 0.4247, + "step": 101750 + }, + { + "epoch": 0.525226884599708, + "grad_norm": 24620.37890625, + "learning_rate": 6.137082958221848e-05, + "loss": 0.429, + "step": 101800 + }, + { + "epoch": 0.5254848545823207, + "grad_norm": 22811.875, + "learning_rate": 6.133525358236036e-05, + "loss": 0.4274, + "step": 101850 + }, + { + "epoch": 0.5257428245649336, + "grad_norm": 20224.125, + "learning_rate": 6.129967153196336e-05, + "loss": 0.4338, + "step": 101900 + }, + { + "epoch": 0.5260007945475464, + "grad_norm": 21489.734375, + "learning_rate": 6.126408345002052e-05, + "loss": 0.4333, + "step": 101950 + }, + { + "epoch": 0.5262587645301593, + "grad_norm": 21771.20703125, + "learning_rate": 6.122848935552804e-05, + "loss": 0.4258, + "step": 102000 + }, + { + "epoch": 0.5265167345127721, + "grad_norm": 23362.43359375, + "learning_rate": 6.119288926748537e-05, + "loss": 0.4234, + "step": 102050 + }, + { + "epoch": 0.5267747044953849, + "grad_norm": 20869.46484375, + "learning_rate": 6.115728320489516e-05, + "loss": 0.4233, + "step": 102100 + }, + { + "epoch": 0.5270326744779977, + "grad_norm": 21146.568359375, + "learning_rate": 6.11216711867632e-05, + "loss": 0.4243, + "step": 102150 + }, + { + "epoch": 0.5272906444606106, + "grad_norm": 24031.97265625, + "learning_rate": 6.108605323209853e-05, + "loss": 0.4334, + "step": 102200 + }, + { + "epoch": 0.5275486144432234, + "grad_norm": 23461.306640625, + "learning_rate": 6.10504293599133e-05, + "loss": 0.4289, + "step": 102250 + }, + { + "epoch": 0.5278065844258362, + "grad_norm": 21013.169921875, + "learning_rate": 6.101479958922287e-05, + "loss": 0.4334, + "step": 102300 + }, + { + "epoch": 0.528064554408449, + "grad_norm": 23328.306640625, + "learning_rate": 6.0979163939045716e-05, + "loss": 0.4285, + "step": 102350 + }, + { + "epoch": 0.5283225243910619, + "grad_norm": 21542.20703125, + "learning_rate": 6.094352242840343e-05, + "loss": 0.4321, + "step": 102400 + }, + { + "epoch": 0.5285804943736747, + "grad_norm": 20556.357421875, + "learning_rate": 6.09078750763208e-05, + "loss": 0.4255, + "step": 102450 + }, + { + "epoch": 0.5288384643562875, + "grad_norm": 24925.21875, + "learning_rate": 6.0872221901825666e-05, + "loss": 0.4225, + "step": 102500 + }, + { + "epoch": 0.5290964343389003, + "grad_norm": 22750.419921875, + "learning_rate": 6.0836562923949016e-05, + "loss": 0.4287, + "step": 102550 + }, + { + "epoch": 0.5293544043215132, + "grad_norm": 21514.8984375, + "learning_rate": 6.080089816172489e-05, + "loss": 0.4254, + "step": 102600 + }, + { + "epoch": 0.529612374304126, + "grad_norm": 23347.03125, + "learning_rate": 6.07652276341905e-05, + "loss": 0.4346, + "step": 102650 + }, + { + "epoch": 0.5298703442867388, + "grad_norm": 23180.916015625, + "learning_rate": 6.072955136038604e-05, + "loss": 0.4244, + "step": 102700 + }, + { + "epoch": 0.5301283142693516, + "grad_norm": 20701.431640625, + "learning_rate": 6.069386935935484e-05, + "loss": 0.43, + "step": 102750 + }, + { + "epoch": 0.5303862842519644, + "grad_norm": 23350.99609375, + "learning_rate": 6.0658181650143245e-05, + "loss": 0.4217, + "step": 102800 + }, + { + "epoch": 0.5306442542345773, + "grad_norm": 21068.111328125, + "learning_rate": 6.062248825180066e-05, + "loss": 0.4278, + "step": 102850 + }, + { + "epoch": 0.5309022242171901, + "grad_norm": 23415.25, + "learning_rate": 6.0586789183379554e-05, + "loss": 0.4331, + "step": 102900 + }, + { + "epoch": 0.5311601941998029, + "grad_norm": 22186.048828125, + "learning_rate": 6.055108446393538e-05, + "loss": 0.4327, + "step": 102950 + }, + { + "epoch": 0.5314181641824157, + "grad_norm": 20644.166015625, + "learning_rate": 6.051537411252662e-05, + "loss": 0.4264, + "step": 103000 + }, + { + "epoch": 0.5316761341650286, + "grad_norm": 21755.712890625, + "learning_rate": 6.047965814821478e-05, + "loss": 0.4253, + "step": 103050 + }, + { + "epoch": 0.5319341041476414, + "grad_norm": 22319.177734375, + "learning_rate": 6.044393659006435e-05, + "loss": 0.4238, + "step": 103100 + }, + { + "epoch": 0.5321920741302542, + "grad_norm": 22544.064453125, + "learning_rate": 6.040820945714281e-05, + "loss": 0.4306, + "step": 103150 + }, + { + "epoch": 0.532450044112867, + "grad_norm": 21484.53125, + "learning_rate": 6.037247676852059e-05, + "loss": 0.4254, + "step": 103200 + }, + { + "epoch": 0.5327080140954799, + "grad_norm": 23923.201171875, + "learning_rate": 6.033673854327114e-05, + "loss": 0.4258, + "step": 103250 + }, + { + "epoch": 0.5329659840780927, + "grad_norm": 20412.08984375, + "learning_rate": 6.03009948004708e-05, + "loss": 0.4286, + "step": 103300 + }, + { + "epoch": 0.5332239540607056, + "grad_norm": 19932.908203125, + "learning_rate": 6.026524555919891e-05, + "loss": 0.4367, + "step": 103350 + }, + { + "epoch": 0.5334819240433183, + "grad_norm": 21761.033203125, + "learning_rate": 6.022949083853772e-05, + "loss": 0.4272, + "step": 103400 + }, + { + "epoch": 0.5337398940259311, + "grad_norm": 23392.29296875, + "learning_rate": 6.019373065757239e-05, + "loss": 0.4274, + "step": 103450 + }, + { + "epoch": 0.533997864008544, + "grad_norm": 26151.69921875, + "learning_rate": 6.015796503539103e-05, + "loss": 0.4189, + "step": 103500 + }, + { + "epoch": 0.5342558339911568, + "grad_norm": 22503.529296875, + "learning_rate": 6.012219399108463e-05, + "loss": 0.428, + "step": 103550 + }, + { + "epoch": 0.5345138039737696, + "grad_norm": 25906.685546875, + "learning_rate": 6.008641754374709e-05, + "loss": 0.4287, + "step": 103600 + }, + { + "epoch": 0.5347717739563824, + "grad_norm": 23784.685546875, + "learning_rate": 6.005063571247517e-05, + "loss": 0.4276, + "step": 103650 + }, + { + "epoch": 0.5350297439389953, + "grad_norm": 21574.30078125, + "learning_rate": 6.0014848516368515e-05, + "loss": 0.4344, + "step": 103700 + }, + { + "epoch": 0.5352877139216081, + "grad_norm": 22296.921875, + "learning_rate": 5.9979055974529675e-05, + "loss": 0.4322, + "step": 103750 + }, + { + "epoch": 0.5355456839042209, + "grad_norm": 21478.611328125, + "learning_rate": 5.994325810606397e-05, + "loss": 0.429, + "step": 103800 + }, + { + "epoch": 0.5358036538868337, + "grad_norm": 22572.37109375, + "learning_rate": 5.9907454930079645e-05, + "loss": 0.4281, + "step": 103850 + }, + { + "epoch": 0.5360616238694466, + "grad_norm": 23416.80859375, + "learning_rate": 5.98716464656877e-05, + "loss": 0.4266, + "step": 103900 + }, + { + "epoch": 0.5363195938520594, + "grad_norm": 23470.626953125, + "learning_rate": 5.983583273200204e-05, + "loss": 0.426, + "step": 103950 + }, + { + "epoch": 0.5365775638346721, + "grad_norm": 24464.38671875, + "learning_rate": 5.980001374813933e-05, + "loss": 0.4218, + "step": 104000 + }, + { + "epoch": 0.536835533817285, + "grad_norm": 23835.29296875, + "learning_rate": 5.976418953321904e-05, + "loss": 0.4261, + "step": 104050 + }, + { + "epoch": 0.5370935037998978, + "grad_norm": 23344.654296875, + "learning_rate": 5.972836010636346e-05, + "loss": 0.4292, + "step": 104100 + }, + { + "epoch": 0.5373514737825107, + "grad_norm": 23925.935546875, + "learning_rate": 5.9692525486697616e-05, + "loss": 0.4323, + "step": 104150 + }, + { + "epoch": 0.5376094437651235, + "grad_norm": 23155.76953125, + "learning_rate": 5.965668569334937e-05, + "loss": 0.428, + "step": 104200 + }, + { + "epoch": 0.5378674137477363, + "grad_norm": 22334.19921875, + "learning_rate": 5.962084074544928e-05, + "loss": 0.4129, + "step": 104250 + }, + { + "epoch": 0.5381253837303491, + "grad_norm": 20239.66796875, + "learning_rate": 5.95849906621307e-05, + "loss": 0.4335, + "step": 104300 + }, + { + "epoch": 0.538383353712962, + "grad_norm": 22626.19140625, + "learning_rate": 5.9549135462529704e-05, + "loss": 0.4274, + "step": 104350 + }, + { + "epoch": 0.5386413236955748, + "grad_norm": 21798.65625, + "learning_rate": 5.951327516578512e-05, + "loss": 0.4258, + "step": 104400 + }, + { + "epoch": 0.5388992936781876, + "grad_norm": 21796.7421875, + "learning_rate": 5.947740979103845e-05, + "loss": 0.4263, + "step": 104450 + }, + { + "epoch": 0.5391572636608004, + "grad_norm": 22380.21484375, + "learning_rate": 5.944153935743396e-05, + "loss": 0.4218, + "step": 104500 + }, + { + "epoch": 0.5394152336434133, + "grad_norm": 22526.4296875, + "learning_rate": 5.940566388411859e-05, + "loss": 0.4233, + "step": 104550 + }, + { + "epoch": 0.5396732036260261, + "grad_norm": 22876.5703125, + "learning_rate": 5.936978339024195e-05, + "loss": 0.4296, + "step": 104600 + }, + { + "epoch": 0.5399311736086388, + "grad_norm": 22592.654296875, + "learning_rate": 5.9333897894956394e-05, + "loss": 0.4287, + "step": 104650 + }, + { + "epoch": 0.5401891435912517, + "grad_norm": 21235.43359375, + "learning_rate": 5.929800741741688e-05, + "loss": 0.4269, + "step": 104700 + }, + { + "epoch": 0.5404471135738645, + "grad_norm": 22049.05859375, + "learning_rate": 5.926211197678104e-05, + "loss": 0.4266, + "step": 104750 + }, + { + "epoch": 0.5407050835564774, + "grad_norm": 23252.845703125, + "learning_rate": 5.922621159220918e-05, + "loss": 0.4223, + "step": 104800 + }, + { + "epoch": 0.5409630535390902, + "grad_norm": 20577.1796875, + "learning_rate": 5.919030628286424e-05, + "loss": 0.4302, + "step": 104850 + }, + { + "epoch": 0.541221023521703, + "grad_norm": 24854.8671875, + "learning_rate": 5.915439606791174e-05, + "loss": 0.4212, + "step": 104900 + }, + { + "epoch": 0.5414789935043158, + "grad_norm": 22561.552734375, + "learning_rate": 5.9118480966519906e-05, + "loss": 0.4196, + "step": 104950 + }, + { + "epoch": 0.5417369634869287, + "grad_norm": 23885.4765625, + "learning_rate": 5.9082560997859496e-05, + "loss": 0.421, + "step": 105000 + }, + { + "epoch": 0.5417369634869287, + "eval_loss": 0.4132173955440521, + "eval_runtime": 2876.3365, + "eval_samples_per_second": 1078.149, + "eval_steps_per_second": 2.106, + "step": 105000 + }, + { + "epoch": 0.5419949334695415, + "grad_norm": 20974.994140625, + "learning_rate": 5.90466361811039e-05, + "loss": 0.4228, + "step": 105050 + }, + { + "epoch": 0.5422529034521543, + "grad_norm": 24338.412109375, + "learning_rate": 5.9010706535429086e-05, + "loss": 0.4215, + "step": 105100 + }, + { + "epoch": 0.5425108734347671, + "grad_norm": 20734.796875, + "learning_rate": 5.8974772080013605e-05, + "loss": 0.4319, + "step": 105150 + }, + { + "epoch": 0.54276884341738, + "grad_norm": 21026.123046875, + "learning_rate": 5.8938832834038574e-05, + "loss": 0.4318, + "step": 105200 + }, + { + "epoch": 0.5430268133999928, + "grad_norm": 20023.287109375, + "learning_rate": 5.890288881668766e-05, + "loss": 0.4306, + "step": 105250 + }, + { + "epoch": 0.5432847833826056, + "grad_norm": 23171.42578125, + "learning_rate": 5.88669400471471e-05, + "loss": 0.4237, + "step": 105300 + }, + { + "epoch": 0.5435427533652184, + "grad_norm": 21692.109375, + "learning_rate": 5.8830986544605635e-05, + "loss": 0.4261, + "step": 105350 + }, + { + "epoch": 0.5438007233478312, + "grad_norm": 22358.216796875, + "learning_rate": 5.8795028328254566e-05, + "loss": 0.4204, + "step": 105400 + }, + { + "epoch": 0.5440586933304441, + "grad_norm": 22529.650390625, + "learning_rate": 5.875906541728766e-05, + "loss": 0.422, + "step": 105450 + }, + { + "epoch": 0.544316663313057, + "grad_norm": 18307.05859375, + "learning_rate": 5.8723097830901264e-05, + "loss": 0.4236, + "step": 105500 + }, + { + "epoch": 0.5445746332956697, + "grad_norm": 22356.583984375, + "learning_rate": 5.8687125588294154e-05, + "loss": 0.4213, + "step": 105550 + }, + { + "epoch": 0.5448326032782825, + "grad_norm": 21446.732421875, + "learning_rate": 5.8651148708667625e-05, + "loss": 0.4216, + "step": 105600 + }, + { + "epoch": 0.5450905732608954, + "grad_norm": 24014.49609375, + "learning_rate": 5.8615167211225416e-05, + "loss": 0.4283, + "step": 105650 + }, + { + "epoch": 0.5453485432435082, + "grad_norm": 22394.306640625, + "learning_rate": 5.8579181115173785e-05, + "loss": 0.4242, + "step": 105700 + }, + { + "epoch": 0.545606513226121, + "grad_norm": 25348.26171875, + "learning_rate": 5.8543190439721405e-05, + "loss": 0.4234, + "step": 105750 + }, + { + "epoch": 0.5458644832087338, + "grad_norm": 22638.720703125, + "learning_rate": 5.850719520407939e-05, + "loss": 0.4269, + "step": 105800 + }, + { + "epoch": 0.5461224531913467, + "grad_norm": 22702.841796875, + "learning_rate": 5.847119542746131e-05, + "loss": 0.4201, + "step": 105850 + }, + { + "epoch": 0.5463804231739595, + "grad_norm": 22299.849609375, + "learning_rate": 5.843519112908315e-05, + "loss": 0.4243, + "step": 105900 + }, + { + "epoch": 0.5466383931565723, + "grad_norm": 21965.283203125, + "learning_rate": 5.8399182328163304e-05, + "loss": 0.4209, + "step": 105950 + }, + { + "epoch": 0.5468963631391851, + "grad_norm": 22101.755859375, + "learning_rate": 5.836316904392256e-05, + "loss": 0.4254, + "step": 106000 + }, + { + "epoch": 0.547154333121798, + "grad_norm": 22735.970703125, + "learning_rate": 5.8327151295584126e-05, + "loss": 0.4251, + "step": 106050 + }, + { + "epoch": 0.5474123031044108, + "grad_norm": 24287.58203125, + "learning_rate": 5.829112910237359e-05, + "loss": 0.427, + "step": 106100 + }, + { + "epoch": 0.5476702730870235, + "grad_norm": 22509.02734375, + "learning_rate": 5.825510248351889e-05, + "loss": 0.4209, + "step": 106150 + }, + { + "epoch": 0.5479282430696364, + "grad_norm": 22325.32421875, + "learning_rate": 5.821907145825032e-05, + "loss": 0.4276, + "step": 106200 + }, + { + "epoch": 0.5481862130522492, + "grad_norm": 21362.255859375, + "learning_rate": 5.8183036045800556e-05, + "loss": 0.4273, + "step": 106250 + }, + { + "epoch": 0.5484441830348621, + "grad_norm": 22934.61328125, + "learning_rate": 5.814699626540461e-05, + "loss": 0.4318, + "step": 106300 + }, + { + "epoch": 0.5487021530174749, + "grad_norm": 23663.65625, + "learning_rate": 5.8110952136299814e-05, + "loss": 0.4246, + "step": 106350 + }, + { + "epoch": 0.5489601230000877, + "grad_norm": 20743.84765625, + "learning_rate": 5.807490367772584e-05, + "loss": 0.4289, + "step": 106400 + }, + { + "epoch": 0.5492180929827005, + "grad_norm": 20859.244140625, + "learning_rate": 5.8038850908924636e-05, + "loss": 0.4255, + "step": 106450 + }, + { + "epoch": 0.5494760629653134, + "grad_norm": 21824.990234375, + "learning_rate": 5.800279384914047e-05, + "loss": 0.4311, + "step": 106500 + }, + { + "epoch": 0.5497340329479262, + "grad_norm": 19514.681640625, + "learning_rate": 5.7966732517619926e-05, + "loss": 0.4311, + "step": 106550 + }, + { + "epoch": 0.549992002930539, + "grad_norm": 24263.765625, + "learning_rate": 5.7930666933611835e-05, + "loss": 0.4257, + "step": 106600 + }, + { + "epoch": 0.5502499729131518, + "grad_norm": 23152.279296875, + "learning_rate": 5.789459711636729e-05, + "loss": 0.4226, + "step": 106650 + }, + { + "epoch": 0.5505079428957647, + "grad_norm": 21756.8671875, + "learning_rate": 5.785852308513967e-05, + "loss": 0.4266, + "step": 106700 + }, + { + "epoch": 0.5507659128783775, + "grad_norm": 20913.3125, + "learning_rate": 5.78224448591846e-05, + "loss": 0.4228, + "step": 106750 + }, + { + "epoch": 0.5510238828609902, + "grad_norm": 24674.92578125, + "learning_rate": 5.778636245775996e-05, + "loss": 0.4246, + "step": 106800 + }, + { + "epoch": 0.5512818528436031, + "grad_norm": 24229.4296875, + "learning_rate": 5.775027590012579e-05, + "loss": 0.4244, + "step": 106850 + }, + { + "epoch": 0.5515398228262159, + "grad_norm": 21722.048828125, + "learning_rate": 5.771418520554443e-05, + "loss": 0.4264, + "step": 106900 + }, + { + "epoch": 0.5517977928088288, + "grad_norm": 22060.224609375, + "learning_rate": 5.7678090393280384e-05, + "loss": 0.4268, + "step": 106950 + }, + { + "epoch": 0.5520557627914416, + "grad_norm": 25690.306640625, + "learning_rate": 5.7641991482600366e-05, + "loss": 0.4298, + "step": 107000 + }, + { + "epoch": 0.5523137327740544, + "grad_norm": 24629.115234375, + "learning_rate": 5.7605888492773266e-05, + "loss": 0.4223, + "step": 107050 + }, + { + "epoch": 0.5525717027566672, + "grad_norm": 23552.78515625, + "learning_rate": 5.756978144307018e-05, + "loss": 0.4246, + "step": 107100 + }, + { + "epoch": 0.5528296727392801, + "grad_norm": 21611.703125, + "learning_rate": 5.753367035276431e-05, + "loss": 0.4173, + "step": 107150 + }, + { + "epoch": 0.5530876427218929, + "grad_norm": 24158.64453125, + "learning_rate": 5.749755524113111e-05, + "loss": 0.4211, + "step": 107200 + }, + { + "epoch": 0.5533456127045057, + "grad_norm": 23446.94140625, + "learning_rate": 5.746143612744811e-05, + "loss": 0.4262, + "step": 107250 + }, + { + "epoch": 0.5536035826871185, + "grad_norm": 21608.703125, + "learning_rate": 5.742531303099498e-05, + "loss": 0.424, + "step": 107300 + }, + { + "epoch": 0.5538615526697314, + "grad_norm": 25070.78125, + "learning_rate": 5.738918597105353e-05, + "loss": 0.4219, + "step": 107350 + }, + { + "epoch": 0.5541195226523442, + "grad_norm": 21161.5234375, + "learning_rate": 5.735305496690769e-05, + "loss": 0.4293, + "step": 107400 + }, + { + "epoch": 0.554377492634957, + "grad_norm": 23108.521484375, + "learning_rate": 5.7316920037843516e-05, + "loss": 0.427, + "step": 107450 + }, + { + "epoch": 0.5546354626175698, + "grad_norm": 22233.87890625, + "learning_rate": 5.728078120314909e-05, + "loss": 0.4204, + "step": 107500 + }, + { + "epoch": 0.5548934326001826, + "grad_norm": 24522.310546875, + "learning_rate": 5.724463848211464e-05, + "loss": 0.4257, + "step": 107550 + }, + { + "epoch": 0.5551514025827955, + "grad_norm": 22916.892578125, + "learning_rate": 5.720849189403244e-05, + "loss": 0.4251, + "step": 107600 + }, + { + "epoch": 0.5554093725654083, + "grad_norm": 20069.236328125, + "learning_rate": 5.7172341458196876e-05, + "loss": 0.424, + "step": 107650 + }, + { + "epoch": 0.5556673425480211, + "grad_norm": 25173.86328125, + "learning_rate": 5.713618719390432e-05, + "loss": 0.4294, + "step": 107700 + }, + { + "epoch": 0.5559253125306339, + "grad_norm": 21957.373046875, + "learning_rate": 5.710002912045323e-05, + "loss": 0.4239, + "step": 107750 + }, + { + "epoch": 0.5561832825132468, + "grad_norm": 20540.82421875, + "learning_rate": 5.706386725714407e-05, + "loss": 0.4259, + "step": 107800 + }, + { + "epoch": 0.5564412524958596, + "grad_norm": 22470.4921875, + "learning_rate": 5.702770162327936e-05, + "loss": 0.4273, + "step": 107850 + }, + { + "epoch": 0.5566992224784724, + "grad_norm": 21721.197265625, + "learning_rate": 5.69915322381636e-05, + "loss": 0.4233, + "step": 107900 + }, + { + "epoch": 0.5569571924610852, + "grad_norm": 21666.955078125, + "learning_rate": 5.6955359121103324e-05, + "loss": 0.4283, + "step": 107950 + }, + { + "epoch": 0.5572151624436981, + "grad_norm": 20970.9296875, + "learning_rate": 5.6919182291407014e-05, + "loss": 0.4275, + "step": 108000 + }, + { + "epoch": 0.5574731324263109, + "grad_norm": 22353.306640625, + "learning_rate": 5.688300176838518e-05, + "loss": 0.4244, + "step": 108050 + }, + { + "epoch": 0.5577311024089237, + "grad_norm": 22607.357421875, + "learning_rate": 5.68468175713503e-05, + "loss": 0.4252, + "step": 108100 + }, + { + "epoch": 0.5579890723915365, + "grad_norm": 21949.1015625, + "learning_rate": 5.681062971961677e-05, + "loss": 0.4194, + "step": 108150 + }, + { + "epoch": 0.5582470423741493, + "grad_norm": 23135.21875, + "learning_rate": 5.677443823250099e-05, + "loss": 0.425, + "step": 108200 + }, + { + "epoch": 0.5585050123567622, + "grad_norm": 19050.34765625, + "learning_rate": 5.673824312932123e-05, + "loss": 0.422, + "step": 108250 + }, + { + "epoch": 0.5587629823393749, + "grad_norm": 22969.15625, + "learning_rate": 5.67020444293978e-05, + "loss": 0.4253, + "step": 108300 + }, + { + "epoch": 0.5590209523219878, + "grad_norm": 22808.203125, + "learning_rate": 5.666584215205282e-05, + "loss": 0.4261, + "step": 108350 + }, + { + "epoch": 0.5592789223046006, + "grad_norm": 23061.126953125, + "learning_rate": 5.662963631661038e-05, + "loss": 0.4248, + "step": 108400 + }, + { + "epoch": 0.5595368922872135, + "grad_norm": 24134.693359375, + "learning_rate": 5.659342694239642e-05, + "loss": 0.4273, + "step": 108450 + }, + { + "epoch": 0.5597948622698263, + "grad_norm": 23659.2578125, + "learning_rate": 5.655721404873886e-05, + "loss": 0.427, + "step": 108500 + }, + { + "epoch": 0.5600528322524391, + "grad_norm": 20205.1953125, + "learning_rate": 5.652099765496741e-05, + "loss": 0.4257, + "step": 108550 + }, + { + "epoch": 0.5603108022350519, + "grad_norm": 21324.837890625, + "learning_rate": 5.6484777780413686e-05, + "loss": 0.4248, + "step": 108600 + }, + { + "epoch": 0.5605687722176648, + "grad_norm": 21779.849609375, + "learning_rate": 5.644855444441114e-05, + "loss": 0.4259, + "step": 108650 + }, + { + "epoch": 0.5608267422002776, + "grad_norm": 20502.0859375, + "learning_rate": 5.641232766629512e-05, + "loss": 0.4281, + "step": 108700 + }, + { + "epoch": 0.5610847121828904, + "grad_norm": 23600.5859375, + "learning_rate": 5.637609746540276e-05, + "loss": 0.4183, + "step": 108750 + }, + { + "epoch": 0.5613426821655032, + "grad_norm": 22977.41015625, + "learning_rate": 5.633986386107302e-05, + "loss": 0.4219, + "step": 108800 + }, + { + "epoch": 0.561600652148116, + "grad_norm": 23411.263671875, + "learning_rate": 5.630362687264672e-05, + "loss": 0.4268, + "step": 108850 + }, + { + "epoch": 0.5618586221307289, + "grad_norm": 20194.060546875, + "learning_rate": 5.6267386519466446e-05, + "loss": 0.4175, + "step": 108900 + }, + { + "epoch": 0.5621165921133416, + "grad_norm": 19387.88671875, + "learning_rate": 5.623114282087664e-05, + "loss": 0.4274, + "step": 108950 + }, + { + "epoch": 0.5623745620959545, + "grad_norm": 23158.28125, + "learning_rate": 5.619489579622343e-05, + "loss": 0.4222, + "step": 109000 + }, + { + "epoch": 0.5626325320785673, + "grad_norm": 23551.431640625, + "learning_rate": 5.6158645464854817e-05, + "loss": 0.428, + "step": 109050 + }, + { + "epoch": 0.5628905020611802, + "grad_norm": 23904.896484375, + "learning_rate": 5.6122391846120495e-05, + "loss": 0.4252, + "step": 109100 + }, + { + "epoch": 0.563148472043793, + "grad_norm": 21354.61328125, + "learning_rate": 5.608613495937197e-05, + "loss": 0.4202, + "step": 109150 + }, + { + "epoch": 0.5634064420264058, + "grad_norm": 23561.978515625, + "learning_rate": 5.6049874823962456e-05, + "loss": 0.4301, + "step": 109200 + }, + { + "epoch": 0.5636644120090186, + "grad_norm": 20979.53515625, + "learning_rate": 5.601361145924692e-05, + "loss": 0.4204, + "step": 109250 + }, + { + "epoch": 0.5639223819916315, + "grad_norm": 24039.125, + "learning_rate": 5.5977344884582e-05, + "loss": 0.4284, + "step": 109300 + }, + { + "epoch": 0.5641803519742443, + "grad_norm": 22242.35546875, + "learning_rate": 5.594107511932615e-05, + "loss": 0.4248, + "step": 109350 + }, + { + "epoch": 0.5644383219568571, + "grad_norm": 20016.1875, + "learning_rate": 5.5904802182839434e-05, + "loss": 0.4222, + "step": 109400 + }, + { + "epoch": 0.5646962919394699, + "grad_norm": 22243.0703125, + "learning_rate": 5.5868526094483666e-05, + "loss": 0.4276, + "step": 109450 + }, + { + "epoch": 0.5649542619220828, + "grad_norm": 23286.38671875, + "learning_rate": 5.58322468736223e-05, + "loss": 0.4208, + "step": 109500 + }, + { + "epoch": 0.5652122319046956, + "grad_norm": 21801.802734375, + "learning_rate": 5.579596453962047e-05, + "loss": 0.4275, + "step": 109550 + }, + { + "epoch": 0.5654702018873083, + "grad_norm": 23282.025390625, + "learning_rate": 5.575967911184502e-05, + "loss": 0.4255, + "step": 109600 + }, + { + "epoch": 0.5657281718699212, + "grad_norm": 25253.943359375, + "learning_rate": 5.572339060966439e-05, + "loss": 0.4239, + "step": 109650 + }, + { + "epoch": 0.565986141852534, + "grad_norm": 22364.595703125, + "learning_rate": 5.5687099052448675e-05, + "loss": 0.4255, + "step": 109700 + }, + { + "epoch": 0.5662441118351469, + "grad_norm": 23305.46484375, + "learning_rate": 5.565080445956961e-05, + "loss": 0.4254, + "step": 109750 + }, + { + "epoch": 0.5665020818177597, + "grad_norm": 20225.2421875, + "learning_rate": 5.561450685040054e-05, + "loss": 0.4239, + "step": 109800 + }, + { + "epoch": 0.5667600518003725, + "grad_norm": 20221.8203125, + "learning_rate": 5.557820624431645e-05, + "loss": 0.4171, + "step": 109850 + }, + { + "epoch": 0.5670180217829853, + "grad_norm": 19833.607421875, + "learning_rate": 5.554190266069387e-05, + "loss": 0.4224, + "step": 109900 + }, + { + "epoch": 0.5672759917655982, + "grad_norm": 19884.58203125, + "learning_rate": 5.550559611891095e-05, + "loss": 0.4196, + "step": 109950 + }, + { + "epoch": 0.567533961748211, + "grad_norm": 22072.25390625, + "learning_rate": 5.546928663834745e-05, + "loss": 0.4196, + "step": 110000 + }, + { + "epoch": 0.567533961748211, + "eval_loss": 0.4103853106498718, + "eval_runtime": 3606.5234, + "eval_samples_per_second": 859.864, + "eval_steps_per_second": 1.679, + "step": 110000 + }, + { + "epoch": 0.5677919317308238, + "grad_norm": 21647.181640625, + "learning_rate": 5.543297423838464e-05, + "loss": 0.414, + "step": 110050 + }, + { + "epoch": 0.5680499017134366, + "grad_norm": 23264.748046875, + "learning_rate": 5.5396658938405396e-05, + "loss": 0.4192, + "step": 110100 + }, + { + "epoch": 0.5683078716960495, + "grad_norm": 21868.10546875, + "learning_rate": 5.536034075779409e-05, + "loss": 0.4222, + "step": 110150 + }, + { + "epoch": 0.5685658416786623, + "grad_norm": 22489.07421875, + "learning_rate": 5.53240197159367e-05, + "loss": 0.4237, + "step": 110200 + }, + { + "epoch": 0.568823811661275, + "grad_norm": 21589.173828125, + "learning_rate": 5.5287695832220674e-05, + "loss": 0.4218, + "step": 110250 + }, + { + "epoch": 0.5690817816438879, + "grad_norm": 23184.103515625, + "learning_rate": 5.525136912603501e-05, + "loss": 0.4203, + "step": 110300 + }, + { + "epoch": 0.5693397516265007, + "grad_norm": 23085.970703125, + "learning_rate": 5.521503961677019e-05, + "loss": 0.4216, + "step": 110350 + }, + { + "epoch": 0.5695977216091136, + "grad_norm": 22217.3671875, + "learning_rate": 5.51787073238182e-05, + "loss": 0.424, + "step": 110400 + }, + { + "epoch": 0.5698556915917263, + "grad_norm": 23515.9375, + "learning_rate": 5.514237226657253e-05, + "loss": 0.4217, + "step": 110450 + }, + { + "epoch": 0.5701136615743392, + "grad_norm": 21375.2734375, + "learning_rate": 5.510603446442812e-05, + "loss": 0.4175, + "step": 110500 + }, + { + "epoch": 0.570371631556952, + "grad_norm": 21658.15625, + "learning_rate": 5.506969393678139e-05, + "loss": 0.4191, + "step": 110550 + }, + { + "epoch": 0.5706296015395649, + "grad_norm": 24653.294921875, + "learning_rate": 5.503335070303018e-05, + "loss": 0.419, + "step": 110600 + }, + { + "epoch": 0.5708875715221777, + "grad_norm": 21722.984375, + "learning_rate": 5.4997004782573855e-05, + "loss": 0.4237, + "step": 110650 + }, + { + "epoch": 0.5711455415047905, + "grad_norm": 20897.595703125, + "learning_rate": 5.496065619481312e-05, + "loss": 0.4211, + "step": 110700 + }, + { + "epoch": 0.5714035114874033, + "grad_norm": 20729.123046875, + "learning_rate": 5.4924304959150175e-05, + "loss": 0.4228, + "step": 110750 + }, + { + "epoch": 0.5716614814700162, + "grad_norm": 22107.888671875, + "learning_rate": 5.488795109498861e-05, + "loss": 0.4222, + "step": 110800 + }, + { + "epoch": 0.571919451452629, + "grad_norm": 24264.587890625, + "learning_rate": 5.485159462173337e-05, + "loss": 0.4232, + "step": 110850 + }, + { + "epoch": 0.5721774214352418, + "grad_norm": 23000.34375, + "learning_rate": 5.481523555879089e-05, + "loss": 0.4236, + "step": 110900 + }, + { + "epoch": 0.5724353914178546, + "grad_norm": 20345.26953125, + "learning_rate": 5.4778873925568905e-05, + "loss": 0.4254, + "step": 110950 + }, + { + "epoch": 0.5726933614004674, + "grad_norm": 25514.09765625, + "learning_rate": 5.4742509741476566e-05, + "loss": 0.4247, + "step": 111000 + }, + { + "epoch": 0.5729513313830803, + "grad_norm": 22510.115234375, + "learning_rate": 5.470614302592434e-05, + "loss": 0.4271, + "step": 111050 + }, + { + "epoch": 0.573209301365693, + "grad_norm": 24683.4921875, + "learning_rate": 5.466977379832411e-05, + "loss": 0.4207, + "step": 111100 + }, + { + "epoch": 0.5734672713483059, + "grad_norm": 22154.2890625, + "learning_rate": 5.4633402078089035e-05, + "loss": 0.422, + "step": 111150 + }, + { + "epoch": 0.5737252413309187, + "grad_norm": 23333.966796875, + "learning_rate": 5.459702788463367e-05, + "loss": 0.4218, + "step": 111200 + }, + { + "epoch": 0.5739832113135316, + "grad_norm": 26566.900390625, + "learning_rate": 5.4560651237373814e-05, + "loss": 0.4269, + "step": 111250 + }, + { + "epoch": 0.5742411812961444, + "grad_norm": 21463.828125, + "learning_rate": 5.452427215572666e-05, + "loss": 0.4196, + "step": 111300 + }, + { + "epoch": 0.5744991512787572, + "grad_norm": 24921.373046875, + "learning_rate": 5.448789065911064e-05, + "loss": 0.4248, + "step": 111350 + }, + { + "epoch": 0.57475712126137, + "grad_norm": 23610.16015625, + "learning_rate": 5.445150676694548e-05, + "loss": 0.4245, + "step": 111400 + }, + { + "epoch": 0.5750150912439829, + "grad_norm": 24598.2109375, + "learning_rate": 5.441512049865221e-05, + "loss": 0.4199, + "step": 111450 + }, + { + "epoch": 0.5752730612265957, + "grad_norm": 24330.02734375, + "learning_rate": 5.43787318736531e-05, + "loss": 0.423, + "step": 111500 + }, + { + "epoch": 0.5755310312092085, + "grad_norm": 23434.587890625, + "learning_rate": 5.434234091137171e-05, + "loss": 0.4214, + "step": 111550 + }, + { + "epoch": 0.5757890011918213, + "grad_norm": 25007.08203125, + "learning_rate": 5.430594763123283e-05, + "loss": 0.4258, + "step": 111600 + }, + { + "epoch": 0.5760469711744342, + "grad_norm": 24568.759765625, + "learning_rate": 5.4269552052662486e-05, + "loss": 0.4248, + "step": 111650 + }, + { + "epoch": 0.576304941157047, + "grad_norm": 22131.74609375, + "learning_rate": 5.423315419508792e-05, + "loss": 0.418, + "step": 111700 + }, + { + "epoch": 0.5765629111396597, + "grad_norm": 22058.443359375, + "learning_rate": 5.4196754077937626e-05, + "loss": 0.4289, + "step": 111750 + }, + { + "epoch": 0.5768208811222726, + "grad_norm": 23790.3203125, + "learning_rate": 5.4160351720641276e-05, + "loss": 0.4217, + "step": 111800 + }, + { + "epoch": 0.5770788511048854, + "grad_norm": 20349.287109375, + "learning_rate": 5.412394714262974e-05, + "loss": 0.4154, + "step": 111850 + }, + { + "epoch": 0.5773368210874983, + "grad_norm": 20262.9296875, + "learning_rate": 5.408754036333506e-05, + "loss": 0.4214, + "step": 111900 + }, + { + "epoch": 0.5775947910701111, + "grad_norm": 21678.17578125, + "learning_rate": 5.4051131402190494e-05, + "loss": 0.4168, + "step": 111950 + }, + { + "epoch": 0.5778527610527239, + "grad_norm": 22649.1875, + "learning_rate": 5.4014720278630415e-05, + "loss": 0.4206, + "step": 112000 + }, + { + "epoch": 0.5781107310353367, + "grad_norm": 22304.1328125, + "learning_rate": 5.39783070120904e-05, + "loss": 0.4199, + "step": 112050 + }, + { + "epoch": 0.5783687010179496, + "grad_norm": 22573.169921875, + "learning_rate": 5.394189162200715e-05, + "loss": 0.4325, + "step": 112100 + }, + { + "epoch": 0.5786266710005624, + "grad_norm": 22942.09765625, + "learning_rate": 5.390547412781847e-05, + "loss": 0.416, + "step": 112150 + }, + { + "epoch": 0.5788846409831752, + "grad_norm": 20210.18359375, + "learning_rate": 5.386905454896333e-05, + "loss": 0.4274, + "step": 112200 + }, + { + "epoch": 0.579142610965788, + "grad_norm": 22916.09375, + "learning_rate": 5.38326329048818e-05, + "loss": 0.4208, + "step": 112250 + }, + { + "epoch": 0.5794005809484009, + "grad_norm": 21563.78125, + "learning_rate": 5.379620921501503e-05, + "loss": 0.4264, + "step": 112300 + }, + { + "epoch": 0.5796585509310137, + "grad_norm": 20984.3671875, + "learning_rate": 5.375978349880528e-05, + "loss": 0.4229, + "step": 112350 + }, + { + "epoch": 0.5799165209136264, + "grad_norm": 22014.6640625, + "learning_rate": 5.372335577569592e-05, + "loss": 0.4205, + "step": 112400 + }, + { + "epoch": 0.5801744908962393, + "grad_norm": 22977.39453125, + "learning_rate": 5.3686926065131325e-05, + "loss": 0.4248, + "step": 112450 + }, + { + "epoch": 0.5804324608788521, + "grad_norm": 22589.521484375, + "learning_rate": 5.365049438655702e-05, + "loss": 0.4165, + "step": 112500 + }, + { + "epoch": 0.580690430861465, + "grad_norm": 24455.625, + "learning_rate": 5.3614060759419474e-05, + "loss": 0.4224, + "step": 112550 + }, + { + "epoch": 0.5809484008440777, + "grad_norm": 24485.833984375, + "learning_rate": 5.357762520316628e-05, + "loss": 0.4264, + "step": 112600 + }, + { + "epoch": 0.5812063708266906, + "grad_norm": 23294.244140625, + "learning_rate": 5.354118773724603e-05, + "loss": 0.4254, + "step": 112650 + }, + { + "epoch": 0.5814643408093034, + "grad_norm": 21813.884765625, + "learning_rate": 5.350474838110835e-05, + "loss": 0.4226, + "step": 112700 + }, + { + "epoch": 0.5817223107919163, + "grad_norm": 23532.0546875, + "learning_rate": 5.3468307154203836e-05, + "loss": 0.4236, + "step": 112750 + }, + { + "epoch": 0.5819802807745291, + "grad_norm": 24739.787109375, + "learning_rate": 5.343186407598413e-05, + "loss": 0.4276, + "step": 112800 + }, + { + "epoch": 0.5822382507571419, + "grad_norm": 23312.783203125, + "learning_rate": 5.339541916590184e-05, + "loss": 0.4281, + "step": 112850 + }, + { + "epoch": 0.5824962207397547, + "grad_norm": 24166.798828125, + "learning_rate": 5.335897244341054e-05, + "loss": 0.4238, + "step": 112900 + }, + { + "epoch": 0.5827541907223676, + "grad_norm": 23690.455078125, + "learning_rate": 5.332252392796478e-05, + "loss": 0.4181, + "step": 112950 + }, + { + "epoch": 0.5830121607049804, + "grad_norm": 23499.16015625, + "learning_rate": 5.32860736390201e-05, + "loss": 0.4143, + "step": 113000 + }, + { + "epoch": 0.5832701306875931, + "grad_norm": 23299.5625, + "learning_rate": 5.324962159603294e-05, + "loss": 0.4198, + "step": 113050 + }, + { + "epoch": 0.583528100670206, + "grad_norm": 22958.423828125, + "learning_rate": 5.321316781846071e-05, + "loss": 0.421, + "step": 113100 + }, + { + "epoch": 0.5837860706528188, + "grad_norm": 20775.119140625, + "learning_rate": 5.3176712325761704e-05, + "loss": 0.4148, + "step": 113150 + }, + { + "epoch": 0.5840440406354317, + "grad_norm": 23139.953125, + "learning_rate": 5.3140255137395155e-05, + "loss": 0.422, + "step": 113200 + }, + { + "epoch": 0.5843020106180444, + "grad_norm": 19829.94140625, + "learning_rate": 5.310379627282125e-05, + "loss": 0.4248, + "step": 113250 + }, + { + "epoch": 0.5845599806006573, + "grad_norm": 20085.572265625, + "learning_rate": 5.306733575150099e-05, + "loss": 0.4183, + "step": 113300 + }, + { + "epoch": 0.5848179505832701, + "grad_norm": 23206.27734375, + "learning_rate": 5.303087359289629e-05, + "loss": 0.423, + "step": 113350 + }, + { + "epoch": 0.585075920565883, + "grad_norm": 25039.34765625, + "learning_rate": 5.299440981646996e-05, + "loss": 0.4232, + "step": 113400 + }, + { + "epoch": 0.5853338905484958, + "grad_norm": 21276.865234375, + "learning_rate": 5.2957944441685646e-05, + "loss": 0.4205, + "step": 113450 + }, + { + "epoch": 0.5855918605311086, + "grad_norm": 22706.712890625, + "learning_rate": 5.292147748800788e-05, + "loss": 0.4225, + "step": 113500 + }, + { + "epoch": 0.5858498305137214, + "grad_norm": 18046.537109375, + "learning_rate": 5.2885008974902004e-05, + "loss": 0.4195, + "step": 113550 + }, + { + "epoch": 0.5861078004963343, + "grad_norm": 22363.5625, + "learning_rate": 5.28485389218342e-05, + "loss": 0.4149, + "step": 113600 + }, + { + "epoch": 0.5863657704789471, + "grad_norm": 24409.609375, + "learning_rate": 5.2812067348271466e-05, + "loss": 0.4224, + "step": 113650 + }, + { + "epoch": 0.5866237404615599, + "grad_norm": 23921.68359375, + "learning_rate": 5.277559427368164e-05, + "loss": 0.4274, + "step": 113700 + }, + { + "epoch": 0.5868817104441727, + "grad_norm": 23887.84765625, + "learning_rate": 5.273911971753335e-05, + "loss": 0.4185, + "step": 113750 + }, + { + "epoch": 0.5871396804267855, + "grad_norm": 23169.423828125, + "learning_rate": 5.270264369929597e-05, + "loss": 0.4218, + "step": 113800 + }, + { + "epoch": 0.5873976504093984, + "grad_norm": 23339.57421875, + "learning_rate": 5.266616623843972e-05, + "loss": 0.4211, + "step": 113850 + }, + { + "epoch": 0.5876556203920111, + "grad_norm": 22072.59765625, + "learning_rate": 5.2629687354435576e-05, + "loss": 0.4191, + "step": 113900 + }, + { + "epoch": 0.587913590374624, + "grad_norm": 24308.357421875, + "learning_rate": 5.259320706675523e-05, + "loss": 0.4168, + "step": 113950 + }, + { + "epoch": 0.5881715603572368, + "grad_norm": 20896.5703125, + "learning_rate": 5.255672539487119e-05, + "loss": 0.4201, + "step": 114000 + }, + { + "epoch": 0.5884295303398497, + "grad_norm": 20070.814453125, + "learning_rate": 5.252024235825661e-05, + "loss": 0.4216, + "step": 114050 + }, + { + "epoch": 0.5886875003224625, + "grad_norm": 24864.869140625, + "learning_rate": 5.2483757976385486e-05, + "loss": 0.4269, + "step": 114100 + }, + { + "epoch": 0.5889454703050753, + "grad_norm": 24734.0234375, + "learning_rate": 5.2447272268732436e-05, + "loss": 0.4196, + "step": 114150 + }, + { + "epoch": 0.5892034402876881, + "grad_norm": 22383.212890625, + "learning_rate": 5.2410785254772856e-05, + "loss": 0.4171, + "step": 114200 + }, + { + "epoch": 0.589461410270301, + "grad_norm": 22587.44921875, + "learning_rate": 5.237429695398276e-05, + "loss": 0.4232, + "step": 114250 + }, + { + "epoch": 0.5897193802529138, + "grad_norm": 23357.263671875, + "learning_rate": 5.2337807385838935e-05, + "loss": 0.4241, + "step": 114300 + }, + { + "epoch": 0.5899773502355266, + "grad_norm": 24632.125, + "learning_rate": 5.23013165698188e-05, + "loss": 0.4154, + "step": 114350 + }, + { + "epoch": 0.5902353202181394, + "grad_norm": 23191.818359375, + "learning_rate": 5.226482452540045e-05, + "loss": 0.424, + "step": 114400 + }, + { + "epoch": 0.5904932902007523, + "grad_norm": 23649.560546875, + "learning_rate": 5.2228331272062626e-05, + "loss": 0.427, + "step": 114450 + }, + { + "epoch": 0.5907512601833651, + "grad_norm": 23992.169921875, + "learning_rate": 5.21918368292847e-05, + "loss": 0.4267, + "step": 114500 + }, + { + "epoch": 0.5910092301659778, + "grad_norm": 21792.041015625, + "learning_rate": 5.215534121654673e-05, + "loss": 0.4272, + "step": 114550 + }, + { + "epoch": 0.5912672001485907, + "grad_norm": 25516.345703125, + "learning_rate": 5.211884445332935e-05, + "loss": 0.4207, + "step": 114600 + }, + { + "epoch": 0.5915251701312035, + "grad_norm": 22557.25390625, + "learning_rate": 5.208234655911384e-05, + "loss": 0.4228, + "step": 114650 + }, + { + "epoch": 0.5917831401138164, + "grad_norm": 24185.09765625, + "learning_rate": 5.2045847553382045e-05, + "loss": 0.4226, + "step": 114700 + }, + { + "epoch": 0.5920411100964291, + "grad_norm": 20565.134765625, + "learning_rate": 5.200934745561643e-05, + "loss": 0.4274, + "step": 114750 + }, + { + "epoch": 0.592299080079042, + "grad_norm": 24019.0, + "learning_rate": 5.197284628530007e-05, + "loss": 0.4234, + "step": 114800 + }, + { + "epoch": 0.5925570500616548, + "grad_norm": 26129.01171875, + "learning_rate": 5.193634406191658e-05, + "loss": 0.418, + "step": 114850 + }, + { + "epoch": 0.5928150200442677, + "grad_norm": 25187.611328125, + "learning_rate": 5.18998408049501e-05, + "loss": 0.4213, + "step": 114900 + }, + { + "epoch": 0.5930729900268805, + "grad_norm": 20145.14453125, + "learning_rate": 5.186333653388539e-05, + "loss": 0.418, + "step": 114950 + }, + { + "epoch": 0.5933309600094933, + "grad_norm": 22472.322265625, + "learning_rate": 5.182683126820773e-05, + "loss": 0.4209, + "step": 115000 + }, + { + "epoch": 0.5933309600094933, + "eval_loss": 0.4084908068180084, + "eval_runtime": 3582.6916, + "eval_samples_per_second": 865.584, + "eval_steps_per_second": 1.691, + "step": 115000 + }, + { + "epoch": 0.5935889299921061, + "grad_norm": 22404.052734375, + "learning_rate": 5.179032502740291e-05, + "loss": 0.4147, + "step": 115050 + }, + { + "epoch": 0.593846899974719, + "grad_norm": 21242.705078125, + "learning_rate": 5.175381783095725e-05, + "loss": 0.4237, + "step": 115100 + }, + { + "epoch": 0.5941048699573318, + "grad_norm": 22416.06640625, + "learning_rate": 5.171730969835758e-05, + "loss": 0.4185, + "step": 115150 + }, + { + "epoch": 0.5943628399399445, + "grad_norm": 22231.525390625, + "learning_rate": 5.1680800649091276e-05, + "loss": 0.4227, + "step": 115200 + }, + { + "epoch": 0.5946208099225574, + "grad_norm": 22431.462890625, + "learning_rate": 5.164429070264613e-05, + "loss": 0.4225, + "step": 115250 + }, + { + "epoch": 0.5948787799051702, + "grad_norm": 26008.57421875, + "learning_rate": 5.160777987851044e-05, + "loss": 0.4253, + "step": 115300 + }, + { + "epoch": 0.5951367498877831, + "grad_norm": 22555.501953125, + "learning_rate": 5.157126819617297e-05, + "loss": 0.4181, + "step": 115350 + }, + { + "epoch": 0.5953947198703958, + "grad_norm": 25113.587890625, + "learning_rate": 5.153475567512298e-05, + "loss": 0.4261, + "step": 115400 + }, + { + "epoch": 0.5956526898530087, + "grad_norm": 22877.908203125, + "learning_rate": 5.149824233485013e-05, + "loss": 0.4177, + "step": 115450 + }, + { + "epoch": 0.5959106598356215, + "grad_norm": 22468.34375, + "learning_rate": 5.1461728194844526e-05, + "loss": 0.4223, + "step": 115500 + }, + { + "epoch": 0.5961686298182344, + "grad_norm": 24525.326171875, + "learning_rate": 5.142521327459672e-05, + "loss": 0.4159, + "step": 115550 + }, + { + "epoch": 0.5964265998008472, + "grad_norm": 23334.296875, + "learning_rate": 5.1388697593597643e-05, + "loss": 0.4206, + "step": 115600 + }, + { + "epoch": 0.59668456978346, + "grad_norm": 21743.333984375, + "learning_rate": 5.1352181171338706e-05, + "loss": 0.4191, + "step": 115650 + }, + { + "epoch": 0.5969425397660728, + "grad_norm": 26287.66796875, + "learning_rate": 5.131566402731165e-05, + "loss": 0.4147, + "step": 115700 + }, + { + "epoch": 0.5972005097486857, + "grad_norm": 20856.890625, + "learning_rate": 5.12791461810086e-05, + "loss": 0.4248, + "step": 115750 + }, + { + "epoch": 0.5974584797312985, + "grad_norm": 22821.73046875, + "learning_rate": 5.124262765192208e-05, + "loss": 0.4239, + "step": 115800 + }, + { + "epoch": 0.5977164497139112, + "grad_norm": 24805.427734375, + "learning_rate": 5.1206108459545e-05, + "loss": 0.4172, + "step": 115850 + }, + { + "epoch": 0.5979744196965241, + "grad_norm": 25195.064453125, + "learning_rate": 5.116958862337057e-05, + "loss": 0.4242, + "step": 115900 + }, + { + "epoch": 0.5982323896791369, + "grad_norm": 22029.236328125, + "learning_rate": 5.1133068162892383e-05, + "loss": 0.4217, + "step": 115950 + }, + { + "epoch": 0.5984903596617498, + "grad_norm": 23296.77734375, + "learning_rate": 5.109654709760434e-05, + "loss": 0.4223, + "step": 116000 + }, + { + "epoch": 0.5987483296443625, + "grad_norm": 23822.447265625, + "learning_rate": 5.106002544700069e-05, + "loss": 0.4235, + "step": 116050 + }, + { + "epoch": 0.5990062996269754, + "grad_norm": 21188.46875, + "learning_rate": 5.1023503230576e-05, + "loss": 0.4275, + "step": 116100 + }, + { + "epoch": 0.5992642696095882, + "grad_norm": 24459.021484375, + "learning_rate": 5.0986980467825096e-05, + "loss": 0.4217, + "step": 116150 + }, + { + "epoch": 0.5995222395922011, + "grad_norm": 22304.396484375, + "learning_rate": 5.095045717824313e-05, + "loss": 0.42, + "step": 116200 + }, + { + "epoch": 0.5997802095748139, + "grad_norm": 20124.943359375, + "learning_rate": 5.0913933381325516e-05, + "loss": 0.4149, + "step": 116250 + }, + { + "epoch": 0.6000381795574267, + "grad_norm": 22610.3046875, + "learning_rate": 5.087740909656798e-05, + "loss": 0.4164, + "step": 116300 + }, + { + "epoch": 0.6002961495400395, + "grad_norm": 22058.974609375, + "learning_rate": 5.084088434346645e-05, + "loss": 0.4211, + "step": 116350 + }, + { + "epoch": 0.6005541195226524, + "grad_norm": 23463.626953125, + "learning_rate": 5.0804359141517134e-05, + "loss": 0.4182, + "step": 116400 + }, + { + "epoch": 0.6008120895052652, + "grad_norm": 25045.67578125, + "learning_rate": 5.076783351021648e-05, + "loss": 0.4202, + "step": 116450 + }, + { + "epoch": 0.601070059487878, + "grad_norm": 22583.076171875, + "learning_rate": 5.0731307469061184e-05, + "loss": 0.4177, + "step": 116500 + }, + { + "epoch": 0.6013280294704908, + "grad_norm": 26350.400390625, + "learning_rate": 5.069478103754811e-05, + "loss": 0.4193, + "step": 116550 + }, + { + "epoch": 0.6015859994531036, + "grad_norm": 21430.255859375, + "learning_rate": 5.0658254235174385e-05, + "loss": 0.422, + "step": 116600 + }, + { + "epoch": 0.6018439694357165, + "grad_norm": 21467.482421875, + "learning_rate": 5.0621727081437275e-05, + "loss": 0.4157, + "step": 116650 + }, + { + "epoch": 0.6021019394183292, + "grad_norm": 25780.095703125, + "learning_rate": 5.05851995958343e-05, + "loss": 0.4243, + "step": 116700 + }, + { + "epoch": 0.6023599094009421, + "grad_norm": 21074.52734375, + "learning_rate": 5.0548671797863125e-05, + "loss": 0.4271, + "step": 116750 + }, + { + "epoch": 0.6026178793835549, + "grad_norm": 25752.841796875, + "learning_rate": 5.051214370702155e-05, + "loss": 0.4209, + "step": 116800 + }, + { + "epoch": 0.6028758493661678, + "grad_norm": 23178.93359375, + "learning_rate": 5.047561534280758e-05, + "loss": 0.4232, + "step": 116850 + }, + { + "epoch": 0.6031338193487806, + "grad_norm": 23263.65625, + "learning_rate": 5.0439086724719355e-05, + "loss": 0.4196, + "step": 116900 + }, + { + "epoch": 0.6033917893313934, + "grad_norm": 20372.861328125, + "learning_rate": 5.040255787225514e-05, + "loss": 0.4194, + "step": 116950 + }, + { + "epoch": 0.6036497593140062, + "grad_norm": 23453.59375, + "learning_rate": 5.036602880491332e-05, + "loss": 0.4156, + "step": 117000 + }, + { + "epoch": 0.6039077292966191, + "grad_norm": 24039.392578125, + "learning_rate": 5.032949954219243e-05, + "loss": 0.4192, + "step": 117050 + }, + { + "epoch": 0.6041656992792319, + "grad_norm": 24246.55859375, + "learning_rate": 5.0292970103591044e-05, + "loss": 0.426, + "step": 117100 + }, + { + "epoch": 0.6044236692618447, + "grad_norm": 23403.130859375, + "learning_rate": 5.0256440508607904e-05, + "loss": 0.4195, + "step": 117150 + }, + { + "epoch": 0.6046816392444575, + "grad_norm": 21872.07421875, + "learning_rate": 5.021991077674179e-05, + "loss": 0.4214, + "step": 117200 + }, + { + "epoch": 0.6049396092270704, + "grad_norm": 22344.455078125, + "learning_rate": 5.018338092749155e-05, + "loss": 0.4205, + "step": 117250 + }, + { + "epoch": 0.6051975792096832, + "grad_norm": 22999.099609375, + "learning_rate": 5.014685098035612e-05, + "loss": 0.4203, + "step": 117300 + }, + { + "epoch": 0.6054555491922959, + "grad_norm": 21572.994140625, + "learning_rate": 5.011032095483448e-05, + "loss": 0.4279, + "step": 117350 + }, + { + "epoch": 0.6057135191749088, + "grad_norm": 21263.11328125, + "learning_rate": 5.007379087042566e-05, + "loss": 0.418, + "step": 117400 + }, + { + "epoch": 0.6059714891575216, + "grad_norm": 22789.671875, + "learning_rate": 5.00372607466287e-05, + "loss": 0.4196, + "step": 117450 + }, + { + "epoch": 0.6062294591401345, + "grad_norm": 21276.09765625, + "learning_rate": 5.000073060294267e-05, + "loss": 0.4125, + "step": 117500 + }, + { + "epoch": 0.6064874291227472, + "grad_norm": 22501.169921875, + "learning_rate": 4.9964200458866654e-05, + "loss": 0.4152, + "step": 117550 + }, + { + "epoch": 0.6067453991053601, + "grad_norm": 21645.912109375, + "learning_rate": 4.992767033389976e-05, + "loss": 0.4253, + "step": 117600 + }, + { + "epoch": 0.6070033690879729, + "grad_norm": 21256.7109375, + "learning_rate": 4.9891140247541025e-05, + "loss": 0.4214, + "step": 117650 + }, + { + "epoch": 0.6072613390705858, + "grad_norm": 22883.98046875, + "learning_rate": 4.985461021928952e-05, + "loss": 0.4238, + "step": 117700 + }, + { + "epoch": 0.6075193090531986, + "grad_norm": 21366.412109375, + "learning_rate": 4.981808026864426e-05, + "loss": 0.4225, + "step": 117750 + }, + { + "epoch": 0.6077772790358114, + "grad_norm": 24185.3515625, + "learning_rate": 4.978155041510425e-05, + "loss": 0.4196, + "step": 117800 + }, + { + "epoch": 0.6080352490184242, + "grad_norm": 21638.009765625, + "learning_rate": 4.974502067816838e-05, + "loss": 0.4221, + "step": 117850 + }, + { + "epoch": 0.608293219001037, + "grad_norm": 20867.111328125, + "learning_rate": 4.970849107733554e-05, + "loss": 0.4225, + "step": 117900 + }, + { + "epoch": 0.6085511889836499, + "grad_norm": 21785.69140625, + "learning_rate": 4.967196163210451e-05, + "loss": 0.4166, + "step": 117950 + }, + { + "epoch": 0.6088091589662626, + "grad_norm": 24691.8515625, + "learning_rate": 4.963543236197401e-05, + "loss": 0.4226, + "step": 118000 + }, + { + "epoch": 0.6090671289488755, + "grad_norm": 21214.1484375, + "learning_rate": 4.9598903286442675e-05, + "loss": 0.418, + "step": 118050 + }, + { + "epoch": 0.6093250989314883, + "grad_norm": 22802.849609375, + "learning_rate": 4.956237442500898e-05, + "loss": 0.4227, + "step": 118100 + }, + { + "epoch": 0.6095830689141012, + "grad_norm": 25204.90625, + "learning_rate": 4.952584579717135e-05, + "loss": 0.4152, + "step": 118150 + }, + { + "epoch": 0.6098410388967139, + "grad_norm": 21970.19140625, + "learning_rate": 4.9489317422428044e-05, + "loss": 0.4197, + "step": 118200 + }, + { + "epoch": 0.6100990088793268, + "grad_norm": 22331.013671875, + "learning_rate": 4.945278932027723e-05, + "loss": 0.4161, + "step": 118250 + }, + { + "epoch": 0.6103569788619396, + "grad_norm": 27234.177734375, + "learning_rate": 4.941626151021686e-05, + "loss": 0.4204, + "step": 118300 + }, + { + "epoch": 0.6106149488445525, + "grad_norm": 22253.0078125, + "learning_rate": 4.937973401174481e-05, + "loss": 0.4202, + "step": 118350 + }, + { + "epoch": 0.6108729188271653, + "grad_norm": 20930.27734375, + "learning_rate": 4.934320684435871e-05, + "loss": 0.4169, + "step": 118400 + }, + { + "epoch": 0.6111308888097781, + "grad_norm": 22569.205078125, + "learning_rate": 4.930668002755609e-05, + "loss": 0.4177, + "step": 118450 + }, + { + "epoch": 0.6113888587923909, + "grad_norm": 23197.943359375, + "learning_rate": 4.9270153580834256e-05, + "loss": 0.414, + "step": 118500 + }, + { + "epoch": 0.6116468287750038, + "grad_norm": 21927.1875, + "learning_rate": 4.923362752369029e-05, + "loss": 0.4203, + "step": 118550 + }, + { + "epoch": 0.6119047987576166, + "grad_norm": 23422.181640625, + "learning_rate": 4.919710187562112e-05, + "loss": 0.4213, + "step": 118600 + }, + { + "epoch": 0.6121627687402293, + "grad_norm": 23351.67578125, + "learning_rate": 4.9160576656123416e-05, + "loss": 0.4213, + "step": 118650 + }, + { + "epoch": 0.6124207387228422, + "grad_norm": 21228.416015625, + "learning_rate": 4.9124051884693664e-05, + "loss": 0.4192, + "step": 118700 + }, + { + "epoch": 0.612678708705455, + "grad_norm": 22555.9609375, + "learning_rate": 4.908752758082802e-05, + "loss": 0.4189, + "step": 118750 + }, + { + "epoch": 0.6129366786880679, + "grad_norm": 21010.859375, + "learning_rate": 4.905100376402251e-05, + "loss": 0.4194, + "step": 118800 + }, + { + "epoch": 0.6131946486706806, + "grad_norm": 23468.78515625, + "learning_rate": 4.901448045377279e-05, + "loss": 0.4151, + "step": 118850 + }, + { + "epoch": 0.6134526186532935, + "grad_norm": 23818.189453125, + "learning_rate": 4.8977957669574334e-05, + "loss": 0.4184, + "step": 118900 + }, + { + "epoch": 0.6137105886359063, + "grad_norm": 22162.76171875, + "learning_rate": 4.8941435430922294e-05, + "loss": 0.4181, + "step": 118950 + }, + { + "epoch": 0.6139685586185192, + "grad_norm": 22983.45703125, + "learning_rate": 4.8904913757311506e-05, + "loss": 0.4196, + "step": 119000 + }, + { + "epoch": 0.614226528601132, + "grad_norm": 22748.150390625, + "learning_rate": 4.886839266823656e-05, + "loss": 0.4195, + "step": 119050 + }, + { + "epoch": 0.6144844985837448, + "grad_norm": 23146.306640625, + "learning_rate": 4.8831872183191684e-05, + "loss": 0.4219, + "step": 119100 + }, + { + "epoch": 0.6147424685663576, + "grad_norm": 24951.591796875, + "learning_rate": 4.879535232167084e-05, + "loss": 0.4165, + "step": 119150 + }, + { + "epoch": 0.6150004385489705, + "grad_norm": 24381.689453125, + "learning_rate": 4.875883310316758e-05, + "loss": 0.4179, + "step": 119200 + }, + { + "epoch": 0.6152584085315833, + "grad_norm": 21191.4609375, + "learning_rate": 4.872231454717518e-05, + "loss": 0.4155, + "step": 119250 + }, + { + "epoch": 0.615516378514196, + "grad_norm": 21586.84375, + "learning_rate": 4.8685796673186526e-05, + "loss": 0.412, + "step": 119300 + }, + { + "epoch": 0.6157743484968089, + "grad_norm": 20381.505859375, + "learning_rate": 4.864927950069416e-05, + "loss": 0.4171, + "step": 119350 + }, + { + "epoch": 0.6160323184794217, + "grad_norm": 23258.296875, + "learning_rate": 4.861276304919026e-05, + "loss": 0.418, + "step": 119400 + }, + { + "epoch": 0.6162902884620346, + "grad_norm": 23629.14453125, + "learning_rate": 4.857624733816657e-05, + "loss": 0.4221, + "step": 119450 + }, + { + "epoch": 0.6165482584446473, + "grad_norm": 22892.7734375, + "learning_rate": 4.853973238711449e-05, + "loss": 0.4278, + "step": 119500 + }, + { + "epoch": 0.6168062284272602, + "grad_norm": 21639.669921875, + "learning_rate": 4.850321821552497e-05, + "loss": 0.4224, + "step": 119550 + }, + { + "epoch": 0.617064198409873, + "grad_norm": 21392.951171875, + "learning_rate": 4.84667048428886e-05, + "loss": 0.4192, + "step": 119600 + }, + { + "epoch": 0.6173221683924859, + "grad_norm": 22603.51953125, + "learning_rate": 4.843019228869548e-05, + "loss": 0.4169, + "step": 119650 + }, + { + "epoch": 0.6175801383750986, + "grad_norm": 22470.62109375, + "learning_rate": 4.8393680572435324e-05, + "loss": 0.4175, + "step": 119700 + }, + { + "epoch": 0.6178381083577115, + "grad_norm": 26185.634765625, + "learning_rate": 4.835716971359737e-05, + "loss": 0.4208, + "step": 119750 + }, + { + "epoch": 0.6180960783403243, + "grad_norm": 21508.12109375, + "learning_rate": 4.832065973167041e-05, + "loss": 0.4194, + "step": 119800 + }, + { + "epoch": 0.6183540483229372, + "grad_norm": 20717.205078125, + "learning_rate": 4.8284150646142784e-05, + "loss": 0.424, + "step": 119850 + }, + { + "epoch": 0.61861201830555, + "grad_norm": 20015.30078125, + "learning_rate": 4.8247642476502284e-05, + "loss": 0.4189, + "step": 119900 + }, + { + "epoch": 0.6188699882881628, + "grad_norm": 21596.349609375, + "learning_rate": 4.821113524223634e-05, + "loss": 0.4218, + "step": 119950 + }, + { + "epoch": 0.6191279582707756, + "grad_norm": 22051.921875, + "learning_rate": 4.817462896283173e-05, + "loss": 0.4184, + "step": 120000 + }, + { + "epoch": 0.6191279582707756, + "eval_loss": 0.40621376037597656, + "eval_runtime": 3588.5932, + "eval_samples_per_second": 864.16, + "eval_steps_per_second": 1.688, + "step": 120000 + }, + { + "epoch": 0.6193859282533885, + "grad_norm": 22562.478515625, + "learning_rate": 4.813812365777486e-05, + "loss": 0.4171, + "step": 120050 + }, + { + "epoch": 0.6196438982360013, + "grad_norm": 22531.505859375, + "learning_rate": 4.81016193465515e-05, + "loss": 0.4171, + "step": 120100 + }, + { + "epoch": 0.619901868218614, + "grad_norm": 21869.177734375, + "learning_rate": 4.8065116048647005e-05, + "loss": 0.4184, + "step": 120150 + }, + { + "epoch": 0.6201598382012269, + "grad_norm": 23087.56640625, + "learning_rate": 4.802861378354607e-05, + "loss": 0.4177, + "step": 120200 + }, + { + "epoch": 0.6204178081838397, + "grad_norm": 22546.060546875, + "learning_rate": 4.7992112570732925e-05, + "loss": 0.4213, + "step": 120250 + }, + { + "epoch": 0.6206757781664526, + "grad_norm": 22802.8984375, + "learning_rate": 4.795561242969122e-05, + "loss": 0.4218, + "step": 120300 + }, + { + "epoch": 0.6209337481490653, + "grad_norm": 19467.32421875, + "learning_rate": 4.791911337990401e-05, + "loss": 0.4141, + "step": 120350 + }, + { + "epoch": 0.6211917181316782, + "grad_norm": 25076.169921875, + "learning_rate": 4.78826154408538e-05, + "loss": 0.4178, + "step": 120400 + }, + { + "epoch": 0.621449688114291, + "grad_norm": 20815.935546875, + "learning_rate": 4.784611863202244e-05, + "loss": 0.4217, + "step": 120450 + }, + { + "epoch": 0.6217076580969039, + "grad_norm": 21686.271484375, + "learning_rate": 4.780962297289126e-05, + "loss": 0.4124, + "step": 120500 + }, + { + "epoch": 0.6219656280795167, + "grad_norm": 22759.310546875, + "learning_rate": 4.777312848294092e-05, + "loss": 0.4159, + "step": 120550 + }, + { + "epoch": 0.6222235980621295, + "grad_norm": 25325.75390625, + "learning_rate": 4.773663518165148e-05, + "loss": 0.4176, + "step": 120600 + }, + { + "epoch": 0.6224815680447423, + "grad_norm": 23474.958984375, + "learning_rate": 4.7700143088502344e-05, + "loss": 0.4143, + "step": 120650 + }, + { + "epoch": 0.6227395380273552, + "grad_norm": 25355.40625, + "learning_rate": 4.766365222297229e-05, + "loss": 0.4262, + "step": 120700 + }, + { + "epoch": 0.622997508009968, + "grad_norm": 22215.14453125, + "learning_rate": 4.762716260453945e-05, + "loss": 0.4149, + "step": 120750 + }, + { + "epoch": 0.6232554779925807, + "grad_norm": 23521.607421875, + "learning_rate": 4.759067425268126e-05, + "loss": 0.4223, + "step": 120800 + }, + { + "epoch": 0.6235134479751936, + "grad_norm": 24524.02734375, + "learning_rate": 4.7554187186874513e-05, + "loss": 0.4256, + "step": 120850 + }, + { + "epoch": 0.6237714179578064, + "grad_norm": 19954.322265625, + "learning_rate": 4.7517701426595266e-05, + "loss": 0.4119, + "step": 120900 + }, + { + "epoch": 0.6240293879404193, + "grad_norm": 21612.1953125, + "learning_rate": 4.748121699131893e-05, + "loss": 0.4196, + "step": 120950 + }, + { + "epoch": 0.624287357923032, + "grad_norm": 20466.0, + "learning_rate": 4.744473390052019e-05, + "loss": 0.4181, + "step": 121000 + }, + { + "epoch": 0.6245453279056449, + "grad_norm": 19992.173828125, + "learning_rate": 4.740825217367304e-05, + "loss": 0.4159, + "step": 121050 + }, + { + "epoch": 0.6248032978882577, + "grad_norm": 21553.1328125, + "learning_rate": 4.737177183025067e-05, + "loss": 0.4157, + "step": 121100 + }, + { + "epoch": 0.6250612678708706, + "grad_norm": 22242.568359375, + "learning_rate": 4.73352928897256e-05, + "loss": 0.4153, + "step": 121150 + }, + { + "epoch": 0.6253192378534834, + "grad_norm": 23883.212890625, + "learning_rate": 4.7298815371569606e-05, + "loss": 0.4173, + "step": 121200 + }, + { + "epoch": 0.6255772078360962, + "grad_norm": 22386.505859375, + "learning_rate": 4.7262339295253645e-05, + "loss": 0.4178, + "step": 121250 + }, + { + "epoch": 0.625835177818709, + "grad_norm": 22051.859375, + "learning_rate": 4.722586468024797e-05, + "loss": 0.4111, + "step": 121300 + }, + { + "epoch": 0.6260931478013219, + "grad_norm": 21374.4765625, + "learning_rate": 4.7189391546021996e-05, + "loss": 0.418, + "step": 121350 + }, + { + "epoch": 0.6263511177839347, + "grad_norm": 22240.453125, + "learning_rate": 4.7152919912044406e-05, + "loss": 0.4196, + "step": 121400 + }, + { + "epoch": 0.6266090877665474, + "grad_norm": 26278.798828125, + "learning_rate": 4.711644979778302e-05, + "loss": 0.4165, + "step": 121450 + }, + { + "epoch": 0.6268670577491603, + "grad_norm": 22151.77734375, + "learning_rate": 4.707998122270492e-05, + "loss": 0.422, + "step": 121500 + }, + { + "epoch": 0.6271250277317731, + "grad_norm": 21278.99609375, + "learning_rate": 4.7043514206276276e-05, + "loss": 0.4202, + "step": 121550 + }, + { + "epoch": 0.627382997714386, + "grad_norm": 24062.6015625, + "learning_rate": 4.70070487679625e-05, + "loss": 0.4174, + "step": 121600 + }, + { + "epoch": 0.6276409676969987, + "grad_norm": 21124.400390625, + "learning_rate": 4.697058492722815e-05, + "loss": 0.4156, + "step": 121650 + }, + { + "epoch": 0.6278989376796116, + "grad_norm": 22513.48046875, + "learning_rate": 4.6934122703536894e-05, + "loss": 0.4198, + "step": 121700 + }, + { + "epoch": 0.6281569076622244, + "grad_norm": 24250.720703125, + "learning_rate": 4.689766211635159e-05, + "loss": 0.4197, + "step": 121750 + }, + { + "epoch": 0.6284148776448373, + "grad_norm": 23831.220703125, + "learning_rate": 4.686120318513415e-05, + "loss": 0.415, + "step": 121800 + }, + { + "epoch": 0.62867284762745, + "grad_norm": 24005.458984375, + "learning_rate": 4.682474592934569e-05, + "loss": 0.4154, + "step": 121850 + }, + { + "epoch": 0.6289308176100629, + "grad_norm": 21365.09375, + "learning_rate": 4.6788290368446355e-05, + "loss": 0.4164, + "step": 121900 + }, + { + "epoch": 0.6291887875926757, + "grad_norm": 23601.689453125, + "learning_rate": 4.675183652189545e-05, + "loss": 0.418, + "step": 121950 + }, + { + "epoch": 0.6294467575752886, + "grad_norm": 21023.33203125, + "learning_rate": 4.671538440915129e-05, + "loss": 0.4181, + "step": 122000 + }, + { + "epoch": 0.6297047275579014, + "grad_norm": 22292.671875, + "learning_rate": 4.667893404967133e-05, + "loss": 0.4203, + "step": 122050 + }, + { + "epoch": 0.6299626975405141, + "grad_norm": 21975.3671875, + "learning_rate": 4.664248546291207e-05, + "loss": 0.4162, + "step": 122100 + }, + { + "epoch": 0.630220667523127, + "grad_norm": 22591.34765625, + "learning_rate": 4.660603866832906e-05, + "loss": 0.4146, + "step": 122150 + }, + { + "epoch": 0.6304786375057398, + "grad_norm": 23449.529296875, + "learning_rate": 4.6569593685376895e-05, + "loss": 0.4205, + "step": 122200 + }, + { + "epoch": 0.6307366074883527, + "grad_norm": 21614.046875, + "learning_rate": 4.653315053350918e-05, + "loss": 0.4173, + "step": 122250 + }, + { + "epoch": 0.6309945774709654, + "grad_norm": 26004.5859375, + "learning_rate": 4.649670923217859e-05, + "loss": 0.4137, + "step": 122300 + }, + { + "epoch": 0.6312525474535783, + "grad_norm": 23640.357421875, + "learning_rate": 4.646026980083676e-05, + "loss": 0.4165, + "step": 122350 + }, + { + "epoch": 0.6315105174361911, + "grad_norm": 23575.3984375, + "learning_rate": 4.6423832258934396e-05, + "loss": 0.4179, + "step": 122400 + }, + { + "epoch": 0.631768487418804, + "grad_norm": 26795.05078125, + "learning_rate": 4.63873966259211e-05, + "loss": 0.4171, + "step": 122450 + }, + { + "epoch": 0.6320264574014167, + "grad_norm": 22246.931640625, + "learning_rate": 4.6350962921245515e-05, + "loss": 0.4188, + "step": 122500 + }, + { + "epoch": 0.6322844273840296, + "grad_norm": 22268.3359375, + "learning_rate": 4.63145311643553e-05, + "loss": 0.4141, + "step": 122550 + }, + { + "epoch": 0.6325423973666424, + "grad_norm": 23749.38671875, + "learning_rate": 4.627810137469696e-05, + "loss": 0.4132, + "step": 122600 + }, + { + "epoch": 0.6328003673492553, + "grad_norm": 22449.15625, + "learning_rate": 4.624167357171606e-05, + "loss": 0.4177, + "step": 122650 + }, + { + "epoch": 0.6330583373318681, + "grad_norm": 22132.927734375, + "learning_rate": 4.6205247774857e-05, + "loss": 0.4211, + "step": 122700 + }, + { + "epoch": 0.6333163073144809, + "grad_norm": 20199.654296875, + "learning_rate": 4.616882400356323e-05, + "loss": 0.4127, + "step": 122750 + }, + { + "epoch": 0.6335742772970937, + "grad_norm": 23172.29296875, + "learning_rate": 4.613240227727699e-05, + "loss": 0.4173, + "step": 122800 + }, + { + "epoch": 0.6338322472797066, + "grad_norm": 23373.6640625, + "learning_rate": 4.609598261543954e-05, + "loss": 0.4139, + "step": 122850 + }, + { + "epoch": 0.6340902172623194, + "grad_norm": 22187.794921875, + "learning_rate": 4.6059565037490965e-05, + "loss": 0.4233, + "step": 122900 + }, + { + "epoch": 0.6343481872449321, + "grad_norm": 21762.28515625, + "learning_rate": 4.602314956287027e-05, + "loss": 0.4195, + "step": 122950 + }, + { + "epoch": 0.634606157227545, + "grad_norm": 24228.3125, + "learning_rate": 4.598673621101535e-05, + "loss": 0.4218, + "step": 123000 + }, + { + "epoch": 0.6348641272101578, + "grad_norm": 20360.208984375, + "learning_rate": 4.595032500136291e-05, + "loss": 0.4266, + "step": 123050 + }, + { + "epoch": 0.6351220971927707, + "grad_norm": 22763.712890625, + "learning_rate": 4.5913915953348574e-05, + "loss": 0.4153, + "step": 123100 + }, + { + "epoch": 0.6353800671753834, + "grad_norm": 25601.05078125, + "learning_rate": 4.5877509086406766e-05, + "loss": 0.4201, + "step": 123150 + }, + { + "epoch": 0.6356380371579963, + "grad_norm": 22695.91015625, + "learning_rate": 4.584110441997081e-05, + "loss": 0.4174, + "step": 123200 + }, + { + "epoch": 0.6358960071406091, + "grad_norm": 24915.857421875, + "learning_rate": 4.5804701973472755e-05, + "loss": 0.416, + "step": 123250 + }, + { + "epoch": 0.636153977123222, + "grad_norm": 24066.427734375, + "learning_rate": 4.576830176634356e-05, + "loss": 0.415, + "step": 123300 + }, + { + "epoch": 0.6364119471058348, + "grad_norm": 25726.71484375, + "learning_rate": 4.573190381801293e-05, + "loss": 0.4204, + "step": 123350 + }, + { + "epoch": 0.6366699170884476, + "grad_norm": 24271.998046875, + "learning_rate": 4.56955081479094e-05, + "loss": 0.4166, + "step": 123400 + }, + { + "epoch": 0.6369278870710604, + "grad_norm": 20897.818359375, + "learning_rate": 4.5659114775460286e-05, + "loss": 0.4156, + "step": 123450 + }, + { + "epoch": 0.6371858570536733, + "grad_norm": 24409.841796875, + "learning_rate": 4.562272372009163e-05, + "loss": 0.4208, + "step": 123500 + }, + { + "epoch": 0.6374438270362861, + "grad_norm": 24757.927734375, + "learning_rate": 4.5586335001228296e-05, + "loss": 0.4167, + "step": 123550 + }, + { + "epoch": 0.6377017970188988, + "grad_norm": 22433.091796875, + "learning_rate": 4.554994863829387e-05, + "loss": 0.4206, + "step": 123600 + }, + { + "epoch": 0.6379597670015117, + "grad_norm": 22757.798828125, + "learning_rate": 4.5513564650710706e-05, + "loss": 0.4113, + "step": 123650 + }, + { + "epoch": 0.6382177369841245, + "grad_norm": 22652.9140625, + "learning_rate": 4.547718305789984e-05, + "loss": 0.4224, + "step": 123700 + }, + { + "epoch": 0.6384757069667374, + "grad_norm": 25416.0390625, + "learning_rate": 4.5440803879281086e-05, + "loss": 0.4129, + "step": 123750 + }, + { + "epoch": 0.6387336769493501, + "grad_norm": 22621.40625, + "learning_rate": 4.5404427134272926e-05, + "loss": 0.4204, + "step": 123800 + }, + { + "epoch": 0.638991646931963, + "grad_norm": 24213.93359375, + "learning_rate": 4.536805284229258e-05, + "loss": 0.4109, + "step": 123850 + }, + { + "epoch": 0.6392496169145758, + "grad_norm": 20231.091796875, + "learning_rate": 4.5331681022755946e-05, + "loss": 0.4221, + "step": 123900 + }, + { + "epoch": 0.6395075868971887, + "grad_norm": 22513.21875, + "learning_rate": 4.529531169507757e-05, + "loss": 0.4189, + "step": 123950 + }, + { + "epoch": 0.6397655568798014, + "grad_norm": 19454.783203125, + "learning_rate": 4.5258944878670714e-05, + "loss": 0.4138, + "step": 124000 + }, + { + "epoch": 0.6400235268624143, + "grad_norm": 23547.423828125, + "learning_rate": 4.522258059294727e-05, + "loss": 0.4206, + "step": 124050 + }, + { + "epoch": 0.6402814968450271, + "grad_norm": 23985.0703125, + "learning_rate": 4.5186218857317825e-05, + "loss": 0.4186, + "step": 124100 + }, + { + "epoch": 0.64053946682764, + "grad_norm": 22254.078125, + "learning_rate": 4.5149859691191517e-05, + "loss": 0.4076, + "step": 124150 + }, + { + "epoch": 0.6407974368102528, + "grad_norm": 24060.70703125, + "learning_rate": 4.5113503113976194e-05, + "loss": 0.4207, + "step": 124200 + }, + { + "epoch": 0.6410554067928655, + "grad_norm": 21521.923828125, + "learning_rate": 4.5077149145078275e-05, + "loss": 0.4134, + "step": 124250 + }, + { + "epoch": 0.6413133767754784, + "grad_norm": 22107.48828125, + "learning_rate": 4.504079780390282e-05, + "loss": 0.4095, + "step": 124300 + }, + { + "epoch": 0.6415713467580912, + "grad_norm": 22610.880859375, + "learning_rate": 4.5004449109853485e-05, + "loss": 0.4216, + "step": 124350 + }, + { + "epoch": 0.6418293167407041, + "grad_norm": 22752.83984375, + "learning_rate": 4.496810308233247e-05, + "loss": 0.4225, + "step": 124400 + }, + { + "epoch": 0.6420872867233168, + "grad_norm": 22029.88671875, + "learning_rate": 4.4931759740740596e-05, + "loss": 0.4138, + "step": 124450 + }, + { + "epoch": 0.6423452567059297, + "grad_norm": 24989.2421875, + "learning_rate": 4.489541910447722e-05, + "loss": 0.4166, + "step": 124500 + }, + { + "epoch": 0.6426032266885425, + "grad_norm": 25843.16796875, + "learning_rate": 4.485908119294031e-05, + "loss": 0.4132, + "step": 124550 + }, + { + "epoch": 0.6428611966711554, + "grad_norm": 23847.01171875, + "learning_rate": 4.4822746025526286e-05, + "loss": 0.4256, + "step": 124600 + }, + { + "epoch": 0.6431191666537681, + "grad_norm": 21634.71484375, + "learning_rate": 4.478641362163019e-05, + "loss": 0.4182, + "step": 124650 + }, + { + "epoch": 0.643377136636381, + "grad_norm": 22252.021484375, + "learning_rate": 4.475008400064554e-05, + "loss": 0.419, + "step": 124700 + }, + { + "epoch": 0.6436351066189938, + "grad_norm": 24151.951171875, + "learning_rate": 4.471375718196439e-05, + "loss": 0.4201, + "step": 124750 + }, + { + "epoch": 0.6438930766016067, + "grad_norm": 23570.310546875, + "learning_rate": 4.4677433184977315e-05, + "loss": 0.4131, + "step": 124800 + }, + { + "epoch": 0.6441510465842195, + "grad_norm": 23886.896484375, + "learning_rate": 4.464111202907332e-05, + "loss": 0.4172, + "step": 124850 + }, + { + "epoch": 0.6444090165668322, + "grad_norm": 23476.888671875, + "learning_rate": 4.4604793733639973e-05, + "loss": 0.419, + "step": 124900 + }, + { + "epoch": 0.6446669865494451, + "grad_norm": 22735.759765625, + "learning_rate": 4.456847831806324e-05, + "loss": 0.4214, + "step": 124950 + }, + { + "epoch": 0.644924956532058, + "grad_norm": 25508.525390625, + "learning_rate": 4.4532165801727626e-05, + "loss": 0.4184, + "step": 125000 + }, + { + "epoch": 0.644924956532058, + "eval_loss": 0.40382638573646545, + "eval_runtime": 3215.6548, + "eval_samples_per_second": 964.382, + "eval_steps_per_second": 1.884, + "step": 125000 + }, + { + "epoch": 0.6451829265146708, + "grad_norm": 23686.8671875, + "learning_rate": 4.449585620401601e-05, + "loss": 0.4115, + "step": 125050 + }, + { + "epoch": 0.6454408964972835, + "grad_norm": 22472.7421875, + "learning_rate": 4.445954954430976e-05, + "loss": 0.4187, + "step": 125100 + }, + { + "epoch": 0.6456988664798964, + "grad_norm": 25044.5859375, + "learning_rate": 4.442324584198871e-05, + "loss": 0.4188, + "step": 125150 + }, + { + "epoch": 0.6459568364625092, + "grad_norm": 23489.119140625, + "learning_rate": 4.4386945116431025e-05, + "loss": 0.4212, + "step": 125200 + }, + { + "epoch": 0.6462148064451221, + "grad_norm": 23150.12109375, + "learning_rate": 4.435064738701335e-05, + "loss": 0.4155, + "step": 125250 + }, + { + "epoch": 0.6464727764277348, + "grad_norm": 22082.09765625, + "learning_rate": 4.4314352673110696e-05, + "loss": 0.4208, + "step": 125300 + }, + { + "epoch": 0.6467307464103477, + "grad_norm": 23107.71484375, + "learning_rate": 4.427806099409652e-05, + "loss": 0.4172, + "step": 125350 + }, + { + "epoch": 0.6469887163929605, + "grad_norm": 23660.607421875, + "learning_rate": 4.4241772369342554e-05, + "loss": 0.4156, + "step": 125400 + }, + { + "epoch": 0.6472466863755734, + "grad_norm": 22054.47265625, + "learning_rate": 4.420548681821901e-05, + "loss": 0.4174, + "step": 125450 + }, + { + "epoch": 0.6475046563581862, + "grad_norm": 22386.654296875, + "learning_rate": 4.416920436009439e-05, + "loss": 0.4164, + "step": 125500 + }, + { + "epoch": 0.647762626340799, + "grad_norm": 22394.78125, + "learning_rate": 4.413292501433557e-05, + "loss": 0.4128, + "step": 125550 + }, + { + "epoch": 0.6480205963234118, + "grad_norm": 21871.1953125, + "learning_rate": 4.4096648800307796e-05, + "loss": 0.4174, + "step": 125600 + }, + { + "epoch": 0.6482785663060247, + "grad_norm": 21630.826171875, + "learning_rate": 4.406037573737456e-05, + "loss": 0.4146, + "step": 125650 + }, + { + "epoch": 0.6485365362886375, + "grad_norm": 20917.244140625, + "learning_rate": 4.4024105844897744e-05, + "loss": 0.4172, + "step": 125700 + }, + { + "epoch": 0.6487945062712502, + "grad_norm": 21545.53515625, + "learning_rate": 4.3987839142237505e-05, + "loss": 0.4189, + "step": 125750 + }, + { + "epoch": 0.6490524762538631, + "grad_norm": 27708.19140625, + "learning_rate": 4.395157564875234e-05, + "loss": 0.4127, + "step": 125800 + }, + { + "epoch": 0.6493104462364759, + "grad_norm": 23791.052734375, + "learning_rate": 4.391531538379895e-05, + "loss": 0.4146, + "step": 125850 + }, + { + "epoch": 0.6495684162190888, + "grad_norm": 23441.0078125, + "learning_rate": 4.387905836673239e-05, + "loss": 0.4191, + "step": 125900 + }, + { + "epoch": 0.6498263862017015, + "grad_norm": 21998.982421875, + "learning_rate": 4.3842804616905944e-05, + "loss": 0.4165, + "step": 125950 + }, + { + "epoch": 0.6500843561843144, + "grad_norm": 26170.572265625, + "learning_rate": 4.380655415367116e-05, + "loss": 0.4106, + "step": 126000 + }, + { + "epoch": 0.6503423261669272, + "grad_norm": 23915.345703125, + "learning_rate": 4.3770306996377866e-05, + "loss": 0.417, + "step": 126050 + }, + { + "epoch": 0.6506002961495401, + "grad_norm": 22807.23828125, + "learning_rate": 4.373406316437404e-05, + "loss": 0.4138, + "step": 126100 + }, + { + "epoch": 0.6508582661321528, + "grad_norm": 22825.060546875, + "learning_rate": 4.369782267700598e-05, + "loss": 0.4159, + "step": 126150 + }, + { + "epoch": 0.6511162361147657, + "grad_norm": 21670.83984375, + "learning_rate": 4.366158555361812e-05, + "loss": 0.4131, + "step": 126200 + }, + { + "epoch": 0.6513742060973785, + "grad_norm": 24840.630859375, + "learning_rate": 4.362535181355319e-05, + "loss": 0.4072, + "step": 126250 + }, + { + "epoch": 0.6516321760799914, + "grad_norm": 24121.158203125, + "learning_rate": 4.358912147615199e-05, + "loss": 0.4085, + "step": 126300 + }, + { + "epoch": 0.6518901460626042, + "grad_norm": 21738.236328125, + "learning_rate": 4.355289456075363e-05, + "loss": 0.4154, + "step": 126350 + }, + { + "epoch": 0.6521481160452169, + "grad_norm": 24880.833984375, + "learning_rate": 4.3516671086695296e-05, + "loss": 0.4154, + "step": 126400 + }, + { + "epoch": 0.6524060860278298, + "grad_norm": 21572.140625, + "learning_rate": 4.348045107331239e-05, + "loss": 0.4185, + "step": 126450 + }, + { + "epoch": 0.6526640560104426, + "grad_norm": 24076.17578125, + "learning_rate": 4.344423453993849e-05, + "loss": 0.4132, + "step": 126500 + }, + { + "epoch": 0.6529220259930555, + "grad_norm": 23531.365234375, + "learning_rate": 4.340802150590522e-05, + "loss": 0.4179, + "step": 126550 + }, + { + "epoch": 0.6531799959756682, + "grad_norm": 24287.568359375, + "learning_rate": 4.337181199054243e-05, + "loss": 0.4136, + "step": 126600 + }, + { + "epoch": 0.6534379659582811, + "grad_norm": 23352.52734375, + "learning_rate": 4.3335606013178046e-05, + "loss": 0.4177, + "step": 126650 + }, + { + "epoch": 0.6536959359408939, + "grad_norm": 22291.494140625, + "learning_rate": 4.3299403593138144e-05, + "loss": 0.4155, + "step": 126700 + }, + { + "epoch": 0.6539539059235068, + "grad_norm": 20745.798828125, + "learning_rate": 4.3263204749746836e-05, + "loss": 0.4139, + "step": 126750 + }, + { + "epoch": 0.6542118759061195, + "grad_norm": 24670.357421875, + "learning_rate": 4.322700950232639e-05, + "loss": 0.423, + "step": 126800 + }, + { + "epoch": 0.6544698458887324, + "grad_norm": 23067.81640625, + "learning_rate": 4.31908178701971e-05, + "loss": 0.4174, + "step": 126850 + }, + { + "epoch": 0.6547278158713452, + "grad_norm": 25275.47265625, + "learning_rate": 4.315462987267739e-05, + "loss": 0.4181, + "step": 126900 + }, + { + "epoch": 0.6549857858539581, + "grad_norm": 21032.4375, + "learning_rate": 4.311844552908372e-05, + "loss": 0.4111, + "step": 126950 + }, + { + "epoch": 0.6552437558365709, + "grad_norm": 21629.0625, + "learning_rate": 4.308226485873056e-05, + "loss": 0.4129, + "step": 127000 + }, + { + "epoch": 0.6555017258191836, + "grad_norm": 24375.935546875, + "learning_rate": 4.3046087880930466e-05, + "loss": 0.4129, + "step": 127050 + }, + { + "epoch": 0.6557596958017965, + "grad_norm": 21224.63671875, + "learning_rate": 4.3009914614994e-05, + "loss": 0.4156, + "step": 127100 + }, + { + "epoch": 0.6560176657844093, + "grad_norm": 24836.560546875, + "learning_rate": 4.297374508022977e-05, + "loss": 0.4133, + "step": 127150 + }, + { + "epoch": 0.6562756357670222, + "grad_norm": 22769.599609375, + "learning_rate": 4.293757929594435e-05, + "loss": 0.4151, + "step": 127200 + }, + { + "epoch": 0.6565336057496349, + "grad_norm": 22936.603515625, + "learning_rate": 4.2901417281442345e-05, + "loss": 0.4173, + "step": 127250 + }, + { + "epoch": 0.6567915757322478, + "grad_norm": 21296.39453125, + "learning_rate": 4.286525905602634e-05, + "loss": 0.4121, + "step": 127300 + }, + { + "epoch": 0.6570495457148606, + "grad_norm": 24282.591796875, + "learning_rate": 4.282910463899689e-05, + "loss": 0.4086, + "step": 127350 + }, + { + "epoch": 0.6573075156974735, + "grad_norm": 22443.6015625, + "learning_rate": 4.2792954049652545e-05, + "loss": 0.4183, + "step": 127400 + }, + { + "epoch": 0.6575654856800862, + "grad_norm": 21437.98046875, + "learning_rate": 4.275680730728976e-05, + "loss": 0.4172, + "step": 127450 + }, + { + "epoch": 0.6578234556626991, + "grad_norm": 24970.3125, + "learning_rate": 4.2720664431202987e-05, + "loss": 0.4187, + "step": 127500 + }, + { + "epoch": 0.6580814256453119, + "grad_norm": 21128.349609375, + "learning_rate": 4.268452544068457e-05, + "loss": 0.4142, + "step": 127550 + }, + { + "epoch": 0.6583393956279248, + "grad_norm": 26429.14453125, + "learning_rate": 4.2648390355024836e-05, + "loss": 0.4115, + "step": 127600 + }, + { + "epoch": 0.6585973656105376, + "grad_norm": 22542.380859375, + "learning_rate": 4.261225919351195e-05, + "loss": 0.4144, + "step": 127650 + }, + { + "epoch": 0.6588553355931503, + "grad_norm": 23179.853515625, + "learning_rate": 4.257613197543207e-05, + "loss": 0.4164, + "step": 127700 + }, + { + "epoch": 0.6591133055757632, + "grad_norm": 24641.048828125, + "learning_rate": 4.254000872006918e-05, + "loss": 0.4175, + "step": 127750 + }, + { + "epoch": 0.659371275558376, + "grad_norm": 23836.771484375, + "learning_rate": 4.250388944670517e-05, + "loss": 0.4201, + "step": 127800 + }, + { + "epoch": 0.6596292455409889, + "grad_norm": 23714.7578125, + "learning_rate": 4.2467774174619836e-05, + "loss": 0.4102, + "step": 127850 + }, + { + "epoch": 0.6598872155236016, + "grad_norm": 23630.2890625, + "learning_rate": 4.2431662923090785e-05, + "loss": 0.411, + "step": 127900 + }, + { + "epoch": 0.6601451855062145, + "grad_norm": 23018.384765625, + "learning_rate": 4.239555571139353e-05, + "loss": 0.4113, + "step": 127950 + }, + { + "epoch": 0.6604031554888273, + "grad_norm": 23594.041015625, + "learning_rate": 4.235945255880137e-05, + "loss": 0.4153, + "step": 128000 + }, + { + "epoch": 0.6606611254714402, + "grad_norm": 24231.07421875, + "learning_rate": 4.232335348458549e-05, + "loss": 0.4159, + "step": 128050 + }, + { + "epoch": 0.6609190954540529, + "grad_norm": 22362.98828125, + "learning_rate": 4.228725850801486e-05, + "loss": 0.4218, + "step": 128100 + }, + { + "epoch": 0.6611770654366658, + "grad_norm": 23008.44140625, + "learning_rate": 4.225116764835631e-05, + "loss": 0.416, + "step": 128150 + }, + { + "epoch": 0.6614350354192786, + "grad_norm": 23027.1875, + "learning_rate": 4.221508092487441e-05, + "loss": 0.4163, + "step": 128200 + }, + { + "epoch": 0.6616930054018915, + "grad_norm": 25121.61328125, + "learning_rate": 4.2178998356831553e-05, + "loss": 0.4167, + "step": 128250 + }, + { + "epoch": 0.6619509753845043, + "grad_norm": 24767.4140625, + "learning_rate": 4.214291996348794e-05, + "loss": 0.4176, + "step": 128300 + }, + { + "epoch": 0.662208945367117, + "grad_norm": 24596.533203125, + "learning_rate": 4.210684576410151e-05, + "loss": 0.4183, + "step": 128350 + }, + { + "epoch": 0.6624669153497299, + "grad_norm": 21095.8671875, + "learning_rate": 4.2070775777927976e-05, + "loss": 0.4151, + "step": 128400 + }, + { + "epoch": 0.6627248853323428, + "grad_norm": 25389.1640625, + "learning_rate": 4.203471002422077e-05, + "loss": 0.4226, + "step": 128450 + }, + { + "epoch": 0.6629828553149556, + "grad_norm": 24613.94921875, + "learning_rate": 4.199864852223113e-05, + "loss": 0.4093, + "step": 128500 + }, + { + "epoch": 0.6632408252975683, + "grad_norm": 23665.59765625, + "learning_rate": 4.196259129120796e-05, + "loss": 0.4135, + "step": 128550 + }, + { + "epoch": 0.6634987952801812, + "grad_norm": 22946.5234375, + "learning_rate": 4.192653835039795e-05, + "loss": 0.4151, + "step": 128600 + }, + { + "epoch": 0.663756765262794, + "grad_norm": 22438.23046875, + "learning_rate": 4.189048971904541e-05, + "loss": 0.4064, + "step": 128650 + }, + { + "epoch": 0.6640147352454069, + "grad_norm": 22760.623046875, + "learning_rate": 4.185444541639243e-05, + "loss": 0.4084, + "step": 128700 + }, + { + "epoch": 0.6642727052280196, + "grad_norm": 25223.484375, + "learning_rate": 4.1818405461678763e-05, + "loss": 0.4151, + "step": 128750 + }, + { + "epoch": 0.6645306752106325, + "grad_norm": 31547.962890625, + "learning_rate": 4.178236987414182e-05, + "loss": 0.4115, + "step": 128800 + }, + { + "epoch": 0.6647886451932453, + "grad_norm": 19114.953125, + "learning_rate": 4.174633867301674e-05, + "loss": 0.4109, + "step": 128850 + }, + { + "epoch": 0.6650466151758582, + "grad_norm": 22819.888671875, + "learning_rate": 4.1710311877536226e-05, + "loss": 0.4123, + "step": 128900 + }, + { + "epoch": 0.6653045851584709, + "grad_norm": 22868.62890625, + "learning_rate": 4.167428950693073e-05, + "loss": 0.413, + "step": 128950 + }, + { + "epoch": 0.6655625551410838, + "grad_norm": 23062.359375, + "learning_rate": 4.163827158042826e-05, + "loss": 0.4152, + "step": 129000 + }, + { + "epoch": 0.6658205251236966, + "grad_norm": 25990.505859375, + "learning_rate": 4.160225811725453e-05, + "loss": 0.4176, + "step": 129050 + }, + { + "epoch": 0.6660784951063095, + "grad_norm": 21594.1953125, + "learning_rate": 4.156624913663279e-05, + "loss": 0.4136, + "step": 129100 + }, + { + "epoch": 0.6663364650889223, + "grad_norm": 21145.869140625, + "learning_rate": 4.153024465778393e-05, + "loss": 0.4216, + "step": 129150 + }, + { + "epoch": 0.666594435071535, + "grad_norm": 22634.7734375, + "learning_rate": 4.149424469992649e-05, + "loss": 0.4114, + "step": 129200 + }, + { + "epoch": 0.6668524050541479, + "grad_norm": 23526.46875, + "learning_rate": 4.145824928227652e-05, + "loss": 0.4217, + "step": 129250 + }, + { + "epoch": 0.6671103750367607, + "grad_norm": 22295.880859375, + "learning_rate": 4.142225842404769e-05, + "loss": 0.4169, + "step": 129300 + }, + { + "epoch": 0.6673683450193736, + "grad_norm": 22282.421875, + "learning_rate": 4.13862721444512e-05, + "loss": 0.4195, + "step": 129350 + }, + { + "epoch": 0.6676263150019863, + "grad_norm": 21856.337890625, + "learning_rate": 4.135029046269585e-05, + "loss": 0.4229, + "step": 129400 + }, + { + "epoch": 0.6678842849845992, + "grad_norm": 20999.04296875, + "learning_rate": 4.131431339798796e-05, + "loss": 0.4168, + "step": 129450 + }, + { + "epoch": 0.668142254967212, + "grad_norm": 24684.484375, + "learning_rate": 4.12783409695314e-05, + "loss": 0.4117, + "step": 129500 + }, + { + "epoch": 0.6684002249498249, + "grad_norm": 24120.349609375, + "learning_rate": 4.124237319652753e-05, + "loss": 0.4186, + "step": 129550 + }, + { + "epoch": 0.6686581949324376, + "grad_norm": 23283.736328125, + "learning_rate": 4.1206410098175265e-05, + "loss": 0.4176, + "step": 129600 + }, + { + "epoch": 0.6689161649150505, + "grad_norm": 21902.6875, + "learning_rate": 4.117045169367102e-05, + "loss": 0.4153, + "step": 129650 + }, + { + "epoch": 0.6691741348976633, + "grad_norm": 22762.6015625, + "learning_rate": 4.1134498002208674e-05, + "loss": 0.414, + "step": 129700 + }, + { + "epoch": 0.6694321048802762, + "grad_norm": 20947.083984375, + "learning_rate": 4.109854904297965e-05, + "loss": 0.4113, + "step": 129750 + }, + { + "epoch": 0.669690074862889, + "grad_norm": 24687.189453125, + "learning_rate": 4.106260483517276e-05, + "loss": 0.4207, + "step": 129800 + }, + { + "epoch": 0.6699480448455017, + "grad_norm": 24164.724609375, + "learning_rate": 4.102666539797435e-05, + "loss": 0.4116, + "step": 129850 + }, + { + "epoch": 0.6702060148281146, + "grad_norm": 23408.68359375, + "learning_rate": 4.099073075056818e-05, + "loss": 0.4181, + "step": 129900 + }, + { + "epoch": 0.6704639848107274, + "grad_norm": 22822.3515625, + "learning_rate": 4.0954800912135516e-05, + "loss": 0.4176, + "step": 129950 + }, + { + "epoch": 0.6707219547933403, + "grad_norm": 21576.173828125, + "learning_rate": 4.091887590185494e-05, + "loss": 0.4165, + "step": 130000 + }, + { + "epoch": 0.6707219547933403, + "eval_loss": 0.40186887979507446, + "eval_runtime": 3150.7117, + "eval_samples_per_second": 984.26, + "eval_steps_per_second": 1.922, + "step": 130000 + }, + { + "epoch": 0.670979924775953, + "grad_norm": 21987.3671875, + "learning_rate": 4.0882955738902576e-05, + "loss": 0.4176, + "step": 130050 + }, + { + "epoch": 0.6712378947585659, + "grad_norm": 23900.74609375, + "learning_rate": 4.0847040442451895e-05, + "loss": 0.4183, + "step": 130100 + }, + { + "epoch": 0.6714958647411787, + "grad_norm": 22624.236328125, + "learning_rate": 4.081113003167378e-05, + "loss": 0.4146, + "step": 130150 + }, + { + "epoch": 0.6717538347237916, + "grad_norm": 22636.490234375, + "learning_rate": 4.0775224525736546e-05, + "loss": 0.4107, + "step": 130200 + }, + { + "epoch": 0.6720118047064043, + "grad_norm": 22667.66796875, + "learning_rate": 4.07393239438058e-05, + "loss": 0.4151, + "step": 130250 + }, + { + "epoch": 0.6722697746890172, + "grad_norm": 20381.720703125, + "learning_rate": 4.070342830504465e-05, + "loss": 0.4167, + "step": 130300 + }, + { + "epoch": 0.67252774467163, + "grad_norm": 22913.248046875, + "learning_rate": 4.0667537628613424e-05, + "loss": 0.4116, + "step": 130350 + }, + { + "epoch": 0.6727857146542429, + "grad_norm": 23168.865234375, + "learning_rate": 4.063165193366992e-05, + "loss": 0.413, + "step": 130400 + }, + { + "epoch": 0.6730436846368557, + "grad_norm": 21597.861328125, + "learning_rate": 4.059577123936918e-05, + "loss": 0.4179, + "step": 130450 + }, + { + "epoch": 0.6733016546194684, + "grad_norm": 20305.806640625, + "learning_rate": 4.055989556486365e-05, + "loss": 0.4199, + "step": 130500 + }, + { + "epoch": 0.6735596246020813, + "grad_norm": 23520.173828125, + "learning_rate": 4.052402492930311e-05, + "loss": 0.4154, + "step": 130550 + }, + { + "epoch": 0.6738175945846941, + "grad_norm": 23356.85546875, + "learning_rate": 4.048815935183453e-05, + "loss": 0.4154, + "step": 130600 + }, + { + "epoch": 0.674075564567307, + "grad_norm": 22958.611328125, + "learning_rate": 4.0452298851602324e-05, + "loss": 0.4149, + "step": 130650 + }, + { + "epoch": 0.6743335345499197, + "grad_norm": 24888.25390625, + "learning_rate": 4.04164434477481e-05, + "loss": 0.4166, + "step": 130700 + }, + { + "epoch": 0.6745915045325326, + "grad_norm": 22958.189453125, + "learning_rate": 4.0380593159410806e-05, + "loss": 0.4159, + "step": 130750 + }, + { + "epoch": 0.6748494745151454, + "grad_norm": 21863.55859375, + "learning_rate": 4.03447480057266e-05, + "loss": 0.4142, + "step": 130800 + }, + { + "epoch": 0.6751074444977583, + "grad_norm": 23096.375, + "learning_rate": 4.030890800582895e-05, + "loss": 0.4108, + "step": 130850 + }, + { + "epoch": 0.675365414480371, + "grad_norm": 23506.576171875, + "learning_rate": 4.027307317884854e-05, + "loss": 0.4111, + "step": 130900 + }, + { + "epoch": 0.6756233844629839, + "grad_norm": 26913.11328125, + "learning_rate": 4.023724354391331e-05, + "loss": 0.4145, + "step": 130950 + }, + { + "epoch": 0.6758813544455967, + "grad_norm": 22008.958984375, + "learning_rate": 4.020141912014846e-05, + "loss": 0.4118, + "step": 131000 + }, + { + "epoch": 0.6761393244282096, + "grad_norm": 21431.857421875, + "learning_rate": 4.016559992667632e-05, + "loss": 0.417, + "step": 131050 + }, + { + "epoch": 0.6763972944108223, + "grad_norm": 24077.453125, + "learning_rate": 4.0129785982616524e-05, + "loss": 0.4121, + "step": 131100 + }, + { + "epoch": 0.6766552643934352, + "grad_norm": 22978.5390625, + "learning_rate": 4.009397730708583e-05, + "loss": 0.4074, + "step": 131150 + }, + { + "epoch": 0.676913234376048, + "grad_norm": 25474.740234375, + "learning_rate": 4.005817391919826e-05, + "loss": 0.4159, + "step": 131200 + }, + { + "epoch": 0.6771712043586608, + "grad_norm": 23532.416015625, + "learning_rate": 4.0022375838064904e-05, + "loss": 0.4202, + "step": 131250 + }, + { + "epoch": 0.6774291743412737, + "grad_norm": 23746.072265625, + "learning_rate": 3.998658308279414e-05, + "loss": 0.4157, + "step": 131300 + }, + { + "epoch": 0.6776871443238864, + "grad_norm": 21691.6875, + "learning_rate": 3.995079567249142e-05, + "loss": 0.4158, + "step": 131350 + }, + { + "epoch": 0.6779451143064993, + "grad_norm": 24167.923828125, + "learning_rate": 3.991501362625937e-05, + "loss": 0.4165, + "step": 131400 + }, + { + "epoch": 0.6782030842891121, + "grad_norm": 22420.27734375, + "learning_rate": 3.9879236963197784e-05, + "loss": 0.418, + "step": 131450 + }, + { + "epoch": 0.678461054271725, + "grad_norm": 22116.75, + "learning_rate": 3.984346570240352e-05, + "loss": 0.4152, + "step": 131500 + }, + { + "epoch": 0.6787190242543377, + "grad_norm": 23841.001953125, + "learning_rate": 3.9807699862970596e-05, + "loss": 0.4179, + "step": 131550 + }, + { + "epoch": 0.6789769942369506, + "grad_norm": 22931.126953125, + "learning_rate": 3.977193946399011e-05, + "loss": 0.4171, + "step": 131600 + }, + { + "epoch": 0.6792349642195634, + "grad_norm": 24939.294921875, + "learning_rate": 3.973618452455031e-05, + "loss": 0.4147, + "step": 131650 + }, + { + "epoch": 0.6794929342021763, + "grad_norm": 22026.615234375, + "learning_rate": 3.970043506373644e-05, + "loss": 0.4084, + "step": 131700 + }, + { + "epoch": 0.679750904184789, + "grad_norm": 24636.595703125, + "learning_rate": 3.9664691100630904e-05, + "loss": 0.4137, + "step": 131750 + }, + { + "epoch": 0.6800088741674019, + "grad_norm": 25599.443359375, + "learning_rate": 3.962895265431311e-05, + "loss": 0.4167, + "step": 131800 + }, + { + "epoch": 0.6802668441500147, + "grad_norm": 23514.0078125, + "learning_rate": 3.9593219743859575e-05, + "loss": 0.408, + "step": 131850 + }, + { + "epoch": 0.6805248141326276, + "grad_norm": 21798.9609375, + "learning_rate": 3.9557492388343844e-05, + "loss": 0.4129, + "step": 131900 + }, + { + "epoch": 0.6807827841152404, + "grad_norm": 24803.248046875, + "learning_rate": 3.952177060683644e-05, + "loss": 0.4126, + "step": 131950 + }, + { + "epoch": 0.6810407540978531, + "grad_norm": 23215.529296875, + "learning_rate": 3.948605441840501e-05, + "loss": 0.4114, + "step": 132000 + }, + { + "epoch": 0.681298724080466, + "grad_norm": 21179.626953125, + "learning_rate": 3.945034384211412e-05, + "loss": 0.4139, + "step": 132050 + }, + { + "epoch": 0.6815566940630788, + "grad_norm": 22894.04296875, + "learning_rate": 3.941463889702543e-05, + "loss": 0.4144, + "step": 132100 + }, + { + "epoch": 0.6818146640456917, + "grad_norm": 22581.392578125, + "learning_rate": 3.937893960219751e-05, + "loss": 0.4163, + "step": 132150 + }, + { + "epoch": 0.6820726340283044, + "grad_norm": 27557.634765625, + "learning_rate": 3.9343245976685966e-05, + "loss": 0.4194, + "step": 132200 + }, + { + "epoch": 0.6823306040109173, + "grad_norm": 24157.97265625, + "learning_rate": 3.9307558039543355e-05, + "loss": 0.4089, + "step": 132250 + }, + { + "epoch": 0.6825885739935301, + "grad_norm": 23363.904296875, + "learning_rate": 3.927187580981922e-05, + "loss": 0.4108, + "step": 132300 + }, + { + "epoch": 0.682846543976143, + "grad_norm": 24005.15625, + "learning_rate": 3.9236199306560054e-05, + "loss": 0.4103, + "step": 132350 + }, + { + "epoch": 0.6831045139587557, + "grad_norm": 23476.4609375, + "learning_rate": 3.920052854880925e-05, + "loss": 0.4189, + "step": 132400 + }, + { + "epoch": 0.6833624839413686, + "grad_norm": 23734.173828125, + "learning_rate": 3.91648635556072e-05, + "loss": 0.4183, + "step": 132450 + }, + { + "epoch": 0.6836204539239814, + "grad_norm": 22112.642578125, + "learning_rate": 3.912920434599117e-05, + "loss": 0.4139, + "step": 132500 + }, + { + "epoch": 0.6838784239065943, + "grad_norm": 23442.96484375, + "learning_rate": 3.909355093899537e-05, + "loss": 0.4137, + "step": 132550 + }, + { + "epoch": 0.6841363938892071, + "grad_norm": 22873.734375, + "learning_rate": 3.905790335365087e-05, + "loss": 0.4097, + "step": 132600 + }, + { + "epoch": 0.6843943638718198, + "grad_norm": 24382.9140625, + "learning_rate": 3.902226160898567e-05, + "loss": 0.4134, + "step": 132650 + }, + { + "epoch": 0.6846523338544327, + "grad_norm": 23238.1953125, + "learning_rate": 3.898662572402468e-05, + "loss": 0.4137, + "step": 132700 + }, + { + "epoch": 0.6849103038370455, + "grad_norm": 21690.37890625, + "learning_rate": 3.89509957177896e-05, + "loss": 0.4114, + "step": 132750 + }, + { + "epoch": 0.6851682738196584, + "grad_norm": 25762.189453125, + "learning_rate": 3.891537160929907e-05, + "loss": 0.4134, + "step": 132800 + }, + { + "epoch": 0.6854262438022711, + "grad_norm": 22006.044921875, + "learning_rate": 3.88797534175685e-05, + "loss": 0.4132, + "step": 132850 + }, + { + "epoch": 0.685684213784884, + "grad_norm": 22149.5546875, + "learning_rate": 3.8844141161610256e-05, + "loss": 0.4154, + "step": 132900 + }, + { + "epoch": 0.6859421837674968, + "grad_norm": 23865.419921875, + "learning_rate": 3.880853486043343e-05, + "loss": 0.4135, + "step": 132950 + }, + { + "epoch": 0.6862001537501097, + "grad_norm": 22708.126953125, + "learning_rate": 3.877293453304399e-05, + "loss": 0.4143, + "step": 133000 + }, + { + "epoch": 0.6864581237327224, + "grad_norm": 19948.517578125, + "learning_rate": 3.8737340198444683e-05, + "loss": 0.4181, + "step": 133050 + }, + { + "epoch": 0.6867160937153353, + "grad_norm": 22594.826171875, + "learning_rate": 3.870175187563509e-05, + "loss": 0.4108, + "step": 133100 + }, + { + "epoch": 0.6869740636979481, + "grad_norm": 24876.56640625, + "learning_rate": 3.866616958361159e-05, + "loss": 0.4136, + "step": 133150 + }, + { + "epoch": 0.687232033680561, + "grad_norm": 20055.0859375, + "learning_rate": 3.8630593341367285e-05, + "loss": 0.4176, + "step": 133200 + }, + { + "epoch": 0.6874900036631737, + "grad_norm": 24807.9140625, + "learning_rate": 3.8595023167892096e-05, + "loss": 0.4084, + "step": 133250 + }, + { + "epoch": 0.6877479736457865, + "grad_norm": 21060.78125, + "learning_rate": 3.8559459082172696e-05, + "loss": 0.4086, + "step": 133300 + }, + { + "epoch": 0.6880059436283994, + "grad_norm": 22740.255859375, + "learning_rate": 3.852390110319252e-05, + "loss": 0.4109, + "step": 133350 + }, + { + "epoch": 0.6882639136110122, + "grad_norm": 24095.68359375, + "learning_rate": 3.848834924993169e-05, + "loss": 0.4118, + "step": 133400 + }, + { + "epoch": 0.6885218835936251, + "grad_norm": 20011.78125, + "learning_rate": 3.8452803541367136e-05, + "loss": 0.4133, + "step": 133450 + }, + { + "epoch": 0.6887798535762378, + "grad_norm": 21369.7265625, + "learning_rate": 3.8417263996472444e-05, + "loss": 0.4104, + "step": 133500 + }, + { + "epoch": 0.6890378235588507, + "grad_norm": 22532.251953125, + "learning_rate": 3.8381730634217946e-05, + "loss": 0.415, + "step": 133550 + }, + { + "epoch": 0.6892957935414635, + "grad_norm": 21174.34765625, + "learning_rate": 3.8346203473570677e-05, + "loss": 0.4121, + "step": 133600 + }, + { + "epoch": 0.6895537635240764, + "grad_norm": 21758.87109375, + "learning_rate": 3.831068253349431e-05, + "loss": 0.4181, + "step": 133650 + }, + { + "epoch": 0.6898117335066891, + "grad_norm": 21809.083984375, + "learning_rate": 3.827516783294927e-05, + "loss": 0.41, + "step": 133700 + }, + { + "epoch": 0.690069703489302, + "grad_norm": 21419.69921875, + "learning_rate": 3.8239659390892593e-05, + "loss": 0.4166, + "step": 133750 + }, + { + "epoch": 0.6903276734719148, + "grad_norm": 20746.517578125, + "learning_rate": 3.820415722627802e-05, + "loss": 0.4168, + "step": 133800 + }, + { + "epoch": 0.6905856434545277, + "grad_norm": 22737.89453125, + "learning_rate": 3.816866135805589e-05, + "loss": 0.4119, + "step": 133850 + }, + { + "epoch": 0.6908436134371404, + "grad_norm": 23691.408203125, + "learning_rate": 3.813317180517324e-05, + "loss": 0.4105, + "step": 133900 + }, + { + "epoch": 0.6911015834197533, + "grad_norm": 22899.70703125, + "learning_rate": 3.8097688586573684e-05, + "loss": 0.412, + "step": 133950 + }, + { + "epoch": 0.6913595534023661, + "grad_norm": 25553.763671875, + "learning_rate": 3.8062211721197475e-05, + "loss": 0.4158, + "step": 134000 + }, + { + "epoch": 0.691617523384979, + "grad_norm": 22099.93359375, + "learning_rate": 3.802674122798152e-05, + "loss": 0.4149, + "step": 134050 + }, + { + "epoch": 0.6918754933675918, + "grad_norm": 25735.91015625, + "learning_rate": 3.799127712585922e-05, + "loss": 0.4058, + "step": 134100 + }, + { + "epoch": 0.6921334633502045, + "grad_norm": 21259.95703125, + "learning_rate": 3.795581943376067e-05, + "loss": 0.4192, + "step": 134150 + }, + { + "epoch": 0.6923914333328174, + "grad_norm": 22438.23046875, + "learning_rate": 3.7920368170612476e-05, + "loss": 0.414, + "step": 134200 + }, + { + "epoch": 0.6926494033154302, + "grad_norm": 24721.974609375, + "learning_rate": 3.788492335533786e-05, + "loss": 0.4154, + "step": 134250 + }, + { + "epoch": 0.6929073732980431, + "grad_norm": 24267.611328125, + "learning_rate": 3.7849485006856545e-05, + "loss": 0.4108, + "step": 134300 + }, + { + "epoch": 0.6931653432806558, + "grad_norm": 25588.193359375, + "learning_rate": 3.781405314408486e-05, + "loss": 0.4169, + "step": 134350 + }, + { + "epoch": 0.6934233132632687, + "grad_norm": 22651.216796875, + "learning_rate": 3.7778627785935626e-05, + "loss": 0.4112, + "step": 134400 + }, + { + "epoch": 0.6936812832458815, + "grad_norm": 24765.76953125, + "learning_rate": 3.774320895131823e-05, + "loss": 0.4173, + "step": 134450 + }, + { + "epoch": 0.6939392532284944, + "grad_norm": 25384.44921875, + "learning_rate": 3.7707796659138584e-05, + "loss": 0.4097, + "step": 134500 + }, + { + "epoch": 0.6941972232111071, + "grad_norm": 21145.587890625, + "learning_rate": 3.767239092829903e-05, + "loss": 0.4125, + "step": 134550 + }, + { + "epoch": 0.69445519319372, + "grad_norm": 22693.28515625, + "learning_rate": 3.763699177769849e-05, + "loss": 0.4111, + "step": 134600 + }, + { + "epoch": 0.6947131631763328, + "grad_norm": 20415.33984375, + "learning_rate": 3.760159922623235e-05, + "loss": 0.4178, + "step": 134650 + }, + { + "epoch": 0.6949711331589457, + "grad_norm": 23304.33984375, + "learning_rate": 3.756621329279247e-05, + "loss": 0.4142, + "step": 134700 + }, + { + "epoch": 0.6952291031415585, + "grad_norm": 22485.029296875, + "learning_rate": 3.7530833996267156e-05, + "loss": 0.4129, + "step": 134750 + }, + { + "epoch": 0.6954870731241712, + "grad_norm": 20506.5625, + "learning_rate": 3.7495461355541206e-05, + "loss": 0.4104, + "step": 134800 + }, + { + "epoch": 0.6957450431067841, + "grad_norm": 26106.26953125, + "learning_rate": 3.746009538949584e-05, + "loss": 0.4122, + "step": 134850 + }, + { + "epoch": 0.6960030130893969, + "grad_norm": 25230.55859375, + "learning_rate": 3.742473611700874e-05, + "loss": 0.4173, + "step": 134900 + }, + { + "epoch": 0.6962609830720098, + "grad_norm": 23462.197265625, + "learning_rate": 3.738938355695402e-05, + "loss": 0.4211, + "step": 134950 + }, + { + "epoch": 0.6965189530546225, + "grad_norm": 22550.8359375, + "learning_rate": 3.735403772820213e-05, + "loss": 0.4154, + "step": 135000 + }, + { + "epoch": 0.6965189530546225, + "eval_loss": 0.399837851524353, + "eval_runtime": 3136.0222, + "eval_samples_per_second": 988.871, + "eval_steps_per_second": 1.931, + "step": 135000 + }, + { + "epoch": 0.6967769230372354, + "grad_norm": 22235.15234375, + "learning_rate": 3.731869864962004e-05, + "loss": 0.4183, + "step": 135050 + }, + { + "epoch": 0.6970348930198482, + "grad_norm": 21969.208984375, + "learning_rate": 3.728336634007105e-05, + "loss": 0.41, + "step": 135100 + }, + { + "epoch": 0.6972928630024611, + "grad_norm": 22907.32421875, + "learning_rate": 3.724804081841488e-05, + "loss": 0.4213, + "step": 135150 + }, + { + "epoch": 0.6975508329850738, + "grad_norm": 22994.646484375, + "learning_rate": 3.721272210350757e-05, + "loss": 0.4103, + "step": 135200 + }, + { + "epoch": 0.6978088029676867, + "grad_norm": 22118.224609375, + "learning_rate": 3.717741021420162e-05, + "loss": 0.4195, + "step": 135250 + }, + { + "epoch": 0.6980667729502995, + "grad_norm": 19673.6484375, + "learning_rate": 3.7142105169345764e-05, + "loss": 0.4105, + "step": 135300 + }, + { + "epoch": 0.6983247429329124, + "grad_norm": 23110.041015625, + "learning_rate": 3.71068069877852e-05, + "loss": 0.4132, + "step": 135350 + }, + { + "epoch": 0.6985827129155251, + "grad_norm": 26589.453125, + "learning_rate": 3.707151568836144e-05, + "loss": 0.4171, + "step": 135400 + }, + { + "epoch": 0.6988406828981379, + "grad_norm": 25272.74609375, + "learning_rate": 3.7036231289912206e-05, + "loss": 0.4098, + "step": 135450 + }, + { + "epoch": 0.6990986528807508, + "grad_norm": 23238.626953125, + "learning_rate": 3.700095381127172e-05, + "loss": 0.4102, + "step": 135500 + }, + { + "epoch": 0.6993566228633636, + "grad_norm": 25412.8203125, + "learning_rate": 3.696568327127036e-05, + "loss": 0.4131, + "step": 135550 + }, + { + "epoch": 0.6996145928459765, + "grad_norm": 22329.0703125, + "learning_rate": 3.693041968873488e-05, + "loss": 0.4196, + "step": 135600 + }, + { + "epoch": 0.6998725628285892, + "grad_norm": 23497.068359375, + "learning_rate": 3.6895163082488294e-05, + "loss": 0.4137, + "step": 135650 + }, + { + "epoch": 0.7001305328112021, + "grad_norm": 23415.0859375, + "learning_rate": 3.6859913471349906e-05, + "loss": 0.4088, + "step": 135700 + }, + { + "epoch": 0.7003885027938149, + "grad_norm": 24474.064453125, + "learning_rate": 3.682467087413525e-05, + "loss": 0.4122, + "step": 135750 + }, + { + "epoch": 0.7006464727764278, + "grad_norm": 24427.3359375, + "learning_rate": 3.678943530965615e-05, + "loss": 0.4133, + "step": 135800 + }, + { + "epoch": 0.7009044427590405, + "grad_norm": 24399.58203125, + "learning_rate": 3.675420679672068e-05, + "loss": 0.4113, + "step": 135850 + }, + { + "epoch": 0.7011624127416534, + "grad_norm": 22070.033203125, + "learning_rate": 3.671898535413313e-05, + "loss": 0.4099, + "step": 135900 + }, + { + "epoch": 0.7014203827242662, + "grad_norm": 21846.20703125, + "learning_rate": 3.668377100069404e-05, + "loss": 0.4164, + "step": 135950 + }, + { + "epoch": 0.7016783527068791, + "grad_norm": 21927.2265625, + "learning_rate": 3.664856375520012e-05, + "loss": 0.4124, + "step": 136000 + }, + { + "epoch": 0.7019363226894918, + "grad_norm": 22155.341796875, + "learning_rate": 3.6613363636444344e-05, + "loss": 0.416, + "step": 136050 + }, + { + "epoch": 0.7021942926721046, + "grad_norm": 23344.486328125, + "learning_rate": 3.6578170663215826e-05, + "loss": 0.4162, + "step": 136100 + }, + { + "epoch": 0.7024522626547175, + "grad_norm": 23390.642578125, + "learning_rate": 3.6542984854299936e-05, + "loss": 0.4082, + "step": 136150 + }, + { + "epoch": 0.7027102326373303, + "grad_norm": 22980.90625, + "learning_rate": 3.6507806228478125e-05, + "loss": 0.4067, + "step": 136200 + }, + { + "epoch": 0.7029682026199432, + "grad_norm": 22321.662109375, + "learning_rate": 3.6472634804528095e-05, + "loss": 0.4129, + "step": 136250 + }, + { + "epoch": 0.7032261726025559, + "grad_norm": 22719.455078125, + "learning_rate": 3.643747060122366e-05, + "loss": 0.4169, + "step": 136300 + }, + { + "epoch": 0.7034841425851688, + "grad_norm": 25283.494140625, + "learning_rate": 3.640231363733481e-05, + "loss": 0.4081, + "step": 136350 + }, + { + "epoch": 0.7037421125677816, + "grad_norm": 24430.919921875, + "learning_rate": 3.636716393162764e-05, + "loss": 0.4187, + "step": 136400 + }, + { + "epoch": 0.7040000825503945, + "grad_norm": 23372.662109375, + "learning_rate": 3.633202150286435e-05, + "loss": 0.4117, + "step": 136450 + }, + { + "epoch": 0.7042580525330072, + "grad_norm": 23912.595703125, + "learning_rate": 3.6296886369803346e-05, + "loss": 0.4126, + "step": 136500 + }, + { + "epoch": 0.7045160225156201, + "grad_norm": 24092.0390625, + "learning_rate": 3.626175855119903e-05, + "loss": 0.4163, + "step": 136550 + }, + { + "epoch": 0.7047739924982329, + "grad_norm": 23452.2421875, + "learning_rate": 3.6226638065802e-05, + "loss": 0.4088, + "step": 136600 + }, + { + "epoch": 0.7050319624808458, + "grad_norm": 24399.787109375, + "learning_rate": 3.6191524932358845e-05, + "loss": 0.4139, + "step": 136650 + }, + { + "epoch": 0.7052899324634585, + "grad_norm": 23295.599609375, + "learning_rate": 3.6156419169612287e-05, + "loss": 0.4112, + "step": 136700 + }, + { + "epoch": 0.7055479024460714, + "grad_norm": 25809.876953125, + "learning_rate": 3.6121320796301126e-05, + "loss": 0.4141, + "step": 136750 + }, + { + "epoch": 0.7058058724286842, + "grad_norm": 21679.818359375, + "learning_rate": 3.608622983116018e-05, + "loss": 0.4183, + "step": 136800 + }, + { + "epoch": 0.706063842411297, + "grad_norm": 24492.578125, + "learning_rate": 3.6051146292920334e-05, + "loss": 0.4103, + "step": 136850 + }, + { + "epoch": 0.7063218123939099, + "grad_norm": 24805.59375, + "learning_rate": 3.601607020030847e-05, + "loss": 0.4129, + "step": 136900 + }, + { + "epoch": 0.7065797823765226, + "grad_norm": 23000.9765625, + "learning_rate": 3.5981001572047566e-05, + "loss": 0.4091, + "step": 136950 + }, + { + "epoch": 0.7068377523591355, + "grad_norm": 24590.6875, + "learning_rate": 3.594594042685655e-05, + "loss": 0.4061, + "step": 137000 + }, + { + "epoch": 0.7070957223417483, + "grad_norm": 22223.16015625, + "learning_rate": 3.5910886783450416e-05, + "loss": 0.4174, + "step": 137050 + }, + { + "epoch": 0.7073536923243612, + "grad_norm": 28207.7578125, + "learning_rate": 3.587584066054007e-05, + "loss": 0.4119, + "step": 137100 + }, + { + "epoch": 0.7076116623069739, + "grad_norm": 23703.271484375, + "learning_rate": 3.584080207683249e-05, + "loss": 0.4104, + "step": 137150 + }, + { + "epoch": 0.7078696322895868, + "grad_norm": 24903.92578125, + "learning_rate": 3.580577105103059e-05, + "loss": 0.4139, + "step": 137200 + }, + { + "epoch": 0.7081276022721996, + "grad_norm": 21130.029296875, + "learning_rate": 3.5770747601833235e-05, + "loss": 0.4208, + "step": 137250 + }, + { + "epoch": 0.7083855722548125, + "grad_norm": 22223.611328125, + "learning_rate": 3.5735731747935306e-05, + "loss": 0.4118, + "step": 137300 + }, + { + "epoch": 0.7086435422374252, + "grad_norm": 21862.12109375, + "learning_rate": 3.570072350802753e-05, + "loss": 0.4101, + "step": 137350 + }, + { + "epoch": 0.708901512220038, + "grad_norm": 22504.25390625, + "learning_rate": 3.566572290079667e-05, + "loss": 0.4187, + "step": 137400 + }, + { + "epoch": 0.7091594822026509, + "grad_norm": 21898.53125, + "learning_rate": 3.563072994492535e-05, + "loss": 0.4068, + "step": 137450 + }, + { + "epoch": 0.7094174521852638, + "grad_norm": 21629.5859375, + "learning_rate": 3.559574465909215e-05, + "loss": 0.4107, + "step": 137500 + }, + { + "epoch": 0.7096754221678765, + "grad_norm": 23078.080078125, + "learning_rate": 3.5560767061971515e-05, + "loss": 0.4093, + "step": 137550 + }, + { + "epoch": 0.7099333921504893, + "grad_norm": 21831.11328125, + "learning_rate": 3.5525797172233826e-05, + "loss": 0.4083, + "step": 137600 + }, + { + "epoch": 0.7101913621331022, + "grad_norm": 20934.220703125, + "learning_rate": 3.5490835008545334e-05, + "loss": 0.4143, + "step": 137650 + }, + { + "epoch": 0.710449332115715, + "grad_norm": 21335.0, + "learning_rate": 3.545588058956816e-05, + "loss": 0.4104, + "step": 137700 + }, + { + "epoch": 0.7107073020983279, + "grad_norm": 20424.279296875, + "learning_rate": 3.542093393396031e-05, + "loss": 0.4117, + "step": 137750 + }, + { + "epoch": 0.7109652720809406, + "grad_norm": 24527.76171875, + "learning_rate": 3.5385995060375596e-05, + "loss": 0.4128, + "step": 137800 + }, + { + "epoch": 0.7112232420635535, + "grad_norm": 23370.17578125, + "learning_rate": 3.535106398746376e-05, + "loss": 0.4149, + "step": 137850 + }, + { + "epoch": 0.7114812120461663, + "grad_norm": 22996.2890625, + "learning_rate": 3.531614073387028e-05, + "loss": 0.412, + "step": 137900 + }, + { + "epoch": 0.7117391820287792, + "grad_norm": 26592.931640625, + "learning_rate": 3.528122531823657e-05, + "loss": 0.4111, + "step": 137950 + }, + { + "epoch": 0.7119971520113919, + "grad_norm": 22353.35546875, + "learning_rate": 3.5246317759199745e-05, + "loss": 0.412, + "step": 138000 + }, + { + "epoch": 0.7122551219940048, + "grad_norm": 22266.91796875, + "learning_rate": 3.521141807539281e-05, + "loss": 0.4113, + "step": 138050 + }, + { + "epoch": 0.7125130919766176, + "grad_norm": 21723.318359375, + "learning_rate": 3.517652628544457e-05, + "loss": 0.4058, + "step": 138100 + }, + { + "epoch": 0.7127710619592305, + "grad_norm": 23738.322265625, + "learning_rate": 3.5141642407979535e-05, + "loss": 0.4072, + "step": 138150 + }, + { + "epoch": 0.7130290319418432, + "grad_norm": 25993.587890625, + "learning_rate": 3.5106766461618083e-05, + "loss": 0.4066, + "step": 138200 + }, + { + "epoch": 0.713287001924456, + "grad_norm": 23321.55859375, + "learning_rate": 3.50718984649763e-05, + "loss": 0.4104, + "step": 138250 + }, + { + "epoch": 0.7135449719070689, + "grad_norm": 22022.267578125, + "learning_rate": 3.503703843666605e-05, + "loss": 0.4096, + "step": 138300 + }, + { + "epoch": 0.7138029418896817, + "grad_norm": 22249.640625, + "learning_rate": 3.500218639529493e-05, + "loss": 0.4121, + "step": 138350 + }, + { + "epoch": 0.7140609118722946, + "grad_norm": 21145.283203125, + "learning_rate": 3.496734235946632e-05, + "loss": 0.4126, + "step": 138400 + }, + { + "epoch": 0.7143188818549073, + "grad_norm": 22439.38671875, + "learning_rate": 3.493250634777924e-05, + "loss": 0.4076, + "step": 138450 + }, + { + "epoch": 0.7145768518375202, + "grad_norm": 25641.93359375, + "learning_rate": 3.4897678378828516e-05, + "loss": 0.4105, + "step": 138500 + }, + { + "epoch": 0.714834821820133, + "grad_norm": 22200.46875, + "learning_rate": 3.486285847120465e-05, + "loss": 0.4097, + "step": 138550 + }, + { + "epoch": 0.7150927918027459, + "grad_norm": 22691.666015625, + "learning_rate": 3.482804664349381e-05, + "loss": 0.4154, + "step": 138600 + }, + { + "epoch": 0.7153507617853586, + "grad_norm": 22139.16796875, + "learning_rate": 3.479324291427788e-05, + "loss": 0.4124, + "step": 138650 + }, + { + "epoch": 0.7156087317679715, + "grad_norm": 23695.7578125, + "learning_rate": 3.4758447302134414e-05, + "loss": 0.4174, + "step": 138700 + }, + { + "epoch": 0.7158667017505843, + "grad_norm": 24720.06640625, + "learning_rate": 3.472365982563666e-05, + "loss": 0.4095, + "step": 138750 + }, + { + "epoch": 0.7161246717331972, + "grad_norm": 22861.171875, + "learning_rate": 3.4688880503353474e-05, + "loss": 0.4039, + "step": 138800 + }, + { + "epoch": 0.7163826417158099, + "grad_norm": 22751.833984375, + "learning_rate": 3.465410935384939e-05, + "loss": 0.4175, + "step": 138850 + }, + { + "epoch": 0.7166406116984227, + "grad_norm": 22689.5, + "learning_rate": 3.461934639568457e-05, + "loss": 0.4133, + "step": 138900 + }, + { + "epoch": 0.7168985816810356, + "grad_norm": 23292.1328125, + "learning_rate": 3.458459164741482e-05, + "loss": 0.4062, + "step": 138950 + }, + { + "epoch": 0.7171565516636484, + "grad_norm": 22390.515625, + "learning_rate": 3.4549845127591563e-05, + "loss": 0.4169, + "step": 139000 + }, + { + "epoch": 0.7174145216462613, + "grad_norm": 23531.9921875, + "learning_rate": 3.451510685476178e-05, + "loss": 0.4084, + "step": 139050 + }, + { + "epoch": 0.717672491628874, + "grad_norm": 23847.154296875, + "learning_rate": 3.448037684746812e-05, + "loss": 0.4134, + "step": 139100 + }, + { + "epoch": 0.7179304616114869, + "grad_norm": 22651.15234375, + "learning_rate": 3.4445655124248774e-05, + "loss": 0.4118, + "step": 139150 + }, + { + "epoch": 0.7181884315940997, + "grad_norm": 21893.123046875, + "learning_rate": 3.441094170363755e-05, + "loss": 0.4065, + "step": 139200 + }, + { + "epoch": 0.7184464015767126, + "grad_norm": 22238.685546875, + "learning_rate": 3.4376236604163756e-05, + "loss": 0.4164, + "step": 139250 + }, + { + "epoch": 0.7187043715593253, + "grad_norm": 25605.083984375, + "learning_rate": 3.434153984435234e-05, + "loss": 0.4105, + "step": 139300 + }, + { + "epoch": 0.7189623415419382, + "grad_norm": 22414.0703125, + "learning_rate": 3.430685144272374e-05, + "loss": 0.4095, + "step": 139350 + }, + { + "epoch": 0.719220311524551, + "grad_norm": 22067.443359375, + "learning_rate": 3.4272171417793954e-05, + "loss": 0.4105, + "step": 139400 + }, + { + "epoch": 0.7194782815071639, + "grad_norm": 22398.36328125, + "learning_rate": 3.423749978807454e-05, + "loss": 0.4065, + "step": 139450 + }, + { + "epoch": 0.7197362514897766, + "grad_norm": 25660.017578125, + "learning_rate": 3.420283657207248e-05, + "loss": 0.4139, + "step": 139500 + }, + { + "epoch": 0.7199942214723895, + "grad_norm": 27245.4609375, + "learning_rate": 3.416818178829039e-05, + "loss": 0.4106, + "step": 139550 + }, + { + "epoch": 0.7202521914550023, + "grad_norm": 22430.6484375, + "learning_rate": 3.413353545522628e-05, + "loss": 0.4103, + "step": 139600 + }, + { + "epoch": 0.7205101614376151, + "grad_norm": 25269.876953125, + "learning_rate": 3.409889759137373e-05, + "loss": 0.4073, + "step": 139650 + }, + { + "epoch": 0.720768131420228, + "grad_norm": 22811.275390625, + "learning_rate": 3.406426821522172e-05, + "loss": 0.4156, + "step": 139700 + }, + { + "epoch": 0.7210261014028407, + "grad_norm": 21838.966796875, + "learning_rate": 3.402964734525477e-05, + "loss": 0.4132, + "step": 139750 + }, + { + "epoch": 0.7212840713854536, + "grad_norm": 22130.935546875, + "learning_rate": 3.39950349999528e-05, + "loss": 0.418, + "step": 139800 + }, + { + "epoch": 0.7215420413680664, + "grad_norm": 22744.779296875, + "learning_rate": 3.396043119779123e-05, + "loss": 0.4098, + "step": 139850 + }, + { + "epoch": 0.7218000113506793, + "grad_norm": 22559.07421875, + "learning_rate": 3.392583595724093e-05, + "loss": 0.4159, + "step": 139900 + }, + { + "epoch": 0.722057981333292, + "grad_norm": 20920.349609375, + "learning_rate": 3.3891249296768116e-05, + "loss": 0.406, + "step": 139950 + }, + { + "epoch": 0.7223159513159049, + "grad_norm": 20708.716796875, + "learning_rate": 3.38566712348345e-05, + "loss": 0.4102, + "step": 140000 + }, + { + "epoch": 0.7223159513159049, + "eval_loss": 0.39852654933929443, + "eval_runtime": 3128.1309, + "eval_samples_per_second": 991.365, + "eval_steps_per_second": 1.936, + "step": 140000 + }, + { + "epoch": 0.7225739212985177, + "grad_norm": 24440.734375, + "learning_rate": 3.382210178989718e-05, + "loss": 0.4144, + "step": 140050 + }, + { + "epoch": 0.7228318912811306, + "grad_norm": 22715.88671875, + "learning_rate": 3.378754098040867e-05, + "loss": 0.4146, + "step": 140100 + }, + { + "epoch": 0.7230898612637433, + "grad_norm": 23713.474609375, + "learning_rate": 3.375298882481683e-05, + "loss": 0.4089, + "step": 140150 + }, + { + "epoch": 0.7233478312463562, + "grad_norm": 24705.048828125, + "learning_rate": 3.371844534156497e-05, + "loss": 0.4052, + "step": 140200 + }, + { + "epoch": 0.723605801228969, + "grad_norm": 22624.98046875, + "learning_rate": 3.368391054909169e-05, + "loss": 0.4155, + "step": 140250 + }, + { + "epoch": 0.7238637712115819, + "grad_norm": 24774.72265625, + "learning_rate": 3.364938446583103e-05, + "loss": 0.4058, + "step": 140300 + }, + { + "epoch": 0.7241217411941946, + "grad_norm": 24109.02734375, + "learning_rate": 3.361486711021235e-05, + "loss": 0.4169, + "step": 140350 + }, + { + "epoch": 0.7243797111768074, + "grad_norm": 20315.724609375, + "learning_rate": 3.3580358500660284e-05, + "loss": 0.4135, + "step": 140400 + }, + { + "epoch": 0.7246376811594203, + "grad_norm": 26642.84765625, + "learning_rate": 3.3545858655594935e-05, + "loss": 0.4182, + "step": 140450 + }, + { + "epoch": 0.7248956511420331, + "grad_norm": 23466.93359375, + "learning_rate": 3.351136759343161e-05, + "loss": 0.4098, + "step": 140500 + }, + { + "epoch": 0.725153621124646, + "grad_norm": 25247.11328125, + "learning_rate": 3.3476885332580985e-05, + "loss": 0.4085, + "step": 140550 + }, + { + "epoch": 0.7254115911072587, + "grad_norm": 25220.11328125, + "learning_rate": 3.3442411891449e-05, + "loss": 0.4139, + "step": 140600 + }, + { + "epoch": 0.7256695610898716, + "grad_norm": 21836.095703125, + "learning_rate": 3.3407947288436936e-05, + "loss": 0.4127, + "step": 140650 + }, + { + "epoch": 0.7259275310724844, + "grad_norm": 22301.443359375, + "learning_rate": 3.3373491541941346e-05, + "loss": 0.4127, + "step": 140700 + }, + { + "epoch": 0.7261855010550973, + "grad_norm": 21902.615234375, + "learning_rate": 3.333904467035399e-05, + "loss": 0.4111, + "step": 140750 + }, + { + "epoch": 0.72644347103771, + "grad_norm": 21408.71484375, + "learning_rate": 3.3304606692061984e-05, + "loss": 0.4095, + "step": 140800 + }, + { + "epoch": 0.7267014410203229, + "grad_norm": 26146.03515625, + "learning_rate": 3.3270177625447626e-05, + "loss": 0.4096, + "step": 140850 + }, + { + "epoch": 0.7269594110029357, + "grad_norm": 22772.9921875, + "learning_rate": 3.323575748888852e-05, + "loss": 0.4109, + "step": 140900 + }, + { + "epoch": 0.7272173809855486, + "grad_norm": 24654.810546875, + "learning_rate": 3.320134630075742e-05, + "loss": 0.4135, + "step": 140950 + }, + { + "epoch": 0.7274753509681613, + "grad_norm": 23458.103515625, + "learning_rate": 3.31669440794224e-05, + "loss": 0.4128, + "step": 141000 + }, + { + "epoch": 0.7277333209507741, + "grad_norm": 22455.630859375, + "learning_rate": 3.3132550843246654e-05, + "loss": 0.411, + "step": 141050 + }, + { + "epoch": 0.727991290933387, + "grad_norm": 22372.08203125, + "learning_rate": 3.3098166610588655e-05, + "loss": 0.413, + "step": 141100 + }, + { + "epoch": 0.7282492609159998, + "grad_norm": 22878.216796875, + "learning_rate": 3.306379139980206e-05, + "loss": 0.4054, + "step": 141150 + }, + { + "epoch": 0.7285072308986127, + "grad_norm": 22959.708984375, + "learning_rate": 3.302942522923563e-05, + "loss": 0.4114, + "step": 141200 + }, + { + "epoch": 0.7287652008812254, + "grad_norm": 22574.986328125, + "learning_rate": 3.2995068117233417e-05, + "loss": 0.4105, + "step": 141250 + }, + { + "epoch": 0.7290231708638383, + "grad_norm": 23770.279296875, + "learning_rate": 3.2960720082134555e-05, + "loss": 0.4091, + "step": 141300 + }, + { + "epoch": 0.7292811408464511, + "grad_norm": 23017.416015625, + "learning_rate": 3.292638114227338e-05, + "loss": 0.411, + "step": 141350 + }, + { + "epoch": 0.729539110829064, + "grad_norm": 23605.982421875, + "learning_rate": 3.289205131597932e-05, + "loss": 0.4097, + "step": 141400 + }, + { + "epoch": 0.7297970808116767, + "grad_norm": 22409.12890625, + "learning_rate": 3.2857730621577006e-05, + "loss": 0.4096, + "step": 141450 + }, + { + "epoch": 0.7300550507942896, + "grad_norm": 22681.11328125, + "learning_rate": 3.282341907738613e-05, + "loss": 0.4066, + "step": 141500 + }, + { + "epoch": 0.7303130207769024, + "grad_norm": 27188.859375, + "learning_rate": 3.278911670172154e-05, + "loss": 0.4104, + "step": 141550 + }, + { + "epoch": 0.7305709907595153, + "grad_norm": 25134.85546875, + "learning_rate": 3.2754823512893225e-05, + "loss": 0.4105, + "step": 141600 + }, + { + "epoch": 0.730828960742128, + "grad_norm": 21408.478515625, + "learning_rate": 3.2720539529206154e-05, + "loss": 0.412, + "step": 141650 + }, + { + "epoch": 0.7310869307247408, + "grad_norm": 21062.59375, + "learning_rate": 3.26862647689605e-05, + "loss": 0.411, + "step": 141700 + }, + { + "epoch": 0.7313449007073537, + "grad_norm": 21591.23828125, + "learning_rate": 3.265199925045143e-05, + "loss": 0.4171, + "step": 141750 + }, + { + "epoch": 0.7316028706899665, + "grad_norm": 23328.751953125, + "learning_rate": 3.261774299196926e-05, + "loss": 0.4127, + "step": 141800 + }, + { + "epoch": 0.7318608406725794, + "grad_norm": 27247.59375, + "learning_rate": 3.258349601179928e-05, + "loss": 0.4087, + "step": 141850 + }, + { + "epoch": 0.7321188106551921, + "grad_norm": 24500.822265625, + "learning_rate": 3.254925832822188e-05, + "loss": 0.4015, + "step": 141900 + }, + { + "epoch": 0.732376780637805, + "grad_norm": 25855.849609375, + "learning_rate": 3.251502995951247e-05, + "loss": 0.4125, + "step": 141950 + }, + { + "epoch": 0.7326347506204178, + "grad_norm": 23075.234375, + "learning_rate": 3.248081092394148e-05, + "loss": 0.4112, + "step": 142000 + }, + { + "epoch": 0.7328927206030307, + "grad_norm": 25166.712890625, + "learning_rate": 3.2446601239774405e-05, + "loss": 0.4121, + "step": 142050 + }, + { + "epoch": 0.7331506905856434, + "grad_norm": 23327.337890625, + "learning_rate": 3.241240092527167e-05, + "loss": 0.41, + "step": 142100 + }, + { + "epoch": 0.7334086605682563, + "grad_norm": 34138.34375, + "learning_rate": 3.237820999868876e-05, + "loss": 0.413, + "step": 142150 + }, + { + "epoch": 0.7336666305508691, + "grad_norm": 23031.2109375, + "learning_rate": 3.234402847827612e-05, + "loss": 0.414, + "step": 142200 + }, + { + "epoch": 0.733924600533482, + "grad_norm": 23237.44921875, + "learning_rate": 3.230985638227921e-05, + "loss": 0.4159, + "step": 142250 + }, + { + "epoch": 0.7341825705160947, + "grad_norm": 21437.705078125, + "learning_rate": 3.2275693728938395e-05, + "loss": 0.4078, + "step": 142300 + }, + { + "epoch": 0.7344405404987076, + "grad_norm": 23815.9140625, + "learning_rate": 3.224154053648906e-05, + "loss": 0.4135, + "step": 142350 + }, + { + "epoch": 0.7346985104813204, + "grad_norm": 26809.724609375, + "learning_rate": 3.2207396823161514e-05, + "loss": 0.409, + "step": 142400 + }, + { + "epoch": 0.7349564804639332, + "grad_norm": 21905.6484375, + "learning_rate": 3.2173262607181e-05, + "loss": 0.41, + "step": 142450 + }, + { + "epoch": 0.735214450446546, + "grad_norm": 23628.076171875, + "learning_rate": 3.2139137906767743e-05, + "loss": 0.4175, + "step": 142500 + }, + { + "epoch": 0.7354724204291588, + "grad_norm": 24156.837890625, + "learning_rate": 3.210502274013679e-05, + "loss": 0.4114, + "step": 142550 + }, + { + "epoch": 0.7357303904117717, + "grad_norm": 21517.404296875, + "learning_rate": 3.207091712549819e-05, + "loss": 0.4112, + "step": 142600 + }, + { + "epoch": 0.7359883603943845, + "grad_norm": 22684.734375, + "learning_rate": 3.203682108105685e-05, + "loss": 0.417, + "step": 142650 + }, + { + "epoch": 0.7362463303769974, + "grad_norm": 22205.361328125, + "learning_rate": 3.2002734625012585e-05, + "loss": 0.4111, + "step": 142700 + }, + { + "epoch": 0.7365043003596101, + "grad_norm": 21131.41796875, + "learning_rate": 3.196865777556008e-05, + "loss": 0.4114, + "step": 142750 + }, + { + "epoch": 0.736762270342223, + "grad_norm": 23506.66796875, + "learning_rate": 3.1934590550888894e-05, + "loss": 0.4183, + "step": 142800 + }, + { + "epoch": 0.7370202403248358, + "grad_norm": 24435.4609375, + "learning_rate": 3.190053296918345e-05, + "loss": 0.4166, + "step": 142850 + }, + { + "epoch": 0.7372782103074487, + "grad_norm": 22610.4296875, + "learning_rate": 3.186648504862303e-05, + "loss": 0.4109, + "step": 142900 + }, + { + "epoch": 0.7375361802900614, + "grad_norm": 24722.974609375, + "learning_rate": 3.183244680738178e-05, + "loss": 0.4093, + "step": 142950 + }, + { + "epoch": 0.7377941502726743, + "grad_norm": 23927.6640625, + "learning_rate": 3.1798418263628596e-05, + "loss": 0.4106, + "step": 143000 + }, + { + "epoch": 0.7380521202552871, + "grad_norm": 23958.216796875, + "learning_rate": 3.176439943552732e-05, + "loss": 0.4067, + "step": 143050 + }, + { + "epoch": 0.7383100902379, + "grad_norm": 23272.818359375, + "learning_rate": 3.1730390341236496e-05, + "loss": 0.4086, + "step": 143100 + }, + { + "epoch": 0.7385680602205127, + "grad_norm": 20998.751953125, + "learning_rate": 3.1696390998909556e-05, + "loss": 0.4099, + "step": 143150 + }, + { + "epoch": 0.7388260302031255, + "grad_norm": 24493.677734375, + "learning_rate": 3.166240142669464e-05, + "loss": 0.413, + "step": 143200 + }, + { + "epoch": 0.7390840001857384, + "grad_norm": 22639.927734375, + "learning_rate": 3.162842164273479e-05, + "loss": 0.4105, + "step": 143250 + }, + { + "epoch": 0.7393419701683512, + "grad_norm": 24407.029296875, + "learning_rate": 3.15944516651677e-05, + "loss": 0.4188, + "step": 143300 + }, + { + "epoch": 0.7395999401509641, + "grad_norm": 26538.68359375, + "learning_rate": 3.156049151212591e-05, + "loss": 0.4135, + "step": 143350 + }, + { + "epoch": 0.7398579101335768, + "grad_norm": 24519.060546875, + "learning_rate": 3.1526541201736695e-05, + "loss": 0.4141, + "step": 143400 + }, + { + "epoch": 0.7401158801161897, + "grad_norm": 21236.681640625, + "learning_rate": 3.149260075212206e-05, + "loss": 0.4096, + "step": 143450 + }, + { + "epoch": 0.7403738500988025, + "grad_norm": 24463.015625, + "learning_rate": 3.1458670181398796e-05, + "loss": 0.4035, + "step": 143500 + }, + { + "epoch": 0.7406318200814154, + "grad_norm": 26984.408203125, + "learning_rate": 3.1424749507678336e-05, + "loss": 0.4115, + "step": 143550 + }, + { + "epoch": 0.7408897900640281, + "grad_norm": 25619.35546875, + "learning_rate": 3.139083874906691e-05, + "loss": 0.4131, + "step": 143600 + }, + { + "epoch": 0.741147760046641, + "grad_norm": 24277.7890625, + "learning_rate": 3.13569379236654e-05, + "loss": 0.3994, + "step": 143650 + }, + { + "epoch": 0.7414057300292538, + "grad_norm": 24007.654296875, + "learning_rate": 3.1323047049569446e-05, + "loss": 0.4091, + "step": 143700 + }, + { + "epoch": 0.7416637000118667, + "grad_norm": 21688.703125, + "learning_rate": 3.12891661448693e-05, + "loss": 0.4176, + "step": 143750 + }, + { + "epoch": 0.7419216699944794, + "grad_norm": 22735.900390625, + "learning_rate": 3.125529522764995e-05, + "loss": 0.4091, + "step": 143800 + }, + { + "epoch": 0.7421796399770922, + "grad_norm": 23359.259765625, + "learning_rate": 3.122143431599105e-05, + "loss": 0.4125, + "step": 143850 + }, + { + "epoch": 0.7424376099597051, + "grad_norm": 26637.263671875, + "learning_rate": 3.118758342796687e-05, + "loss": 0.4137, + "step": 143900 + }, + { + "epoch": 0.7426955799423179, + "grad_norm": 24977.3984375, + "learning_rate": 3.1153742581646406e-05, + "loss": 0.4094, + "step": 143950 + }, + { + "epoch": 0.7429535499249308, + "grad_norm": 25850.91796875, + "learning_rate": 3.111991179509318e-05, + "loss": 0.4092, + "step": 144000 + }, + { + "epoch": 0.7432115199075435, + "grad_norm": 22823.0625, + "learning_rate": 3.1086091086365474e-05, + "loss": 0.4111, + "step": 144050 + }, + { + "epoch": 0.7434694898901564, + "grad_norm": 24187.640625, + "learning_rate": 3.1052280473516076e-05, + "loss": 0.414, + "step": 144100 + }, + { + "epoch": 0.7437274598727692, + "grad_norm": 21726.537109375, + "learning_rate": 3.101847997459249e-05, + "loss": 0.4098, + "step": 144150 + }, + { + "epoch": 0.7439854298553821, + "grad_norm": 23075.27734375, + "learning_rate": 3.098468960763671e-05, + "loss": 0.4084, + "step": 144200 + }, + { + "epoch": 0.7442433998379948, + "grad_norm": 24207.529296875, + "learning_rate": 3.095090939068541e-05, + "loss": 0.4156, + "step": 144250 + }, + { + "epoch": 0.7445013698206077, + "grad_norm": 25209.39453125, + "learning_rate": 3.091713934176982e-05, + "loss": 0.4122, + "step": 144300 + }, + { + "epoch": 0.7447593398032205, + "grad_norm": 24308.361328125, + "learning_rate": 3.088337947891573e-05, + "loss": 0.408, + "step": 144350 + }, + { + "epoch": 0.7450173097858334, + "grad_norm": 22416.990234375, + "learning_rate": 3.0849629820143517e-05, + "loss": 0.4136, + "step": 144400 + }, + { + "epoch": 0.7452752797684461, + "grad_norm": 24417.943359375, + "learning_rate": 3.081589038346806e-05, + "loss": 0.4079, + "step": 144450 + }, + { + "epoch": 0.745533249751059, + "grad_norm": 21519.650390625, + "learning_rate": 3.078216118689885e-05, + "loss": 0.4073, + "step": 144500 + }, + { + "epoch": 0.7457912197336718, + "grad_norm": 22821.1796875, + "learning_rate": 3.074844224843986e-05, + "loss": 0.4058, + "step": 144550 + }, + { + "epoch": 0.7460491897162846, + "grad_norm": 22559.86328125, + "learning_rate": 3.071473358608963e-05, + "loss": 0.413, + "step": 144600 + }, + { + "epoch": 0.7463071596988974, + "grad_norm": 25803.658203125, + "learning_rate": 3.068103521784115e-05, + "loss": 0.4077, + "step": 144650 + }, + { + "epoch": 0.7465651296815102, + "grad_norm": 27601.787109375, + "learning_rate": 3.0647347161681983e-05, + "loss": 0.4057, + "step": 144700 + }, + { + "epoch": 0.7468230996641231, + "grad_norm": 21363.67578125, + "learning_rate": 3.061366943559417e-05, + "loss": 0.4082, + "step": 144750 + }, + { + "epoch": 0.7470810696467359, + "grad_norm": 24007.3046875, + "learning_rate": 3.058000205755421e-05, + "loss": 0.4121, + "step": 144800 + }, + { + "epoch": 0.7473390396293488, + "grad_norm": 29940.8125, + "learning_rate": 3.054634504553312e-05, + "loss": 0.4046, + "step": 144850 + }, + { + "epoch": 0.7475970096119615, + "grad_norm": 24161.90234375, + "learning_rate": 3.0512698417496334e-05, + "loss": 0.4108, + "step": 144900 + }, + { + "epoch": 0.7478549795945744, + "grad_norm": 22984.619140625, + "learning_rate": 3.0479062191403785e-05, + "loss": 0.4158, + "step": 144950 + }, + { + "epoch": 0.7481129495771872, + "grad_norm": 26418.95703125, + "learning_rate": 3.0445436385209836e-05, + "loss": 0.4059, + "step": 145000 + }, + { + "epoch": 0.7481129495771872, + "eval_loss": 0.3971329629421234, + "eval_runtime": 3201.285, + "eval_samples_per_second": 968.711, + "eval_steps_per_second": 1.892, + "step": 145000 + }, + { + "epoch": 0.7483709195598001, + "grad_norm": 22503.525390625, + "learning_rate": 3.0411821016863308e-05, + "loss": 0.4048, + "step": 145050 + }, + { + "epoch": 0.7486288895424128, + "grad_norm": 23114.525390625, + "learning_rate": 3.03782161043074e-05, + "loss": 0.4111, + "step": 145100 + }, + { + "epoch": 0.7488868595250256, + "grad_norm": 23249.794921875, + "learning_rate": 3.0344621665479778e-05, + "loss": 0.4093, + "step": 145150 + }, + { + "epoch": 0.7491448295076385, + "grad_norm": 23568.833984375, + "learning_rate": 3.0311037718312518e-05, + "loss": 0.4166, + "step": 145200 + }, + { + "epoch": 0.7494027994902513, + "grad_norm": 21794.6015625, + "learning_rate": 3.027746428073206e-05, + "loss": 0.4122, + "step": 145250 + }, + { + "epoch": 0.7496607694728641, + "grad_norm": 23710.212890625, + "learning_rate": 3.024390137065929e-05, + "loss": 0.4066, + "step": 145300 + }, + { + "epoch": 0.7499187394554769, + "grad_norm": 23179.240234375, + "learning_rate": 3.0210349006009385e-05, + "loss": 0.4127, + "step": 145350 + }, + { + "epoch": 0.7501767094380898, + "grad_norm": 25111.078125, + "learning_rate": 3.017680720469199e-05, + "loss": 0.4128, + "step": 145400 + }, + { + "epoch": 0.7504346794207026, + "grad_norm": 24289.095703125, + "learning_rate": 3.0143275984611042e-05, + "loss": 0.4167, + "step": 145450 + }, + { + "epoch": 0.7506926494033155, + "grad_norm": 22695.802734375, + "learning_rate": 3.0109755363664893e-05, + "loss": 0.4135, + "step": 145500 + }, + { + "epoch": 0.7509506193859282, + "grad_norm": 26995.833984375, + "learning_rate": 3.0076245359746163e-05, + "loss": 0.4057, + "step": 145550 + }, + { + "epoch": 0.7512085893685411, + "grad_norm": 21887.568359375, + "learning_rate": 3.004274599074185e-05, + "loss": 0.4089, + "step": 145600 + }, + { + "epoch": 0.7514665593511539, + "grad_norm": 27344.78125, + "learning_rate": 3.00092572745333e-05, + "loss": 0.4156, + "step": 145650 + }, + { + "epoch": 0.7517245293337668, + "grad_norm": 25476.15234375, + "learning_rate": 2.9975779228996104e-05, + "loss": 0.4113, + "step": 145700 + }, + { + "epoch": 0.7519824993163795, + "grad_norm": 24602.64453125, + "learning_rate": 2.9942311872000215e-05, + "loss": 0.4077, + "step": 145750 + }, + { + "epoch": 0.7522404692989924, + "grad_norm": 24124.984375, + "learning_rate": 2.990885522140985e-05, + "loss": 0.4122, + "step": 145800 + }, + { + "epoch": 0.7524984392816052, + "grad_norm": 24945.2109375, + "learning_rate": 2.987540929508354e-05, + "loss": 0.409, + "step": 145850 + }, + { + "epoch": 0.752756409264218, + "grad_norm": 26535.109375, + "learning_rate": 2.9841974110874037e-05, + "loss": 0.4132, + "step": 145900 + }, + { + "epoch": 0.7530143792468308, + "grad_norm": 21566.904296875, + "learning_rate": 2.980854968662843e-05, + "loss": 0.4073, + "step": 145950 + }, + { + "epoch": 0.7532723492294436, + "grad_norm": 22965.73828125, + "learning_rate": 2.9775136040188007e-05, + "loss": 0.4124, + "step": 146000 + }, + { + "epoch": 0.7535303192120565, + "grad_norm": 25307.123046875, + "learning_rate": 2.974173318938833e-05, + "loss": 0.4134, + "step": 146050 + }, + { + "epoch": 0.7537882891946693, + "grad_norm": 22280.431640625, + "learning_rate": 2.9708341152059226e-05, + "loss": 0.4085, + "step": 146100 + }, + { + "epoch": 0.7540462591772822, + "grad_norm": 25268.705078125, + "learning_rate": 2.9674959946024662e-05, + "loss": 0.4031, + "step": 146150 + }, + { + "epoch": 0.7543042291598949, + "grad_norm": 20014.28125, + "learning_rate": 2.9641589589102918e-05, + "loss": 0.4093, + "step": 146200 + }, + { + "epoch": 0.7545621991425078, + "grad_norm": 28430.544921875, + "learning_rate": 2.9608230099106427e-05, + "loss": 0.4112, + "step": 146250 + }, + { + "epoch": 0.7548201691251206, + "grad_norm": 21031.328125, + "learning_rate": 2.9574881493841867e-05, + "loss": 0.4084, + "step": 146300 + }, + { + "epoch": 0.7550781391077335, + "grad_norm": 24118.916015625, + "learning_rate": 2.9541543791110032e-05, + "loss": 0.4152, + "step": 146350 + }, + { + "epoch": 0.7553361090903462, + "grad_norm": 20663.740234375, + "learning_rate": 2.950821700870598e-05, + "loss": 0.409, + "step": 146400 + }, + { + "epoch": 0.7555940790729591, + "grad_norm": 23081.328125, + "learning_rate": 2.9474901164418877e-05, + "loss": 0.4089, + "step": 146450 + }, + { + "epoch": 0.7558520490555719, + "grad_norm": 24167.80859375, + "learning_rate": 2.9441596276032085e-05, + "loss": 0.4096, + "step": 146500 + }, + { + "epoch": 0.7561100190381848, + "grad_norm": 24959.595703125, + "learning_rate": 2.940830236132313e-05, + "loss": 0.4109, + "step": 146550 + }, + { + "epoch": 0.7563679890207975, + "grad_norm": 22731.36328125, + "learning_rate": 2.9375019438063622e-05, + "loss": 0.41, + "step": 146600 + }, + { + "epoch": 0.7566259590034103, + "grad_norm": 24127.41015625, + "learning_rate": 2.9341747524019368e-05, + "loss": 0.4078, + "step": 146650 + }, + { + "epoch": 0.7568839289860232, + "grad_norm": 27476.791015625, + "learning_rate": 2.9308486636950254e-05, + "loss": 0.4063, + "step": 146700 + }, + { + "epoch": 0.757141898968636, + "grad_norm": 24664.61328125, + "learning_rate": 2.9275236794610328e-05, + "loss": 0.4086, + "step": 146750 + }, + { + "epoch": 0.7573998689512488, + "grad_norm": 23883.0625, + "learning_rate": 2.9241998014747664e-05, + "loss": 0.4046, + "step": 146800 + }, + { + "epoch": 0.7576578389338616, + "grad_norm": 23431.509765625, + "learning_rate": 2.9208770315104515e-05, + "loss": 0.4054, + "step": 146850 + }, + { + "epoch": 0.7579158089164745, + "grad_norm": 25177.9453125, + "learning_rate": 2.9175553713417176e-05, + "loss": 0.4094, + "step": 146900 + }, + { + "epoch": 0.7581737788990873, + "grad_norm": 22862.201171875, + "learning_rate": 2.9142348227416e-05, + "loss": 0.4073, + "step": 146950 + }, + { + "epoch": 0.7584317488817002, + "grad_norm": 21731.240234375, + "learning_rate": 2.9109153874825478e-05, + "loss": 0.4081, + "step": 147000 + }, + { + "epoch": 0.7586897188643129, + "grad_norm": 24952.87109375, + "learning_rate": 2.9075970673364083e-05, + "loss": 0.4092, + "step": 147050 + }, + { + "epoch": 0.7589476888469258, + "grad_norm": 23138.029296875, + "learning_rate": 2.9042798640744385e-05, + "loss": 0.4051, + "step": 147100 + }, + { + "epoch": 0.7592056588295386, + "grad_norm": 21496.501953125, + "learning_rate": 2.900963779467295e-05, + "loss": 0.4096, + "step": 147150 + }, + { + "epoch": 0.7594636288121515, + "grad_norm": 22243.36328125, + "learning_rate": 2.8976488152850406e-05, + "loss": 0.3985, + "step": 147200 + }, + { + "epoch": 0.7597215987947642, + "grad_norm": 24515.029296875, + "learning_rate": 2.894334973297137e-05, + "loss": 0.4043, + "step": 147250 + }, + { + "epoch": 0.759979568777377, + "grad_norm": 23431.802734375, + "learning_rate": 2.8910222552724553e-05, + "loss": 0.4092, + "step": 147300 + }, + { + "epoch": 0.7602375387599899, + "grad_norm": 24192.44140625, + "learning_rate": 2.8877106629792515e-05, + "loss": 0.413, + "step": 147350 + }, + { + "epoch": 0.7604955087426027, + "grad_norm": 24239.015625, + "learning_rate": 2.884400198185196e-05, + "loss": 0.4064, + "step": 147400 + }, + { + "epoch": 0.7607534787252155, + "grad_norm": 22407.27734375, + "learning_rate": 2.881090862657348e-05, + "loss": 0.4086, + "step": 147450 + }, + { + "epoch": 0.7610114487078283, + "grad_norm": 24915.517578125, + "learning_rate": 2.877782658162166e-05, + "loss": 0.4067, + "step": 147500 + }, + { + "epoch": 0.7612694186904412, + "grad_norm": 23721.33984375, + "learning_rate": 2.8744755864655098e-05, + "loss": 0.4078, + "step": 147550 + }, + { + "epoch": 0.761527388673054, + "grad_norm": 23041.625, + "learning_rate": 2.8711696493326233e-05, + "loss": 0.4092, + "step": 147600 + }, + { + "epoch": 0.7617853586556669, + "grad_norm": 24021.81640625, + "learning_rate": 2.867864848528158e-05, + "loss": 0.4116, + "step": 147650 + }, + { + "epoch": 0.7620433286382796, + "grad_norm": 21309.7890625, + "learning_rate": 2.8645611858161502e-05, + "loss": 0.414, + "step": 147700 + }, + { + "epoch": 0.7623012986208925, + "grad_norm": 21959.544921875, + "learning_rate": 2.8612586629600307e-05, + "loss": 0.4113, + "step": 147750 + }, + { + "epoch": 0.7625592686035053, + "grad_norm": 22090.75, + "learning_rate": 2.857957281722623e-05, + "loss": 0.41, + "step": 147800 + }, + { + "epoch": 0.7628172385861182, + "grad_norm": 21273.6640625, + "learning_rate": 2.854657043866138e-05, + "loss": 0.4043, + "step": 147850 + }, + { + "epoch": 0.7630752085687309, + "grad_norm": 22781.33984375, + "learning_rate": 2.8513579511521825e-05, + "loss": 0.4009, + "step": 147900 + }, + { + "epoch": 0.7633331785513437, + "grad_norm": 24383.95703125, + "learning_rate": 2.8480600053417472e-05, + "loss": 0.4077, + "step": 147950 + }, + { + "epoch": 0.7635911485339566, + "grad_norm": 23988.673828125, + "learning_rate": 2.8447632081952104e-05, + "loss": 0.4048, + "step": 148000 + }, + { + "epoch": 0.7638491185165694, + "grad_norm": 24593.1484375, + "learning_rate": 2.8414675614723397e-05, + "loss": 0.4145, + "step": 148050 + }, + { + "epoch": 0.7641070884991822, + "grad_norm": 25818.216796875, + "learning_rate": 2.838173066932287e-05, + "loss": 0.408, + "step": 148100 + }, + { + "epoch": 0.764365058481795, + "grad_norm": 25780.1796875, + "learning_rate": 2.8348797263335886e-05, + "loss": 0.4109, + "step": 148150 + }, + { + "epoch": 0.7646230284644079, + "grad_norm": 22835.51171875, + "learning_rate": 2.8315875414341687e-05, + "loss": 0.4037, + "step": 148200 + }, + { + "epoch": 0.7648809984470207, + "grad_norm": 22711.501953125, + "learning_rate": 2.8282965139913296e-05, + "loss": 0.4117, + "step": 148250 + }, + { + "epoch": 0.7651389684296336, + "grad_norm": 22654.080078125, + "learning_rate": 2.825006645761758e-05, + "loss": 0.4094, + "step": 148300 + }, + { + "epoch": 0.7653969384122463, + "grad_norm": 23406.8671875, + "learning_rate": 2.821717938501526e-05, + "loss": 0.4096, + "step": 148350 + }, + { + "epoch": 0.7656549083948592, + "grad_norm": 23591.68359375, + "learning_rate": 2.8184303939660745e-05, + "loss": 0.4087, + "step": 148400 + }, + { + "epoch": 0.765912878377472, + "grad_norm": 21550.94140625, + "learning_rate": 2.815144013910237e-05, + "loss": 0.4046, + "step": 148450 + }, + { + "epoch": 0.7661708483600849, + "grad_norm": 23503.48046875, + "learning_rate": 2.8118588000882177e-05, + "loss": 0.4116, + "step": 148500 + }, + { + "epoch": 0.7664288183426976, + "grad_norm": 25247.244140625, + "learning_rate": 2.8085747542536e-05, + "loss": 0.4023, + "step": 148550 + }, + { + "epoch": 0.7666867883253105, + "grad_norm": 23665.91796875, + "learning_rate": 2.805291878159344e-05, + "loss": 0.4117, + "step": 148600 + }, + { + "epoch": 0.7669447583079233, + "grad_norm": 22785.59765625, + "learning_rate": 2.8020101735577837e-05, + "loss": 0.4084, + "step": 148650 + }, + { + "epoch": 0.7672027282905362, + "grad_norm": 20447.72265625, + "learning_rate": 2.7987296422006327e-05, + "loss": 0.4091, + "step": 148700 + }, + { + "epoch": 0.7674606982731489, + "grad_norm": 24965.869140625, + "learning_rate": 2.795450285838974e-05, + "loss": 0.4067, + "step": 148750 + }, + { + "epoch": 0.7677186682557617, + "grad_norm": 24323.09765625, + "learning_rate": 2.7921721062232637e-05, + "loss": 0.4037, + "step": 148800 + }, + { + "epoch": 0.7679766382383746, + "grad_norm": 23956.177734375, + "learning_rate": 2.7888951051033314e-05, + "loss": 0.4079, + "step": 148850 + }, + { + "epoch": 0.7682346082209874, + "grad_norm": 24222.4140625, + "learning_rate": 2.7856192842283756e-05, + "loss": 0.4112, + "step": 148900 + }, + { + "epoch": 0.7684925782036002, + "grad_norm": 24444.046875, + "learning_rate": 2.782344645346966e-05, + "loss": 0.4148, + "step": 148950 + }, + { + "epoch": 0.768750548186213, + "grad_norm": 23160.578125, + "learning_rate": 2.779071190207046e-05, + "loss": 0.4063, + "step": 149000 + }, + { + "epoch": 0.7690085181688259, + "grad_norm": 25806.732421875, + "learning_rate": 2.7757989205559142e-05, + "loss": 0.4112, + "step": 149050 + }, + { + "epoch": 0.7692664881514387, + "grad_norm": 21389.734375, + "learning_rate": 2.7725278381402524e-05, + "loss": 0.4104, + "step": 149100 + }, + { + "epoch": 0.7695244581340516, + "grad_norm": 23550.23828125, + "learning_rate": 2.769257944706098e-05, + "loss": 0.4121, + "step": 149150 + }, + { + "epoch": 0.7697824281166643, + "grad_norm": 21442.373046875, + "learning_rate": 2.765989241998854e-05, + "loss": 0.4087, + "step": 149200 + }, + { + "epoch": 0.7700403980992772, + "grad_norm": 23958.978515625, + "learning_rate": 2.7627217317632993e-05, + "loss": 0.4136, + "step": 149250 + }, + { + "epoch": 0.77029836808189, + "grad_norm": 22143.07421875, + "learning_rate": 2.759455415743556e-05, + "loss": 0.41, + "step": 149300 + }, + { + "epoch": 0.7705563380645029, + "grad_norm": 22873.86328125, + "learning_rate": 2.7561902956831294e-05, + "loss": 0.4094, + "step": 149350 + }, + { + "epoch": 0.7708143080471156, + "grad_norm": 22419.3046875, + "learning_rate": 2.7529263733248734e-05, + "loss": 0.4133, + "step": 149400 + }, + { + "epoch": 0.7710722780297284, + "grad_norm": 22167.474609375, + "learning_rate": 2.7496636504110075e-05, + "loss": 0.4181, + "step": 149450 + }, + { + "epoch": 0.7713302480123413, + "grad_norm": 25449.96875, + "learning_rate": 2.74640212868311e-05, + "loss": 0.412, + "step": 149500 + }, + { + "epoch": 0.7715882179949541, + "grad_norm": 22876.767578125, + "learning_rate": 2.7431418098821154e-05, + "loss": 0.4087, + "step": 149550 + }, + { + "epoch": 0.7718461879775669, + "grad_norm": 25600.65625, + "learning_rate": 2.7398826957483235e-05, + "loss": 0.4133, + "step": 149600 + }, + { + "epoch": 0.7721041579601797, + "grad_norm": 21764.0, + "learning_rate": 2.7366247880213834e-05, + "loss": 0.4073, + "step": 149650 + }, + { + "epoch": 0.7723621279427926, + "grad_norm": 21836.0625, + "learning_rate": 2.7333680884403046e-05, + "loss": 0.4165, + "step": 149700 + }, + { + "epoch": 0.7726200979254054, + "grad_norm": 22049.466796875, + "learning_rate": 2.7301125987434496e-05, + "loss": 0.4104, + "step": 149750 + }, + { + "epoch": 0.7728780679080183, + "grad_norm": 25398.28515625, + "learning_rate": 2.7268583206685348e-05, + "loss": 0.4036, + "step": 149800 + }, + { + "epoch": 0.773136037890631, + "grad_norm": 22303.654296875, + "learning_rate": 2.72360525595263e-05, + "loss": 0.4077, + "step": 149850 + }, + { + "epoch": 0.7733940078732439, + "grad_norm": 24734.65234375, + "learning_rate": 2.7203534063321633e-05, + "loss": 0.409, + "step": 149900 + }, + { + "epoch": 0.7736519778558567, + "grad_norm": 22068.283203125, + "learning_rate": 2.7171027735429023e-05, + "loss": 0.4148, + "step": 149950 + }, + { + "epoch": 0.7739099478384696, + "grad_norm": 23250.4921875, + "learning_rate": 2.7138533593199766e-05, + "loss": 0.4062, + "step": 150000 + }, + { + "epoch": 0.7739099478384696, + "eval_loss": 0.3953176736831665, + "eval_runtime": 3196.6561, + "eval_samples_per_second": 970.114, + "eval_steps_per_second": 1.895, + "step": 150000 + }, + { + "epoch": 0.7741679178210823, + "grad_norm": 26452.75390625, + "learning_rate": 2.710605165397859e-05, + "loss": 0.4098, + "step": 150050 + }, + { + "epoch": 0.7744258878036951, + "grad_norm": 23934.783203125, + "learning_rate": 2.707358193510371e-05, + "loss": 0.4113, + "step": 150100 + }, + { + "epoch": 0.774683857786308, + "grad_norm": 22443.591796875, + "learning_rate": 2.7041124453906884e-05, + "loss": 0.4119, + "step": 150150 + }, + { + "epoch": 0.7749418277689208, + "grad_norm": 23333.529296875, + "learning_rate": 2.7008679227713214e-05, + "loss": 0.4029, + "step": 150200 + }, + { + "epoch": 0.7751997977515336, + "grad_norm": 22431.576171875, + "learning_rate": 2.6976246273841388e-05, + "loss": 0.4045, + "step": 150250 + }, + { + "epoch": 0.7754577677341464, + "grad_norm": 26959.68359375, + "learning_rate": 2.694382560960348e-05, + "loss": 0.4072, + "step": 150300 + }, + { + "epoch": 0.7757157377167593, + "grad_norm": 21064.66015625, + "learning_rate": 2.6911417252304994e-05, + "loss": 0.411, + "step": 150350 + }, + { + "epoch": 0.7759737076993721, + "grad_norm": 23242.583984375, + "learning_rate": 2.6879021219244906e-05, + "loss": 0.4075, + "step": 150400 + }, + { + "epoch": 0.776231677681985, + "grad_norm": 24738.037109375, + "learning_rate": 2.6846637527715546e-05, + "loss": 0.4069, + "step": 150450 + }, + { + "epoch": 0.7764896476645977, + "grad_norm": 23944.759765625, + "learning_rate": 2.681426619500277e-05, + "loss": 0.403, + "step": 150500 + }, + { + "epoch": 0.7767476176472106, + "grad_norm": 22064.611328125, + "learning_rate": 2.678190723838572e-05, + "loss": 0.4045, + "step": 150550 + }, + { + "epoch": 0.7770055876298234, + "grad_norm": 24025.298828125, + "learning_rate": 2.6749560675137002e-05, + "loss": 0.4087, + "step": 150600 + }, + { + "epoch": 0.7772635576124363, + "grad_norm": 20863.119140625, + "learning_rate": 2.6717226522522553e-05, + "loss": 0.4087, + "step": 150650 + }, + { + "epoch": 0.777521527595049, + "grad_norm": 24537.642578125, + "learning_rate": 2.668490479780179e-05, + "loss": 0.4127, + "step": 150700 + }, + { + "epoch": 0.7777794975776618, + "grad_norm": 24400.193359375, + "learning_rate": 2.665259551822733e-05, + "loss": 0.4066, + "step": 150750 + }, + { + "epoch": 0.7780374675602747, + "grad_norm": 25251.81640625, + "learning_rate": 2.6620298701045322e-05, + "loss": 0.4111, + "step": 150800 + }, + { + "epoch": 0.7782954375428875, + "grad_norm": 23078.0, + "learning_rate": 2.658801436349511e-05, + "loss": 0.4109, + "step": 150850 + }, + { + "epoch": 0.7785534075255003, + "grad_norm": 20437.556640625, + "learning_rate": 2.655574252280949e-05, + "loss": 0.4096, + "step": 150900 + }, + { + "epoch": 0.7788113775081131, + "grad_norm": 24091.796875, + "learning_rate": 2.652348319621457e-05, + "loss": 0.4097, + "step": 150950 + }, + { + "epoch": 0.779069347490726, + "grad_norm": 22893.6640625, + "learning_rate": 2.6491236400929686e-05, + "loss": 0.4093, + "step": 151000 + }, + { + "epoch": 0.7793273174733388, + "grad_norm": 22871.80859375, + "learning_rate": 2.645900215416761e-05, + "loss": 0.407, + "step": 151050 + }, + { + "epoch": 0.7795852874559517, + "grad_norm": 21766.30078125, + "learning_rate": 2.642678047313435e-05, + "loss": 0.4071, + "step": 151100 + }, + { + "epoch": 0.7798432574385644, + "grad_norm": 24945.544921875, + "learning_rate": 2.639457137502919e-05, + "loss": 0.4073, + "step": 151150 + }, + { + "epoch": 0.7801012274211773, + "grad_norm": 22374.009765625, + "learning_rate": 2.636237487704475e-05, + "loss": 0.409, + "step": 151200 + }, + { + "epoch": 0.7803591974037901, + "grad_norm": 23499.08984375, + "learning_rate": 2.6330190996366875e-05, + "loss": 0.4087, + "step": 151250 + }, + { + "epoch": 0.780617167386403, + "grad_norm": 24672.017578125, + "learning_rate": 2.629801975017469e-05, + "loss": 0.4075, + "step": 151300 + }, + { + "epoch": 0.7808751373690157, + "grad_norm": 23105.05078125, + "learning_rate": 2.6265861155640626e-05, + "loss": 0.4031, + "step": 151350 + }, + { + "epoch": 0.7811331073516286, + "grad_norm": 23226.171875, + "learning_rate": 2.6233715229930282e-05, + "loss": 0.4137, + "step": 151400 + }, + { + "epoch": 0.7813910773342414, + "grad_norm": 24494.732421875, + "learning_rate": 2.620158199020255e-05, + "loss": 0.4089, + "step": 151450 + }, + { + "epoch": 0.7816490473168543, + "grad_norm": 24024.236328125, + "learning_rate": 2.616946145360952e-05, + "loss": 0.4084, + "step": 151500 + }, + { + "epoch": 0.781907017299467, + "grad_norm": 21957.2265625, + "learning_rate": 2.613735363729649e-05, + "loss": 0.4079, + "step": 151550 + }, + { + "epoch": 0.7821649872820798, + "grad_norm": 22637.291015625, + "learning_rate": 2.6105258558402056e-05, + "loss": 0.4093, + "step": 151600 + }, + { + "epoch": 0.7824229572646927, + "grad_norm": 27436.56640625, + "learning_rate": 2.607317623405787e-05, + "loss": 0.4054, + "step": 151650 + }, + { + "epoch": 0.7826809272473055, + "grad_norm": 21909.509765625, + "learning_rate": 2.6041106681388922e-05, + "loss": 0.4052, + "step": 151700 + }, + { + "epoch": 0.7829388972299183, + "grad_norm": 22887.494140625, + "learning_rate": 2.6009049917513283e-05, + "loss": 0.408, + "step": 151750 + }, + { + "epoch": 0.7831968672125311, + "grad_norm": 20771.53125, + "learning_rate": 2.5977005959542222e-05, + "loss": 0.4052, + "step": 151800 + }, + { + "epoch": 0.783454837195144, + "grad_norm": 22012.322265625, + "learning_rate": 2.5944974824580244e-05, + "loss": 0.4053, + "step": 151850 + }, + { + "epoch": 0.7837128071777568, + "grad_norm": 25365.822265625, + "learning_rate": 2.5912956529724865e-05, + "loss": 0.4141, + "step": 151900 + }, + { + "epoch": 0.7839707771603697, + "grad_norm": 23211.658203125, + "learning_rate": 2.5880951092066885e-05, + "loss": 0.4094, + "step": 151950 + }, + { + "epoch": 0.7842287471429824, + "grad_norm": 21514.79296875, + "learning_rate": 2.584895852869018e-05, + "loss": 0.4056, + "step": 152000 + }, + { + "epoch": 0.7844867171255953, + "grad_norm": 23275.76953125, + "learning_rate": 2.581697885667176e-05, + "loss": 0.4076, + "step": 152050 + }, + { + "epoch": 0.7847446871082081, + "grad_norm": 24080.478515625, + "learning_rate": 2.578501209308174e-05, + "loss": 0.409, + "step": 152100 + }, + { + "epoch": 0.785002657090821, + "grad_norm": 23384.275390625, + "learning_rate": 2.5753058254983376e-05, + "loss": 0.4063, + "step": 152150 + }, + { + "epoch": 0.7852606270734337, + "grad_norm": 22736.451171875, + "learning_rate": 2.572111735943298e-05, + "loss": 0.4054, + "step": 152200 + }, + { + "epoch": 0.7855185970560465, + "grad_norm": 24730.462890625, + "learning_rate": 2.568918942348002e-05, + "loss": 0.4074, + "step": 152250 + }, + { + "epoch": 0.7857765670386594, + "grad_norm": 23020.759765625, + "learning_rate": 2.5657274464166996e-05, + "loss": 0.4143, + "step": 152300 + }, + { + "epoch": 0.7860345370212722, + "grad_norm": 22263.357421875, + "learning_rate": 2.56253724985295e-05, + "loss": 0.4075, + "step": 152350 + }, + { + "epoch": 0.786292507003885, + "grad_norm": 23515.408203125, + "learning_rate": 2.5593483543596165e-05, + "loss": 0.4055, + "step": 152400 + }, + { + "epoch": 0.7865504769864978, + "grad_norm": 21960.447265625, + "learning_rate": 2.55616076163887e-05, + "loss": 0.407, + "step": 152450 + }, + { + "epoch": 0.7868084469691107, + "grad_norm": 26880.94140625, + "learning_rate": 2.55297447339219e-05, + "loss": 0.4029, + "step": 152500 + }, + { + "epoch": 0.7870664169517235, + "grad_norm": 22276.259765625, + "learning_rate": 2.5497894913203492e-05, + "loss": 0.4038, + "step": 152550 + }, + { + "epoch": 0.7873243869343364, + "grad_norm": 22566.541015625, + "learning_rate": 2.5466058171234336e-05, + "loss": 0.4055, + "step": 152600 + }, + { + "epoch": 0.7875823569169491, + "grad_norm": 24620.486328125, + "learning_rate": 2.543423452500826e-05, + "loss": 0.4031, + "step": 152650 + }, + { + "epoch": 0.787840326899562, + "grad_norm": 24162.99609375, + "learning_rate": 2.540242399151208e-05, + "loss": 0.4075, + "step": 152700 + }, + { + "epoch": 0.7880982968821748, + "grad_norm": 25309.958984375, + "learning_rate": 2.537062658772572e-05, + "loss": 0.4052, + "step": 152750 + }, + { + "epoch": 0.7883562668647877, + "grad_norm": 22024.390625, + "learning_rate": 2.533884233062192e-05, + "loss": 0.4036, + "step": 152800 + }, + { + "epoch": 0.7886142368474004, + "grad_norm": 22356.041015625, + "learning_rate": 2.530707123716657e-05, + "loss": 0.4065, + "step": 152850 + }, + { + "epoch": 0.7888722068300132, + "grad_norm": 22957.642578125, + "learning_rate": 2.527531332431844e-05, + "loss": 0.403, + "step": 152900 + }, + { + "epoch": 0.7891301768126261, + "grad_norm": 22161.298828125, + "learning_rate": 2.52435686090293e-05, + "loss": 0.4046, + "step": 152950 + }, + { + "epoch": 0.7893881467952389, + "grad_norm": 22849.720703125, + "learning_rate": 2.5211837108243847e-05, + "loss": 0.4045, + "step": 153000 + }, + { + "epoch": 0.7896461167778517, + "grad_norm": 25891.248046875, + "learning_rate": 2.5180118838899756e-05, + "loss": 0.4083, + "step": 153050 + }, + { + "epoch": 0.7899040867604645, + "grad_norm": 23150.634765625, + "learning_rate": 2.5148413817927598e-05, + "loss": 0.4104, + "step": 153100 + }, + { + "epoch": 0.7901620567430774, + "grad_norm": 23457.515625, + "learning_rate": 2.511672206225094e-05, + "loss": 0.4101, + "step": 153150 + }, + { + "epoch": 0.7904200267256902, + "grad_norm": 21316.8828125, + "learning_rate": 2.508504358878621e-05, + "loss": 0.4091, + "step": 153200 + }, + { + "epoch": 0.7906779967083031, + "grad_norm": 25747.87109375, + "learning_rate": 2.5053378414442748e-05, + "loss": 0.4131, + "step": 153250 + }, + { + "epoch": 0.7909359666909158, + "grad_norm": 21499.56640625, + "learning_rate": 2.502172655612286e-05, + "loss": 0.4028, + "step": 153300 + }, + { + "epoch": 0.7911939366735287, + "grad_norm": 22949.970703125, + "learning_rate": 2.499008803072162e-05, + "loss": 0.4078, + "step": 153350 + }, + { + "epoch": 0.7914519066561415, + "grad_norm": 26207.181640625, + "learning_rate": 2.495846285512714e-05, + "loss": 0.4064, + "step": 153400 + }, + { + "epoch": 0.7917098766387544, + "grad_norm": 25037.625, + "learning_rate": 2.4926851046220246e-05, + "loss": 0.4067, + "step": 153450 + }, + { + "epoch": 0.7919678466213671, + "grad_norm": 24114.482421875, + "learning_rate": 2.4895252620874775e-05, + "loss": 0.4123, + "step": 153500 + }, + { + "epoch": 0.79222581660398, + "grad_norm": 24953.568359375, + "learning_rate": 2.4863667595957325e-05, + "loss": 0.4083, + "step": 153550 + }, + { + "epoch": 0.7924837865865928, + "grad_norm": 24928.2265625, + "learning_rate": 2.483209598832736e-05, + "loss": 0.4066, + "step": 153600 + }, + { + "epoch": 0.7927417565692056, + "grad_norm": 24045.166015625, + "learning_rate": 2.4800537814837227e-05, + "loss": 0.4056, + "step": 153650 + }, + { + "epoch": 0.7929997265518184, + "grad_norm": 24591.826171875, + "learning_rate": 2.476899309233205e-05, + "loss": 0.4094, + "step": 153700 + }, + { + "epoch": 0.7932576965344312, + "grad_norm": 23336.810546875, + "learning_rate": 2.4737461837649782e-05, + "loss": 0.41, + "step": 153750 + }, + { + "epoch": 0.7935156665170441, + "grad_norm": 23454.171875, + "learning_rate": 2.4705944067621216e-05, + "loss": 0.4068, + "step": 153800 + }, + { + "epoch": 0.7937736364996569, + "grad_norm": 25322.201171875, + "learning_rate": 2.467443979906991e-05, + "loss": 0.4097, + "step": 153850 + }, + { + "epoch": 0.7940316064822697, + "grad_norm": 24731.580078125, + "learning_rate": 2.464294904881222e-05, + "loss": 0.4028, + "step": 153900 + }, + { + "epoch": 0.7942895764648825, + "grad_norm": 21753.568359375, + "learning_rate": 2.4611471833657356e-05, + "loss": 0.4148, + "step": 153950 + }, + { + "epoch": 0.7945475464474954, + "grad_norm": 26548.966796875, + "learning_rate": 2.458000817040717e-05, + "loss": 0.4074, + "step": 154000 + }, + { + "epoch": 0.7948055164301082, + "grad_norm": 21149.470703125, + "learning_rate": 2.4548558075856414e-05, + "loss": 0.408, + "step": 154050 + }, + { + "epoch": 0.7950634864127211, + "grad_norm": 25742.859375, + "learning_rate": 2.4517121566792517e-05, + "loss": 0.405, + "step": 154100 + }, + { + "epoch": 0.7953214563953338, + "grad_norm": 20954.91796875, + "learning_rate": 2.4485698659995658e-05, + "loss": 0.3975, + "step": 154150 + }, + { + "epoch": 0.7955794263779467, + "grad_norm": 23551.646484375, + "learning_rate": 2.445428937223884e-05, + "loss": 0.4059, + "step": 154200 + }, + { + "epoch": 0.7958373963605595, + "grad_norm": 25214.693359375, + "learning_rate": 2.4422893720287654e-05, + "loss": 0.4008, + "step": 154250 + }, + { + "epoch": 0.7960953663431724, + "grad_norm": 25346.916015625, + "learning_rate": 2.4391511720900545e-05, + "loss": 0.4035, + "step": 154300 + }, + { + "epoch": 0.7963533363257851, + "grad_norm": 21641.23828125, + "learning_rate": 2.43601433908286e-05, + "loss": 0.4069, + "step": 154350 + }, + { + "epoch": 0.7966113063083979, + "grad_norm": 22860.998046875, + "learning_rate": 2.4328788746815628e-05, + "loss": 0.4022, + "step": 154400 + }, + { + "epoch": 0.7968692762910108, + "grad_norm": 21989.96484375, + "learning_rate": 2.429744780559813e-05, + "loss": 0.4055, + "step": 154450 + }, + { + "epoch": 0.7971272462736236, + "grad_norm": 24413.74609375, + "learning_rate": 2.4266120583905272e-05, + "loss": 0.412, + "step": 154500 + }, + { + "epoch": 0.7973852162562364, + "grad_norm": 24805.859375, + "learning_rate": 2.4234807098458957e-05, + "loss": 0.41, + "step": 154550 + }, + { + "epoch": 0.7976431862388492, + "grad_norm": 23658.326171875, + "learning_rate": 2.42035073659737e-05, + "loss": 0.41, + "step": 154600 + }, + { + "epoch": 0.7979011562214621, + "grad_norm": 25225.228515625, + "learning_rate": 2.417222140315669e-05, + "loss": 0.4069, + "step": 154650 + }, + { + "epoch": 0.7981591262040749, + "grad_norm": 23417.3828125, + "learning_rate": 2.414094922670777e-05, + "loss": 0.4102, + "step": 154700 + }, + { + "epoch": 0.7984170961866878, + "grad_norm": 25014.5078125, + "learning_rate": 2.4109690853319422e-05, + "loss": 0.412, + "step": 154750 + }, + { + "epoch": 0.7986750661693005, + "grad_norm": 25523.3125, + "learning_rate": 2.407844629967674e-05, + "loss": 0.4102, + "step": 154800 + }, + { + "epoch": 0.7989330361519134, + "grad_norm": 23173.44921875, + "learning_rate": 2.404721558245752e-05, + "loss": 0.407, + "step": 154850 + }, + { + "epoch": 0.7991910061345262, + "grad_norm": 24673.5078125, + "learning_rate": 2.401599871833204e-05, + "loss": 0.4054, + "step": 154900 + }, + { + "epoch": 0.799448976117139, + "grad_norm": 24709.765625, + "learning_rate": 2.398479572396331e-05, + "loss": 0.4097, + "step": 154950 + }, + { + "epoch": 0.7997069460997518, + "grad_norm": 22404.29296875, + "learning_rate": 2.395360661600687e-05, + "loss": 0.4072, + "step": 155000 + }, + { + "epoch": 0.7997069460997518, + "eval_loss": 0.39372530579566956, + "eval_runtime": 3195.8879, + "eval_samples_per_second": 970.347, + "eval_steps_per_second": 1.895, + "step": 155000 + }, + { + "epoch": 0.7999649160823646, + "grad_norm": 24004.09375, + "learning_rate": 2.3922431411110834e-05, + "loss": 0.4016, + "step": 155050 + }, + { + "epoch": 0.8002228860649775, + "grad_norm": 25013.6484375, + "learning_rate": 2.3891270125915992e-05, + "loss": 0.4068, + "step": 155100 + }, + { + "epoch": 0.8004808560475903, + "grad_norm": 23532.982421875, + "learning_rate": 2.3860122777055553e-05, + "loss": 0.4036, + "step": 155150 + }, + { + "epoch": 0.8007388260302031, + "grad_norm": 27413.044921875, + "learning_rate": 2.3828989381155426e-05, + "loss": 0.4098, + "step": 155200 + }, + { + "epoch": 0.8009967960128159, + "grad_norm": 25821.794921875, + "learning_rate": 2.379786995483399e-05, + "loss": 0.4076, + "step": 155250 + }, + { + "epoch": 0.8012547659954288, + "grad_norm": 23864.154296875, + "learning_rate": 2.37667645147022e-05, + "loss": 0.4082, + "step": 155300 + }, + { + "epoch": 0.8015127359780416, + "grad_norm": 22892.451171875, + "learning_rate": 2.3735673077363534e-05, + "loss": 0.4116, + "step": 155350 + }, + { + "epoch": 0.8017707059606545, + "grad_norm": 24638.51953125, + "learning_rate": 2.3704595659413987e-05, + "loss": 0.4015, + "step": 155400 + }, + { + "epoch": 0.8020286759432672, + "grad_norm": 23007.734375, + "learning_rate": 2.3673532277442112e-05, + "loss": 0.4075, + "step": 155450 + }, + { + "epoch": 0.8022866459258801, + "grad_norm": 25629.17578125, + "learning_rate": 2.364248294802892e-05, + "loss": 0.4031, + "step": 155500 + }, + { + "epoch": 0.8025446159084929, + "grad_norm": 23949.939453125, + "learning_rate": 2.3611447687747955e-05, + "loss": 0.4091, + "step": 155550 + }, + { + "epoch": 0.8028025858911058, + "grad_norm": 23120.3515625, + "learning_rate": 2.3580426513165228e-05, + "loss": 0.4106, + "step": 155600 + }, + { + "epoch": 0.8030605558737185, + "grad_norm": 26965.955078125, + "learning_rate": 2.3549419440839236e-05, + "loss": 0.4054, + "step": 155650 + }, + { + "epoch": 0.8033185258563313, + "grad_norm": 23370.33984375, + "learning_rate": 2.3518426487320948e-05, + "loss": 0.407, + "step": 155700 + }, + { + "epoch": 0.8035764958389442, + "grad_norm": 22571.12890625, + "learning_rate": 2.3487447669153833e-05, + "loss": 0.4118, + "step": 155750 + }, + { + "epoch": 0.803834465821557, + "grad_norm": 24092.56640625, + "learning_rate": 2.3456483002873768e-05, + "loss": 0.4053, + "step": 155800 + }, + { + "epoch": 0.8040924358041698, + "grad_norm": 24549.140625, + "learning_rate": 2.3425532505009072e-05, + "loss": 0.405, + "step": 155850 + }, + { + "epoch": 0.8043504057867826, + "grad_norm": 23510.904296875, + "learning_rate": 2.3394596192080574e-05, + "loss": 0.4049, + "step": 155900 + }, + { + "epoch": 0.8046083757693955, + "grad_norm": 23147.369140625, + "learning_rate": 2.3363674080601416e-05, + "loss": 0.4032, + "step": 155950 + }, + { + "epoch": 0.8048663457520083, + "grad_norm": 21877.10546875, + "learning_rate": 2.3332766187077264e-05, + "loss": 0.4006, + "step": 156000 + }, + { + "epoch": 0.8051243157346211, + "grad_norm": 24041.384765625, + "learning_rate": 2.330187252800614e-05, + "loss": 0.4056, + "step": 156050 + }, + { + "epoch": 0.8053822857172339, + "grad_norm": 23452.453125, + "learning_rate": 2.327099311987848e-05, + "loss": 0.4071, + "step": 156100 + }, + { + "epoch": 0.8056402556998468, + "grad_norm": 23023.5859375, + "learning_rate": 2.3240127979177123e-05, + "loss": 0.4095, + "step": 156150 + }, + { + "epoch": 0.8058982256824596, + "grad_norm": 23684.615234375, + "learning_rate": 2.3209277122377255e-05, + "loss": 0.4023, + "step": 156200 + }, + { + "epoch": 0.8061561956650725, + "grad_norm": 22598.732421875, + "learning_rate": 2.31784405659465e-05, + "loss": 0.4013, + "step": 156250 + }, + { + "epoch": 0.8064141656476852, + "grad_norm": 21835.93359375, + "learning_rate": 2.3147618326344804e-05, + "loss": 0.4072, + "step": 156300 + }, + { + "epoch": 0.806672135630298, + "grad_norm": 26343.41015625, + "learning_rate": 2.311681042002448e-05, + "loss": 0.4154, + "step": 156350 + }, + { + "epoch": 0.8069301056129109, + "grad_norm": 24116.162109375, + "learning_rate": 2.3086016863430193e-05, + "loss": 0.4032, + "step": 156400 + }, + { + "epoch": 0.8071880755955237, + "grad_norm": 23874.53515625, + "learning_rate": 2.3055237672998946e-05, + "loss": 0.4063, + "step": 156450 + }, + { + "epoch": 0.8074460455781365, + "grad_norm": 25624.203125, + "learning_rate": 2.302447286516006e-05, + "loss": 0.4034, + "step": 156500 + }, + { + "epoch": 0.8077040155607493, + "grad_norm": 22652.2109375, + "learning_rate": 2.2993722456335236e-05, + "loss": 0.4049, + "step": 156550 + }, + { + "epoch": 0.8079619855433622, + "grad_norm": 26234.255859375, + "learning_rate": 2.2962986462938385e-05, + "loss": 0.4035, + "step": 156600 + }, + { + "epoch": 0.808219955525975, + "grad_norm": 24374.974609375, + "learning_rate": 2.293226490137584e-05, + "loss": 0.4052, + "step": 156650 + }, + { + "epoch": 0.8084779255085878, + "grad_norm": 24195.4296875, + "learning_rate": 2.2901557788046146e-05, + "loss": 0.4072, + "step": 156700 + }, + { + "epoch": 0.8087358954912006, + "grad_norm": 24590.525390625, + "learning_rate": 2.2870865139340165e-05, + "loss": 0.4092, + "step": 156750 + }, + { + "epoch": 0.8089938654738135, + "grad_norm": 20863.509765625, + "learning_rate": 2.2840186971641083e-05, + "loss": 0.4073, + "step": 156800 + }, + { + "epoch": 0.8092518354564263, + "grad_norm": 23662.16015625, + "learning_rate": 2.2809523301324238e-05, + "loss": 0.4101, + "step": 156850 + }, + { + "epoch": 0.8095098054390392, + "grad_norm": 21700.666015625, + "learning_rate": 2.2778874144757357e-05, + "loss": 0.4075, + "step": 156900 + }, + { + "epoch": 0.8097677754216519, + "grad_norm": 29026.71484375, + "learning_rate": 2.274823951830036e-05, + "loss": 0.4005, + "step": 156950 + }, + { + "epoch": 0.8100257454042648, + "grad_norm": 27310.48828125, + "learning_rate": 2.2717619438305397e-05, + "loss": 0.4058, + "step": 157000 + }, + { + "epoch": 0.8102837153868776, + "grad_norm": 25008.673828125, + "learning_rate": 2.2687013921116895e-05, + "loss": 0.404, + "step": 157050 + }, + { + "epoch": 0.8105416853694904, + "grad_norm": 22623.57421875, + "learning_rate": 2.2656422983071452e-05, + "loss": 0.4059, + "step": 157100 + }, + { + "epoch": 0.8107996553521032, + "grad_norm": 23960.427734375, + "learning_rate": 2.2625846640497965e-05, + "loss": 0.4096, + "step": 157150 + }, + { + "epoch": 0.811057625334716, + "grad_norm": 22415.021484375, + "learning_rate": 2.2595284909717475e-05, + "loss": 0.4061, + "step": 157200 + }, + { + "epoch": 0.8113155953173289, + "grad_norm": 23358.822265625, + "learning_rate": 2.2564737807043233e-05, + "loss": 0.4003, + "step": 157250 + }, + { + "epoch": 0.8115735652999417, + "grad_norm": 21686.9765625, + "learning_rate": 2.2534205348780702e-05, + "loss": 0.4063, + "step": 157300 + }, + { + "epoch": 0.8118315352825545, + "grad_norm": 22949.484375, + "learning_rate": 2.2503687551227504e-05, + "loss": 0.407, + "step": 157350 + }, + { + "epoch": 0.8120895052651673, + "grad_norm": 21776.201171875, + "learning_rate": 2.2473184430673444e-05, + "loss": 0.4073, + "step": 157400 + }, + { + "epoch": 0.8123474752477802, + "grad_norm": 25641.17578125, + "learning_rate": 2.244269600340055e-05, + "loss": 0.4074, + "step": 157450 + }, + { + "epoch": 0.812605445230393, + "grad_norm": 22723.42578125, + "learning_rate": 2.2412222285682867e-05, + "loss": 0.4119, + "step": 157500 + }, + { + "epoch": 0.8128634152130059, + "grad_norm": 24244.48046875, + "learning_rate": 2.2381763293786746e-05, + "loss": 0.4157, + "step": 157550 + }, + { + "epoch": 0.8131213851956186, + "grad_norm": 26826.337890625, + "learning_rate": 2.235131904397058e-05, + "loss": 0.4102, + "step": 157600 + }, + { + "epoch": 0.8133793551782315, + "grad_norm": 23157.0546875, + "learning_rate": 2.232088955248491e-05, + "loss": 0.4121, + "step": 157650 + }, + { + "epoch": 0.8136373251608443, + "grad_norm": 23352.009765625, + "learning_rate": 2.229047483557245e-05, + "loss": 0.4054, + "step": 157700 + }, + { + "epoch": 0.8138952951434572, + "grad_norm": 24417.2734375, + "learning_rate": 2.2260074909467925e-05, + "loss": 0.4092, + "step": 157750 + }, + { + "epoch": 0.8141532651260699, + "grad_norm": 22345.669921875, + "learning_rate": 2.2229689790398283e-05, + "loss": 0.402, + "step": 157800 + }, + { + "epoch": 0.8144112351086827, + "grad_norm": 22904.20703125, + "learning_rate": 2.2199319494582492e-05, + "loss": 0.4067, + "step": 157850 + }, + { + "epoch": 0.8146692050912956, + "grad_norm": 24132.306640625, + "learning_rate": 2.216896403823162e-05, + "loss": 0.4094, + "step": 157900 + }, + { + "epoch": 0.8149271750739084, + "grad_norm": 24649.001953125, + "learning_rate": 2.2138623437548833e-05, + "loss": 0.4048, + "step": 157950 + }, + { + "epoch": 0.8151851450565212, + "grad_norm": 24956.458984375, + "learning_rate": 2.210829770872933e-05, + "loss": 0.4038, + "step": 158000 + }, + { + "epoch": 0.815443115039134, + "grad_norm": 24047.3515625, + "learning_rate": 2.2077986867960437e-05, + "loss": 0.407, + "step": 158050 + }, + { + "epoch": 0.8157010850217469, + "grad_norm": 22895.953125, + "learning_rate": 2.2047690931421476e-05, + "loss": 0.4033, + "step": 158100 + }, + { + "epoch": 0.8159590550043597, + "grad_norm": 22524.640625, + "learning_rate": 2.201740991528383e-05, + "loss": 0.4136, + "step": 158150 + }, + { + "epoch": 0.8162170249869725, + "grad_norm": 22507.46875, + "learning_rate": 2.1987143835710928e-05, + "loss": 0.4043, + "step": 158200 + }, + { + "epoch": 0.8164749949695853, + "grad_norm": 24044.5390625, + "learning_rate": 2.1956892708858202e-05, + "loss": 0.4099, + "step": 158250 + }, + { + "epoch": 0.8167329649521982, + "grad_norm": 26112.05859375, + "learning_rate": 2.1926656550873103e-05, + "loss": 0.4087, + "step": 158300 + }, + { + "epoch": 0.816990934934811, + "grad_norm": 25168.59375, + "learning_rate": 2.189643537789517e-05, + "loss": 0.4059, + "step": 158350 + }, + { + "epoch": 0.8172489049174239, + "grad_norm": 31289.392578125, + "learning_rate": 2.1866229206055804e-05, + "loss": 0.4048, + "step": 158400 + }, + { + "epoch": 0.8175068749000366, + "grad_norm": 27301.970703125, + "learning_rate": 2.1836038051478508e-05, + "loss": 0.4111, + "step": 158450 + }, + { + "epoch": 0.8177648448826494, + "grad_norm": 22742.66015625, + "learning_rate": 2.180586193027877e-05, + "loss": 0.3998, + "step": 158500 + }, + { + "epoch": 0.8180228148652623, + "grad_norm": 26745.51171875, + "learning_rate": 2.177570085856395e-05, + "loss": 0.4069, + "step": 158550 + }, + { + "epoch": 0.8182807848478751, + "grad_norm": 24821.93359375, + "learning_rate": 2.1745554852433502e-05, + "loss": 0.4057, + "step": 158600 + }, + { + "epoch": 0.8185387548304879, + "grad_norm": 24082.908203125, + "learning_rate": 2.1715423927978755e-05, + "loss": 0.4042, + "step": 158650 + }, + { + "epoch": 0.8187967248131007, + "grad_norm": 23584.001953125, + "learning_rate": 2.168530810128302e-05, + "loss": 0.4062, + "step": 158700 + }, + { + "epoch": 0.8190546947957136, + "grad_norm": 25795.326171875, + "learning_rate": 2.1655207388421532e-05, + "loss": 0.4101, + "step": 158750 + }, + { + "epoch": 0.8193126647783264, + "grad_norm": 22298.908203125, + "learning_rate": 2.1625121805461483e-05, + "loss": 0.4004, + "step": 158800 + }, + { + "epoch": 0.8195706347609392, + "grad_norm": 24439.970703125, + "learning_rate": 2.1595051368461943e-05, + "loss": 0.4078, + "step": 158850 + }, + { + "epoch": 0.819828604743552, + "grad_norm": 24895.5546875, + "learning_rate": 2.1564996093473975e-05, + "loss": 0.4008, + "step": 158900 + }, + { + "epoch": 0.8200865747261649, + "grad_norm": 27615.1171875, + "learning_rate": 2.153495599654048e-05, + "loss": 0.4051, + "step": 158950 + }, + { + "epoch": 0.8203445447087777, + "grad_norm": 22537.25390625, + "learning_rate": 2.150493109369628e-05, + "loss": 0.4078, + "step": 159000 + }, + { + "epoch": 0.8206025146913906, + "grad_norm": 23422.39453125, + "learning_rate": 2.1474921400968085e-05, + "loss": 0.3999, + "step": 159050 + }, + { + "epoch": 0.8208604846740033, + "grad_norm": 24678.099609375, + "learning_rate": 2.1444926934374475e-05, + "loss": 0.4038, + "step": 159100 + }, + { + "epoch": 0.8211184546566161, + "grad_norm": 25680.623046875, + "learning_rate": 2.1414947709925963e-05, + "loss": 0.4082, + "step": 159150 + }, + { + "epoch": 0.821376424639229, + "grad_norm": 26526.724609375, + "learning_rate": 2.1384983743624813e-05, + "loss": 0.4076, + "step": 159200 + }, + { + "epoch": 0.8216343946218418, + "grad_norm": 21391.701171875, + "learning_rate": 2.1355035051465265e-05, + "loss": 0.4003, + "step": 159250 + }, + { + "epoch": 0.8218923646044546, + "grad_norm": 22676.607421875, + "learning_rate": 2.1325101649433327e-05, + "loss": 0.4087, + "step": 159300 + }, + { + "epoch": 0.8221503345870674, + "grad_norm": 23139.802734375, + "learning_rate": 2.1295183553506855e-05, + "loss": 0.4102, + "step": 159350 + }, + { + "epoch": 0.8224083045696803, + "grad_norm": 23598.369140625, + "learning_rate": 2.1265280779655593e-05, + "loss": 0.4027, + "step": 159400 + }, + { + "epoch": 0.8226662745522931, + "grad_norm": 24068.453125, + "learning_rate": 2.1235393343841008e-05, + "loss": 0.4097, + "step": 159450 + }, + { + "epoch": 0.8229242445349059, + "grad_norm": 26833.779296875, + "learning_rate": 2.1205521262016476e-05, + "loss": 0.4094, + "step": 159500 + }, + { + "epoch": 0.8231822145175187, + "grad_norm": 21122.98046875, + "learning_rate": 2.1175664550127123e-05, + "loss": 0.4074, + "step": 159550 + }, + { + "epoch": 0.8234401845001316, + "grad_norm": 24398.310546875, + "learning_rate": 2.1145823224109884e-05, + "loss": 0.4081, + "step": 159600 + }, + { + "epoch": 0.8236981544827444, + "grad_norm": 20830.05078125, + "learning_rate": 2.111599729989348e-05, + "loss": 0.4031, + "step": 159650 + }, + { + "epoch": 0.8239561244653573, + "grad_norm": 24353.29296875, + "learning_rate": 2.108618679339841e-05, + "loss": 0.4037, + "step": 159700 + }, + { + "epoch": 0.82421409444797, + "grad_norm": 22828.130859375, + "learning_rate": 2.1056391720536928e-05, + "loss": 0.4021, + "step": 159750 + }, + { + "epoch": 0.8244720644305829, + "grad_norm": 21661.53515625, + "learning_rate": 2.1026612097213106e-05, + "loss": 0.4117, + "step": 159800 + }, + { + "epoch": 0.8247300344131957, + "grad_norm": 20191.279296875, + "learning_rate": 2.0996847939322707e-05, + "loss": 0.4088, + "step": 159850 + }, + { + "epoch": 0.8249880043958085, + "grad_norm": 23767.8125, + "learning_rate": 2.0967099262753258e-05, + "loss": 0.4035, + "step": 159900 + }, + { + "epoch": 0.8252459743784213, + "grad_norm": 24693.4609375, + "learning_rate": 2.093736608338405e-05, + "loss": 0.4135, + "step": 159950 + }, + { + "epoch": 0.8255039443610341, + "grad_norm": 22759.341796875, + "learning_rate": 2.0907648417086027e-05, + "loss": 0.4048, + "step": 160000 + }, + { + "epoch": 0.8255039443610341, + "eval_loss": 0.3925068974494934, + "eval_runtime": 3187.046, + "eval_samples_per_second": 973.039, + "eval_steps_per_second": 1.901, + "step": 160000 + }, + { + "epoch": 0.825761914343647, + "grad_norm": 25066.45703125, + "learning_rate": 2.0877946279721983e-05, + "loss": 0.4017, + "step": 160050 + }, + { + "epoch": 0.8260198843262598, + "grad_norm": 24734.384765625, + "learning_rate": 2.084825968714626e-05, + "loss": 0.4091, + "step": 160100 + }, + { + "epoch": 0.8262778543088726, + "grad_norm": 26498.201171875, + "learning_rate": 2.0818588655205045e-05, + "loss": 0.4028, + "step": 160150 + }, + { + "epoch": 0.8265358242914854, + "grad_norm": 23436.36328125, + "learning_rate": 2.0788933199736143e-05, + "loss": 0.4019, + "step": 160200 + }, + { + "epoch": 0.8267937942740983, + "grad_norm": 23851.89453125, + "learning_rate": 2.075929333656904e-05, + "loss": 0.4055, + "step": 160250 + }, + { + "epoch": 0.8270517642567111, + "grad_norm": 23416.0625, + "learning_rate": 2.0729669081524977e-05, + "loss": 0.4075, + "step": 160300 + }, + { + "epoch": 0.8273097342393239, + "grad_norm": 22208.994140625, + "learning_rate": 2.070006045041673e-05, + "loss": 0.4047, + "step": 160350 + }, + { + "epoch": 0.8275677042219367, + "grad_norm": 21291.3515625, + "learning_rate": 2.067046745904888e-05, + "loss": 0.405, + "step": 160400 + }, + { + "epoch": 0.8278256742045496, + "grad_norm": 24646.279296875, + "learning_rate": 2.0640890123217565e-05, + "loss": 0.4076, + "step": 160450 + }, + { + "epoch": 0.8280836441871624, + "grad_norm": 22018.609375, + "learning_rate": 2.0611328458710595e-05, + "loss": 0.406, + "step": 160500 + }, + { + "epoch": 0.8283416141697753, + "grad_norm": 30070.40234375, + "learning_rate": 2.0581782481307415e-05, + "loss": 0.4099, + "step": 160550 + }, + { + "epoch": 0.828599584152388, + "grad_norm": 24574.34375, + "learning_rate": 2.0552252206779098e-05, + "loss": 0.4035, + "step": 160600 + }, + { + "epoch": 0.8288575541350008, + "grad_norm": 23137.224609375, + "learning_rate": 2.0522737650888313e-05, + "loss": 0.4006, + "step": 160650 + }, + { + "epoch": 0.8291155241176137, + "grad_norm": 22633.23828125, + "learning_rate": 2.0493238829389393e-05, + "loss": 0.4064, + "step": 160700 + }, + { + "epoch": 0.8293734941002265, + "grad_norm": 23670.525390625, + "learning_rate": 2.046375575802822e-05, + "loss": 0.4084, + "step": 160750 + }, + { + "epoch": 0.8296314640828393, + "grad_norm": 24236.7890625, + "learning_rate": 2.043428845254229e-05, + "loss": 0.413, + "step": 160800 + }, + { + "epoch": 0.8298894340654521, + "grad_norm": 25734.12890625, + "learning_rate": 2.0404836928660676e-05, + "loss": 0.3992, + "step": 160850 + }, + { + "epoch": 0.830147404048065, + "grad_norm": 23417.83203125, + "learning_rate": 2.037540120210401e-05, + "loss": 0.4069, + "step": 160900 + }, + { + "epoch": 0.8304053740306778, + "grad_norm": 24619.853515625, + "learning_rate": 2.0345981288584575e-05, + "loss": 0.4002, + "step": 160950 + }, + { + "epoch": 0.8306633440132906, + "grad_norm": 21862.111328125, + "learning_rate": 2.031657720380608e-05, + "loss": 0.4012, + "step": 161000 + }, + { + "epoch": 0.8309213139959034, + "grad_norm": 23347.91015625, + "learning_rate": 2.0287188963463906e-05, + "loss": 0.4061, + "step": 161050 + }, + { + "epoch": 0.8311792839785163, + "grad_norm": 25119.107421875, + "learning_rate": 2.02578165832449e-05, + "loss": 0.4061, + "step": 161100 + }, + { + "epoch": 0.8314372539611291, + "grad_norm": 22684.50390625, + "learning_rate": 2.0228460078827466e-05, + "loss": 0.4062, + "step": 161150 + }, + { + "epoch": 0.831695223943742, + "grad_norm": 39309.30859375, + "learning_rate": 2.0199119465881565e-05, + "loss": 0.4091, + "step": 161200 + }, + { + "epoch": 0.8319531939263547, + "grad_norm": 22076.8125, + "learning_rate": 2.0169794760068632e-05, + "loss": 0.4052, + "step": 161250 + }, + { + "epoch": 0.8322111639089675, + "grad_norm": 26682.44140625, + "learning_rate": 2.0140485977041636e-05, + "loss": 0.405, + "step": 161300 + }, + { + "epoch": 0.8324691338915804, + "grad_norm": 24586.09375, + "learning_rate": 2.011119313244502e-05, + "loss": 0.4066, + "step": 161350 + }, + { + "epoch": 0.8327271038741932, + "grad_norm": 26363.5703125, + "learning_rate": 2.008191624191475e-05, + "loss": 0.4027, + "step": 161400 + }, + { + "epoch": 0.832985073856806, + "grad_norm": 24361.9921875, + "learning_rate": 2.0052655321078246e-05, + "loss": 0.4041, + "step": 161450 + }, + { + "epoch": 0.8332430438394188, + "grad_norm": 22026.951171875, + "learning_rate": 2.0023410385554466e-05, + "loss": 0.4068, + "step": 161500 + }, + { + "epoch": 0.8335010138220317, + "grad_norm": 24540.068359375, + "learning_rate": 1.9994181450953725e-05, + "loss": 0.4036, + "step": 161550 + }, + { + "epoch": 0.8337589838046445, + "grad_norm": 25837.857421875, + "learning_rate": 1.9964968532877916e-05, + "loss": 0.4052, + "step": 161600 + }, + { + "epoch": 0.8340169537872573, + "grad_norm": 23252.900390625, + "learning_rate": 1.993577164692031e-05, + "loss": 0.4021, + "step": 161650 + }, + { + "epoch": 0.8342749237698701, + "grad_norm": 25305.177734375, + "learning_rate": 1.990659080866562e-05, + "loss": 0.4089, + "step": 161700 + }, + { + "epoch": 0.834532893752483, + "grad_norm": 25317.89453125, + "learning_rate": 1.9877426033690066e-05, + "loss": 0.4082, + "step": 161750 + }, + { + "epoch": 0.8347908637350958, + "grad_norm": 25872.2109375, + "learning_rate": 1.984827733756117e-05, + "loss": 0.4021, + "step": 161800 + }, + { + "epoch": 0.8350488337177087, + "grad_norm": 23915.955078125, + "learning_rate": 1.9819144735837998e-05, + "loss": 0.4054, + "step": 161850 + }, + { + "epoch": 0.8353068037003214, + "grad_norm": 25145.380859375, + "learning_rate": 1.9790028244070946e-05, + "loss": 0.4119, + "step": 161900 + }, + { + "epoch": 0.8355647736829342, + "grad_norm": 24318.28125, + "learning_rate": 1.976092787780184e-05, + "loss": 0.4015, + "step": 161950 + }, + { + "epoch": 0.8358227436655471, + "grad_norm": 22675.845703125, + "learning_rate": 1.973184365256388e-05, + "loss": 0.4107, + "step": 162000 + }, + { + "epoch": 0.83608071364816, + "grad_norm": 23785.451171875, + "learning_rate": 1.9702775583881656e-05, + "loss": 0.408, + "step": 162050 + }, + { + "epoch": 0.8363386836307727, + "grad_norm": 22790.47265625, + "learning_rate": 1.9673723687271174e-05, + "loss": 0.406, + "step": 162100 + }, + { + "epoch": 0.8365966536133855, + "grad_norm": 24380.498046875, + "learning_rate": 1.9644687978239746e-05, + "loss": 0.4105, + "step": 162150 + }, + { + "epoch": 0.8368546235959984, + "grad_norm": 23812.814453125, + "learning_rate": 1.9615668472286085e-05, + "loss": 0.4032, + "step": 162200 + }, + { + "epoch": 0.8371125935786112, + "grad_norm": 22820.734375, + "learning_rate": 1.9586665184900232e-05, + "loss": 0.4072, + "step": 162250 + }, + { + "epoch": 0.837370563561224, + "grad_norm": 22347.779296875, + "learning_rate": 1.955767813156359e-05, + "loss": 0.4045, + "step": 162300 + }, + { + "epoch": 0.8376285335438368, + "grad_norm": 24328.546875, + "learning_rate": 1.9528707327748852e-05, + "loss": 0.4097, + "step": 162350 + }, + { + "epoch": 0.8378865035264497, + "grad_norm": 23850.13671875, + "learning_rate": 1.9499752788920146e-05, + "loss": 0.4085, + "step": 162400 + }, + { + "epoch": 0.8381444735090625, + "grad_norm": 24967.3203125, + "learning_rate": 1.9470814530532756e-05, + "loss": 0.4056, + "step": 162450 + }, + { + "epoch": 0.8384024434916753, + "grad_norm": 23740.197265625, + "learning_rate": 1.9441892568033426e-05, + "loss": 0.4112, + "step": 162500 + }, + { + "epoch": 0.8386604134742881, + "grad_norm": 26039.447265625, + "learning_rate": 1.941298691686012e-05, + "loss": 0.405, + "step": 162550 + }, + { + "epoch": 0.838918383456901, + "grad_norm": 22781.23828125, + "learning_rate": 1.9384097592442102e-05, + "loss": 0.4043, + "step": 162600 + }, + { + "epoch": 0.8391763534395138, + "grad_norm": 25735.17578125, + "learning_rate": 1.935522461019998e-05, + "loss": 0.4021, + "step": 162650 + }, + { + "epoch": 0.8394343234221266, + "grad_norm": 26452.810546875, + "learning_rate": 1.932636798554552e-05, + "loss": 0.4093, + "step": 162700 + }, + { + "epoch": 0.8396922934047394, + "grad_norm": 24199.3515625, + "learning_rate": 1.929752773388189e-05, + "loss": 0.4003, + "step": 162750 + }, + { + "epoch": 0.8399502633873522, + "grad_norm": 27610.30859375, + "learning_rate": 1.9268703870603434e-05, + "loss": 0.4035, + "step": 162800 + }, + { + "epoch": 0.8402082333699651, + "grad_norm": 23799.3359375, + "learning_rate": 1.9239896411095777e-05, + "loss": 0.4072, + "step": 162850 + }, + { + "epoch": 0.8404662033525779, + "grad_norm": 24182.162109375, + "learning_rate": 1.9211105370735784e-05, + "loss": 0.4056, + "step": 162900 + }, + { + "epoch": 0.8407241733351907, + "grad_norm": 21251.0625, + "learning_rate": 1.918233076489153e-05, + "loss": 0.4073, + "step": 162950 + }, + { + "epoch": 0.8409821433178035, + "grad_norm": 22723.09765625, + "learning_rate": 1.9153572608922383e-05, + "loss": 0.4041, + "step": 163000 + }, + { + "epoch": 0.8412401133004164, + "grad_norm": 23557.125, + "learning_rate": 1.9124830918178876e-05, + "loss": 0.4064, + "step": 163050 + }, + { + "epoch": 0.8414980832830292, + "grad_norm": 24273.71484375, + "learning_rate": 1.9096105708002754e-05, + "loss": 0.4072, + "step": 163100 + }, + { + "epoch": 0.841756053265642, + "grad_norm": 24078.10546875, + "learning_rate": 1.9067396993726994e-05, + "loss": 0.409, + "step": 163150 + }, + { + "epoch": 0.8420140232482548, + "grad_norm": 23370.31640625, + "learning_rate": 1.9038704790675738e-05, + "loss": 0.4082, + "step": 163200 + }, + { + "epoch": 0.8422719932308677, + "grad_norm": 23478.564453125, + "learning_rate": 1.901002911416432e-05, + "loss": 0.4082, + "step": 163250 + }, + { + "epoch": 0.8425299632134805, + "grad_norm": 22697.802734375, + "learning_rate": 1.898136997949929e-05, + "loss": 0.4107, + "step": 163300 + }, + { + "epoch": 0.8427879331960934, + "grad_norm": 25571.9765625, + "learning_rate": 1.8952727401978326e-05, + "loss": 0.3996, + "step": 163350 + }, + { + "epoch": 0.8430459031787061, + "grad_norm": 24950.283203125, + "learning_rate": 1.8924101396890264e-05, + "loss": 0.403, + "step": 163400 + }, + { + "epoch": 0.8433038731613189, + "grad_norm": 22436.380859375, + "learning_rate": 1.8895491979515162e-05, + "loss": 0.4041, + "step": 163450 + }, + { + "epoch": 0.8435618431439318, + "grad_norm": 25954.529296875, + "learning_rate": 1.8866899165124097e-05, + "loss": 0.4003, + "step": 163500 + }, + { + "epoch": 0.8438198131265446, + "grad_norm": 21477.8828125, + "learning_rate": 1.883832296897944e-05, + "loss": 0.4063, + "step": 163550 + }, + { + "epoch": 0.8440777831091574, + "grad_norm": 24669.7890625, + "learning_rate": 1.8809763406334535e-05, + "loss": 0.4049, + "step": 163600 + }, + { + "epoch": 0.8443357530917702, + "grad_norm": 27181.50390625, + "learning_rate": 1.878122049243398e-05, + "loss": 0.4007, + "step": 163650 + }, + { + "epoch": 0.8445937230743831, + "grad_norm": 25191.591796875, + "learning_rate": 1.8752694242513408e-05, + "loss": 0.4072, + "step": 163700 + }, + { + "epoch": 0.8448516930569959, + "grad_norm": 24557.42578125, + "learning_rate": 1.872418467179956e-05, + "loss": 0.4043, + "step": 163750 + }, + { + "epoch": 0.8451096630396087, + "grad_norm": 25135.6328125, + "learning_rate": 1.8695691795510335e-05, + "loss": 0.4008, + "step": 163800 + }, + { + "epoch": 0.8453676330222215, + "grad_norm": 23372.181640625, + "learning_rate": 1.8667215628854656e-05, + "loss": 0.4073, + "step": 163850 + }, + { + "epoch": 0.8456256030048344, + "grad_norm": 23332.65625, + "learning_rate": 1.8638756187032554e-05, + "loss": 0.3987, + "step": 163900 + }, + { + "epoch": 0.8458835729874472, + "grad_norm": 23423.669921875, + "learning_rate": 1.861031348523512e-05, + "loss": 0.4066, + "step": 163950 + }, + { + "epoch": 0.8461415429700601, + "grad_norm": 25873.208984375, + "learning_rate": 1.858188753864452e-05, + "loss": 0.4015, + "step": 164000 + }, + { + "epoch": 0.8463995129526728, + "grad_norm": 24766.4140625, + "learning_rate": 1.8553478362433964e-05, + "loss": 0.4076, + "step": 164050 + }, + { + "epoch": 0.8466574829352856, + "grad_norm": 25044.45703125, + "learning_rate": 1.852508597176776e-05, + "loss": 0.3972, + "step": 164100 + }, + { + "epoch": 0.8469154529178985, + "grad_norm": 23699.478515625, + "learning_rate": 1.8496710381801157e-05, + "loss": 0.3953, + "step": 164150 + }, + { + "epoch": 0.8471734229005113, + "grad_norm": 22853.53125, + "learning_rate": 1.8468351607680546e-05, + "loss": 0.4095, + "step": 164200 + }, + { + "epoch": 0.8474313928831241, + "grad_norm": 21374.96875, + "learning_rate": 1.8440009664543267e-05, + "loss": 0.4092, + "step": 164250 + }, + { + "epoch": 0.8476893628657369, + "grad_norm": 22454.515625, + "learning_rate": 1.8411684567517694e-05, + "loss": 0.4005, + "step": 164300 + }, + { + "epoch": 0.8479473328483498, + "grad_norm": 23134.24609375, + "learning_rate": 1.8383376331723258e-05, + "loss": 0.4041, + "step": 164350 + }, + { + "epoch": 0.8482053028309626, + "grad_norm": 23000.69921875, + "learning_rate": 1.835508497227028e-05, + "loss": 0.4056, + "step": 164400 + }, + { + "epoch": 0.8484632728135754, + "grad_norm": 23213.333984375, + "learning_rate": 1.8326810504260194e-05, + "loss": 0.4076, + "step": 164450 + }, + { + "epoch": 0.8487212427961882, + "grad_norm": 24883.953125, + "learning_rate": 1.8298552942785353e-05, + "loss": 0.4023, + "step": 164500 + }, + { + "epoch": 0.8489792127788011, + "grad_norm": 23075.015625, + "learning_rate": 1.827031230292908e-05, + "loss": 0.4095, + "step": 164550 + }, + { + "epoch": 0.8492371827614139, + "grad_norm": 24055.23828125, + "learning_rate": 1.824208859976569e-05, + "loss": 0.4034, + "step": 164600 + }, + { + "epoch": 0.8494951527440268, + "grad_norm": 24572.919921875, + "learning_rate": 1.8213881848360438e-05, + "loss": 0.4106, + "step": 164650 + }, + { + "epoch": 0.8497531227266395, + "grad_norm": 26111.40234375, + "learning_rate": 1.8185692063769566e-05, + "loss": 0.4051, + "step": 164700 + }, + { + "epoch": 0.8500110927092523, + "grad_norm": 22763.25, + "learning_rate": 1.8157519261040222e-05, + "loss": 0.4019, + "step": 164750 + }, + { + "epoch": 0.8502690626918652, + "grad_norm": 22230.16796875, + "learning_rate": 1.8129363455210503e-05, + "loss": 0.4085, + "step": 164800 + }, + { + "epoch": 0.850527032674478, + "grad_norm": 24729.40234375, + "learning_rate": 1.8101224661309435e-05, + "loss": 0.4042, + "step": 164850 + }, + { + "epoch": 0.8507850026570908, + "grad_norm": 23329.431640625, + "learning_rate": 1.807310289435696e-05, + "loss": 0.405, + "step": 164900 + }, + { + "epoch": 0.8510429726397036, + "grad_norm": 24267.970703125, + "learning_rate": 1.8044998169363908e-05, + "loss": 0.406, + "step": 164950 + }, + { + "epoch": 0.8513009426223165, + "grad_norm": 23587.689453125, + "learning_rate": 1.80169105013321e-05, + "loss": 0.4069, + "step": 165000 + }, + { + "epoch": 0.8513009426223165, + "eval_loss": 0.3912332057952881, + "eval_runtime": 3189.1337, + "eval_samples_per_second": 972.402, + "eval_steps_per_second": 1.899, + "step": 165000 + }, + { + "epoch": 0.8515589126049293, + "grad_norm": 23356.634765625, + "learning_rate": 1.798883990525412e-05, + "loss": 0.4022, + "step": 165050 + }, + { + "epoch": 0.8518168825875421, + "grad_norm": 23850.75, + "learning_rate": 1.7960786396113542e-05, + "loss": 0.3984, + "step": 165100 + }, + { + "epoch": 0.8520748525701549, + "grad_norm": 23898.03125, + "learning_rate": 1.7932749988884795e-05, + "loss": 0.4035, + "step": 165150 + }, + { + "epoch": 0.8523328225527678, + "grad_norm": 23517.4453125, + "learning_rate": 1.790473069853314e-05, + "loss": 0.4061, + "step": 165200 + }, + { + "epoch": 0.8525907925353806, + "grad_norm": 24264.568359375, + "learning_rate": 1.787672854001478e-05, + "loss": 0.4076, + "step": 165250 + }, + { + "epoch": 0.8528487625179934, + "grad_norm": 23741.220703125, + "learning_rate": 1.7848743528276663e-05, + "loss": 0.4063, + "step": 165300 + }, + { + "epoch": 0.8531067325006062, + "grad_norm": 25368.697265625, + "learning_rate": 1.782077567825669e-05, + "loss": 0.4027, + "step": 165350 + }, + { + "epoch": 0.853364702483219, + "grad_norm": 21610.12890625, + "learning_rate": 1.779282500488355e-05, + "loss": 0.4067, + "step": 165400 + }, + { + "epoch": 0.8536226724658319, + "grad_norm": 26066.560546875, + "learning_rate": 1.7764891523076766e-05, + "loss": 0.4091, + "step": 165450 + }, + { + "epoch": 0.8538806424484447, + "grad_norm": 22909.5234375, + "learning_rate": 1.773697524774669e-05, + "loss": 0.4035, + "step": 165500 + }, + { + "epoch": 0.8541386124310575, + "grad_norm": 23672.54296875, + "learning_rate": 1.7709076193794478e-05, + "loss": 0.407, + "step": 165550 + }, + { + "epoch": 0.8543965824136703, + "grad_norm": 22466.203125, + "learning_rate": 1.7681194376112125e-05, + "loss": 0.4057, + "step": 165600 + }, + { + "epoch": 0.8546545523962832, + "grad_norm": 23236.4296875, + "learning_rate": 1.7653329809582404e-05, + "loss": 0.4058, + "step": 165650 + }, + { + "epoch": 0.854912522378896, + "grad_norm": 23181.5, + "learning_rate": 1.7625482509078873e-05, + "loss": 0.4007, + "step": 165700 + }, + { + "epoch": 0.8551704923615088, + "grad_norm": 20621.5, + "learning_rate": 1.7597652489465877e-05, + "loss": 0.4053, + "step": 165750 + }, + { + "epoch": 0.8554284623441216, + "grad_norm": 23911.7734375, + "learning_rate": 1.756983976559855e-05, + "loss": 0.4043, + "step": 165800 + }, + { + "epoch": 0.8556864323267345, + "grad_norm": 21440.978515625, + "learning_rate": 1.7542044352322768e-05, + "loss": 0.4076, + "step": 165850 + }, + { + "epoch": 0.8559444023093473, + "grad_norm": 22439.712890625, + "learning_rate": 1.7514266264475233e-05, + "loss": 0.3999, + "step": 165900 + }, + { + "epoch": 0.8562023722919601, + "grad_norm": 24814.876953125, + "learning_rate": 1.748650551688328e-05, + "loss": 0.405, + "step": 165950 + }, + { + "epoch": 0.8564603422745729, + "grad_norm": 21705.185546875, + "learning_rate": 1.7458762124365096e-05, + "loss": 0.4007, + "step": 166000 + }, + { + "epoch": 0.8567183122571858, + "grad_norm": 25317.05078125, + "learning_rate": 1.7431036101729604e-05, + "loss": 0.4036, + "step": 166050 + }, + { + "epoch": 0.8569762822397986, + "grad_norm": 23984.142578125, + "learning_rate": 1.7403327463776343e-05, + "loss": 0.4027, + "step": 166100 + }, + { + "epoch": 0.8572342522224115, + "grad_norm": 24149.794921875, + "learning_rate": 1.7375636225295716e-05, + "loss": 0.3986, + "step": 166150 + }, + { + "epoch": 0.8574922222050242, + "grad_norm": 20085.748046875, + "learning_rate": 1.73479624010687e-05, + "loss": 0.4032, + "step": 166200 + }, + { + "epoch": 0.857750192187637, + "grad_norm": 25550.01171875, + "learning_rate": 1.732030600586711e-05, + "loss": 0.4067, + "step": 166250 + }, + { + "epoch": 0.8580081621702499, + "grad_norm": 23439.69921875, + "learning_rate": 1.7292667054453364e-05, + "loss": 0.4058, + "step": 166300 + }, + { + "epoch": 0.8582661321528627, + "grad_norm": 24064.46484375, + "learning_rate": 1.7265045561580606e-05, + "loss": 0.406, + "step": 166350 + }, + { + "epoch": 0.8585241021354755, + "grad_norm": 27679.162109375, + "learning_rate": 1.723744154199264e-05, + "loss": 0.403, + "step": 166400 + }, + { + "epoch": 0.8587820721180883, + "grad_norm": 21371.59765625, + "learning_rate": 1.7209855010423977e-05, + "loss": 0.4103, + "step": 166450 + }, + { + "epoch": 0.8590400421007012, + "grad_norm": 24340.283203125, + "learning_rate": 1.7182285981599766e-05, + "loss": 0.4073, + "step": 166500 + }, + { + "epoch": 0.859298012083314, + "grad_norm": 22603.62109375, + "learning_rate": 1.7154734470235823e-05, + "loss": 0.4026, + "step": 166550 + }, + { + "epoch": 0.8595559820659268, + "grad_norm": 21442.248046875, + "learning_rate": 1.7127200491038607e-05, + "loss": 0.4089, + "step": 166600 + }, + { + "epoch": 0.8598139520485396, + "grad_norm": 22127.478515625, + "learning_rate": 1.7099684058705212e-05, + "loss": 0.4073, + "step": 166650 + }, + { + "epoch": 0.8600719220311525, + "grad_norm": 37660.0859375, + "learning_rate": 1.707218518792342e-05, + "loss": 0.404, + "step": 166700 + }, + { + "epoch": 0.8603298920137653, + "grad_norm": 23772.982421875, + "learning_rate": 1.704470389337153e-05, + "loss": 0.4004, + "step": 166750 + }, + { + "epoch": 0.8605878619963782, + "grad_norm": 24957.23828125, + "learning_rate": 1.7017240189718575e-05, + "loss": 0.4025, + "step": 166800 + }, + { + "epoch": 0.8608458319789909, + "grad_norm": 25014.044921875, + "learning_rate": 1.6989794091624138e-05, + "loss": 0.4037, + "step": 166850 + }, + { + "epoch": 0.8611038019616037, + "grad_norm": 23370.162109375, + "learning_rate": 1.696236561373839e-05, + "loss": 0.4043, + "step": 166900 + }, + { + "epoch": 0.8613617719442166, + "grad_norm": 25212.830078125, + "learning_rate": 1.693495477070217e-05, + "loss": 0.3997, + "step": 166950 + }, + { + "epoch": 0.8616197419268294, + "grad_norm": 22828.701171875, + "learning_rate": 1.69075615771468e-05, + "loss": 0.4063, + "step": 167000 + }, + { + "epoch": 0.8618777119094422, + "grad_norm": 23862.4375, + "learning_rate": 1.6880186047694274e-05, + "loss": 0.4044, + "step": 167050 + }, + { + "epoch": 0.862135681892055, + "grad_norm": 25248.44140625, + "learning_rate": 1.685282819695711e-05, + "loss": 0.4072, + "step": 167100 + }, + { + "epoch": 0.8623936518746679, + "grad_norm": 24765.2421875, + "learning_rate": 1.68254880395384e-05, + "loss": 0.4055, + "step": 167150 + }, + { + "epoch": 0.8626516218572807, + "grad_norm": 22687.32421875, + "learning_rate": 1.6798165590031783e-05, + "loss": 0.4076, + "step": 167200 + }, + { + "epoch": 0.8629095918398935, + "grad_norm": 28427.16015625, + "learning_rate": 1.677086086302146e-05, + "loss": 0.3985, + "step": 167250 + }, + { + "epoch": 0.8631675618225063, + "grad_norm": 24114.146484375, + "learning_rate": 1.6743573873082147e-05, + "loss": 0.3993, + "step": 167300 + }, + { + "epoch": 0.8634255318051192, + "grad_norm": 22007.857421875, + "learning_rate": 1.6716304634779144e-05, + "loss": 0.4054, + "step": 167350 + }, + { + "epoch": 0.863683501787732, + "grad_norm": 24888.619140625, + "learning_rate": 1.6689053162668226e-05, + "loss": 0.3983, + "step": 167400 + }, + { + "epoch": 0.8639414717703447, + "grad_norm": 23306.1640625, + "learning_rate": 1.6661819471295704e-05, + "loss": 0.3985, + "step": 167450 + }, + { + "epoch": 0.8641994417529576, + "grad_norm": 25983.62109375, + "learning_rate": 1.6634603575198387e-05, + "loss": 0.4033, + "step": 167500 + }, + { + "epoch": 0.8644574117355704, + "grad_norm": 21851.826171875, + "learning_rate": 1.6607405488903582e-05, + "loss": 0.4067, + "step": 167550 + }, + { + "epoch": 0.8647153817181833, + "grad_norm": 23041.548828125, + "learning_rate": 1.6580225226929152e-05, + "loss": 0.4054, + "step": 167600 + }, + { + "epoch": 0.8649733517007961, + "grad_norm": 24893.72265625, + "learning_rate": 1.655306280378333e-05, + "loss": 0.4081, + "step": 167650 + }, + { + "epoch": 0.8652313216834089, + "grad_norm": 24462.869140625, + "learning_rate": 1.6525918233964933e-05, + "loss": 0.4093, + "step": 167700 + }, + { + "epoch": 0.8654892916660217, + "grad_norm": 20188.037109375, + "learning_rate": 1.6498791531963197e-05, + "loss": 0.3986, + "step": 167750 + }, + { + "epoch": 0.8657472616486346, + "grad_norm": 24806.51171875, + "learning_rate": 1.6471682712257812e-05, + "loss": 0.3988, + "step": 167800 + }, + { + "epoch": 0.8660052316312474, + "grad_norm": 21647.11328125, + "learning_rate": 1.6444591789318992e-05, + "loss": 0.4083, + "step": 167850 + }, + { + "epoch": 0.8662632016138602, + "grad_norm": 22894.3515625, + "learning_rate": 1.6417518777607277e-05, + "loss": 0.4004, + "step": 167900 + }, + { + "epoch": 0.866521171596473, + "grad_norm": 23173.974609375, + "learning_rate": 1.6390463691573765e-05, + "loss": 0.409, + "step": 167950 + }, + { + "epoch": 0.8667791415790859, + "grad_norm": 24268.001953125, + "learning_rate": 1.6363426545659927e-05, + "loss": 0.4021, + "step": 168000 + }, + { + "epoch": 0.8670371115616987, + "grad_norm": 23466.482421875, + "learning_rate": 1.6336407354297667e-05, + "loss": 0.4067, + "step": 168050 + }, + { + "epoch": 0.8672950815443115, + "grad_norm": 22965.560546875, + "learning_rate": 1.6309406131909298e-05, + "loss": 0.4127, + "step": 168100 + }, + { + "epoch": 0.8675530515269243, + "grad_norm": 22818.5859375, + "learning_rate": 1.6282422892907563e-05, + "loss": 0.4107, + "step": 168150 + }, + { + "epoch": 0.8678110215095372, + "grad_norm": 23358.80859375, + "learning_rate": 1.6255457651695565e-05, + "loss": 0.3985, + "step": 168200 + }, + { + "epoch": 0.86806899149215, + "grad_norm": 24952.044921875, + "learning_rate": 1.6228510422666865e-05, + "loss": 0.4021, + "step": 168250 + }, + { + "epoch": 0.8683269614747628, + "grad_norm": 23554.359375, + "learning_rate": 1.6201581220205353e-05, + "loss": 0.4091, + "step": 168300 + }, + { + "epoch": 0.8685849314573756, + "grad_norm": 23862.92578125, + "learning_rate": 1.6174670058685316e-05, + "loss": 0.4009, + "step": 168350 + }, + { + "epoch": 0.8688429014399884, + "grad_norm": 23549.693359375, + "learning_rate": 1.6147776952471415e-05, + "loss": 0.4062, + "step": 168400 + }, + { + "epoch": 0.8691008714226013, + "grad_norm": 25237.26953125, + "learning_rate": 1.612090191591865e-05, + "loss": 0.4009, + "step": 168450 + }, + { + "epoch": 0.8693588414052141, + "grad_norm": 24368.298828125, + "learning_rate": 1.6094044963372444e-05, + "loss": 0.4052, + "step": 168500 + }, + { + "epoch": 0.8696168113878269, + "grad_norm": 24438.0, + "learning_rate": 1.6067206109168453e-05, + "loss": 0.4077, + "step": 168550 + }, + { + "epoch": 0.8698747813704397, + "grad_norm": 30002.744140625, + "learning_rate": 1.6040385367632786e-05, + "loss": 0.4029, + "step": 168600 + }, + { + "epoch": 0.8701327513530526, + "grad_norm": 24591.333984375, + "learning_rate": 1.6013582753081824e-05, + "loss": 0.4019, + "step": 168650 + }, + { + "epoch": 0.8703907213356654, + "grad_norm": 24005.166015625, + "learning_rate": 1.5986798279822263e-05, + "loss": 0.4046, + "step": 168700 + }, + { + "epoch": 0.8706486913182782, + "grad_norm": 22198.482421875, + "learning_rate": 1.5960031962151167e-05, + "loss": 0.4003, + "step": 168750 + }, + { + "epoch": 0.870906661300891, + "grad_norm": 23392.919921875, + "learning_rate": 1.5933283814355872e-05, + "loss": 0.4039, + "step": 168800 + }, + { + "epoch": 0.8711646312835039, + "grad_norm": 26185.88671875, + "learning_rate": 1.5906553850714003e-05, + "loss": 0.4044, + "step": 168850 + }, + { + "epoch": 0.8714226012661167, + "grad_norm": 34066.59765625, + "learning_rate": 1.5879842085493514e-05, + "loss": 0.4068, + "step": 168900 + }, + { + "epoch": 0.8716805712487296, + "grad_norm": 21913.802734375, + "learning_rate": 1.5853148532952616e-05, + "loss": 0.4083, + "step": 168950 + }, + { + "epoch": 0.8719385412313423, + "grad_norm": 22491.25390625, + "learning_rate": 1.5826473207339802e-05, + "loss": 0.4037, + "step": 169000 + }, + { + "epoch": 0.8721965112139551, + "grad_norm": 23891.447265625, + "learning_rate": 1.579981612289389e-05, + "loss": 0.4033, + "step": 169050 + }, + { + "epoch": 0.872454481196568, + "grad_norm": 24374.109375, + "learning_rate": 1.5773177293843855e-05, + "loss": 0.41, + "step": 169100 + }, + { + "epoch": 0.8727124511791808, + "grad_norm": 24323.197265625, + "learning_rate": 1.574655673440903e-05, + "loss": 0.3999, + "step": 169150 + }, + { + "epoch": 0.8729704211617936, + "grad_norm": 22040.76171875, + "learning_rate": 1.5719954458798943e-05, + "loss": 0.3997, + "step": 169200 + }, + { + "epoch": 0.8732283911444064, + "grad_norm": 32067.173828125, + "learning_rate": 1.5693370481213355e-05, + "loss": 0.4028, + "step": 169250 + }, + { + "epoch": 0.8734863611270193, + "grad_norm": 27840.97265625, + "learning_rate": 1.5666804815842322e-05, + "loss": 0.4082, + "step": 169300 + }, + { + "epoch": 0.8737443311096321, + "grad_norm": 23976.154296875, + "learning_rate": 1.5640257476866033e-05, + "loss": 0.4075, + "step": 169350 + }, + { + "epoch": 0.8740023010922449, + "grad_norm": 22856.724609375, + "learning_rate": 1.5613728478454976e-05, + "loss": 0.4033, + "step": 169400 + }, + { + "epoch": 0.8742602710748577, + "grad_norm": 22639.69140625, + "learning_rate": 1.5587217834769803e-05, + "loss": 0.4052, + "step": 169450 + }, + { + "epoch": 0.8745182410574706, + "grad_norm": 24272.626953125, + "learning_rate": 1.5560725559961386e-05, + "loss": 0.4029, + "step": 169500 + }, + { + "epoch": 0.8747762110400834, + "grad_norm": 23789.333984375, + "learning_rate": 1.553425166817079e-05, + "loss": 0.4078, + "step": 169550 + }, + { + "epoch": 0.8750341810226961, + "grad_norm": 23287.294921875, + "learning_rate": 1.5507796173529248e-05, + "loss": 0.408, + "step": 169600 + }, + { + "epoch": 0.875292151005309, + "grad_norm": 22272.13671875, + "learning_rate": 1.548135909015822e-05, + "loss": 0.4017, + "step": 169650 + }, + { + "epoch": 0.8755501209879218, + "grad_norm": 24645.40234375, + "learning_rate": 1.5454940432169297e-05, + "loss": 0.4001, + "step": 169700 + }, + { + "epoch": 0.8758080909705347, + "grad_norm": 26364.072265625, + "learning_rate": 1.5428540213664243e-05, + "loss": 0.411, + "step": 169750 + }, + { + "epoch": 0.8760660609531475, + "grad_norm": 24535.76171875, + "learning_rate": 1.5402158448734987e-05, + "loss": 0.4042, + "step": 169800 + }, + { + "epoch": 0.8763240309357603, + "grad_norm": 23294.94140625, + "learning_rate": 1.53757951514636e-05, + "loss": 0.4083, + "step": 169850 + }, + { + "epoch": 0.8765820009183731, + "grad_norm": 23390.046875, + "learning_rate": 1.5349450335922295e-05, + "loss": 0.399, + "step": 169900 + }, + { + "epoch": 0.876839970900986, + "grad_norm": 23079.41796875, + "learning_rate": 1.5323124016173455e-05, + "loss": 0.4078, + "step": 169950 + }, + { + "epoch": 0.8770979408835988, + "grad_norm": 24190.23046875, + "learning_rate": 1.529681620626951e-05, + "loss": 0.4013, + "step": 170000 + }, + { + "epoch": 0.8770979408835988, + "eval_loss": 0.39030978083610535, + "eval_runtime": 3197.421, + "eval_samples_per_second": 969.882, + "eval_steps_per_second": 1.894, + "step": 170000 + }, + { + "epoch": 0.8773559108662116, + "grad_norm": 24830.658203125, + "learning_rate": 1.5270526920253098e-05, + "loss": 0.4053, + "step": 170050 + }, + { + "epoch": 0.8776138808488244, + "grad_norm": 21314.533203125, + "learning_rate": 1.5244256172156923e-05, + "loss": 0.4067, + "step": 170100 + }, + { + "epoch": 0.8778718508314373, + "grad_norm": 23271.314453125, + "learning_rate": 1.521800397600378e-05, + "loss": 0.4024, + "step": 170150 + }, + { + "epoch": 0.8781298208140501, + "grad_norm": 20112.265625, + "learning_rate": 1.5191770345806632e-05, + "loss": 0.4001, + "step": 170200 + }, + { + "epoch": 0.8783877907966628, + "grad_norm": 23957.087890625, + "learning_rate": 1.5165555295568418e-05, + "loss": 0.406, + "step": 170250 + }, + { + "epoch": 0.8786457607792757, + "grad_norm": 23699.181640625, + "learning_rate": 1.5139358839282275e-05, + "loss": 0.4005, + "step": 170300 + }, + { + "epoch": 0.8789037307618885, + "grad_norm": 23276.4453125, + "learning_rate": 1.5113180990931353e-05, + "loss": 0.4057, + "step": 170350 + }, + { + "epoch": 0.8791617007445014, + "grad_norm": 27051.26171875, + "learning_rate": 1.5087021764488867e-05, + "loss": 0.4037, + "step": 170400 + }, + { + "epoch": 0.8794196707271142, + "grad_norm": 24315.11328125, + "learning_rate": 1.5060881173918112e-05, + "loss": 0.4004, + "step": 170450 + }, + { + "epoch": 0.879677640709727, + "grad_norm": 22589.85546875, + "learning_rate": 1.5034759233172419e-05, + "loss": 0.402, + "step": 170500 + }, + { + "epoch": 0.8799356106923398, + "grad_norm": 24601.666015625, + "learning_rate": 1.5008655956195195e-05, + "loss": 0.4083, + "step": 170550 + }, + { + "epoch": 0.8801935806749527, + "grad_norm": 23203.884765625, + "learning_rate": 1.4982571356919862e-05, + "loss": 0.3971, + "step": 170600 + }, + { + "epoch": 0.8804515506575655, + "grad_norm": 28701.162109375, + "learning_rate": 1.4956505449269858e-05, + "loss": 0.3989, + "step": 170650 + }, + { + "epoch": 0.8807095206401783, + "grad_norm": 23548.541015625, + "learning_rate": 1.4930458247158668e-05, + "loss": 0.4014, + "step": 170700 + }, + { + "epoch": 0.8809674906227911, + "grad_norm": 26836.626953125, + "learning_rate": 1.4904429764489792e-05, + "loss": 0.3964, + "step": 170750 + }, + { + "epoch": 0.881225460605404, + "grad_norm": 23989.537109375, + "learning_rate": 1.4878420015156697e-05, + "loss": 0.4062, + "step": 170800 + }, + { + "epoch": 0.8814834305880168, + "grad_norm": 22008.498046875, + "learning_rate": 1.4852429013042945e-05, + "loss": 0.4034, + "step": 170850 + }, + { + "epoch": 0.8817414005706296, + "grad_norm": 22564.548828125, + "learning_rate": 1.4826456772021957e-05, + "loss": 0.3953, + "step": 170900 + }, + { + "epoch": 0.8819993705532424, + "grad_norm": 20611.005859375, + "learning_rate": 1.4800503305957264e-05, + "loss": 0.3993, + "step": 170950 + }, + { + "epoch": 0.8822573405358553, + "grad_norm": 23731.072265625, + "learning_rate": 1.4774568628702312e-05, + "loss": 0.4008, + "step": 171000 + }, + { + "epoch": 0.8825153105184681, + "grad_norm": 23515.265625, + "learning_rate": 1.4748652754100506e-05, + "loss": 0.4093, + "step": 171050 + }, + { + "epoch": 0.882773280501081, + "grad_norm": 20889.193359375, + "learning_rate": 1.4722755695985291e-05, + "loss": 0.4036, + "step": 171100 + }, + { + "epoch": 0.8830312504836937, + "grad_norm": 23561.208984375, + "learning_rate": 1.4696877468179954e-05, + "loss": 0.4009, + "step": 171150 + }, + { + "epoch": 0.8832892204663065, + "grad_norm": 29216.3046875, + "learning_rate": 1.4671018084497828e-05, + "loss": 0.4087, + "step": 171200 + }, + { + "epoch": 0.8835471904489194, + "grad_norm": 24697.615234375, + "learning_rate": 1.4645177558742147e-05, + "loss": 0.3976, + "step": 171250 + }, + { + "epoch": 0.8838051604315322, + "grad_norm": 30338.123046875, + "learning_rate": 1.4619355904706062e-05, + "loss": 0.4046, + "step": 171300 + }, + { + "epoch": 0.884063130414145, + "grad_norm": 22565.310546875, + "learning_rate": 1.4593553136172705e-05, + "loss": 0.4011, + "step": 171350 + }, + { + "epoch": 0.8843211003967578, + "grad_norm": 23498.0, + "learning_rate": 1.4567769266915077e-05, + "loss": 0.4071, + "step": 171400 + }, + { + "epoch": 0.8845790703793707, + "grad_norm": 23772.279296875, + "learning_rate": 1.4542004310696112e-05, + "loss": 0.4048, + "step": 171450 + }, + { + "epoch": 0.8848370403619835, + "grad_norm": 22418.015625, + "learning_rate": 1.4516258281268636e-05, + "loss": 0.4009, + "step": 171500 + }, + { + "epoch": 0.8850950103445963, + "grad_norm": 25706.166015625, + "learning_rate": 1.4490531192375395e-05, + "loss": 0.4017, + "step": 171550 + }, + { + "epoch": 0.8853529803272091, + "grad_norm": 23563.37890625, + "learning_rate": 1.4464823057748982e-05, + "loss": 0.4056, + "step": 171600 + }, + { + "epoch": 0.885610950309822, + "grad_norm": 23104.65234375, + "learning_rate": 1.4439133891111956e-05, + "loss": 0.4014, + "step": 171650 + }, + { + "epoch": 0.8858689202924348, + "grad_norm": 22858.935546875, + "learning_rate": 1.4413463706176627e-05, + "loss": 0.4047, + "step": 171700 + }, + { + "epoch": 0.8861268902750475, + "grad_norm": 23197.859375, + "learning_rate": 1.4387812516645299e-05, + "loss": 0.4032, + "step": 171750 + }, + { + "epoch": 0.8863848602576604, + "grad_norm": 22323.4609375, + "learning_rate": 1.4362180336210057e-05, + "loss": 0.4018, + "step": 171800 + }, + { + "epoch": 0.8866428302402732, + "grad_norm": 23677.431640625, + "learning_rate": 1.433656717855285e-05, + "loss": 0.4057, + "step": 171850 + }, + { + "epoch": 0.8869008002228861, + "grad_norm": 22975.283203125, + "learning_rate": 1.4310973057345538e-05, + "loss": 0.4077, + "step": 171900 + }, + { + "epoch": 0.8871587702054989, + "grad_norm": 23338.005859375, + "learning_rate": 1.4285397986249694e-05, + "loss": 0.4037, + "step": 171950 + }, + { + "epoch": 0.8874167401881117, + "grad_norm": 22469.08203125, + "learning_rate": 1.4259841978916849e-05, + "loss": 0.4025, + "step": 172000 + }, + { + "epoch": 0.8876747101707245, + "grad_norm": 23508.064453125, + "learning_rate": 1.4234305048988288e-05, + "loss": 0.3979, + "step": 172050 + }, + { + "epoch": 0.8879326801533374, + "grad_norm": 25113.62890625, + "learning_rate": 1.4208787210095126e-05, + "loss": 0.3988, + "step": 172100 + }, + { + "epoch": 0.8881906501359502, + "grad_norm": 23230.75, + "learning_rate": 1.4183288475858298e-05, + "loss": 0.4029, + "step": 172150 + }, + { + "epoch": 0.888448620118563, + "grad_norm": 22058.306640625, + "learning_rate": 1.4157808859888516e-05, + "loss": 0.4082, + "step": 172200 + }, + { + "epoch": 0.8887065901011758, + "grad_norm": 23375.91015625, + "learning_rate": 1.4132348375786336e-05, + "loss": 0.407, + "step": 172250 + }, + { + "epoch": 0.8889645600837887, + "grad_norm": 21199.943359375, + "learning_rate": 1.4106907037142059e-05, + "loss": 0.4039, + "step": 172300 + }, + { + "epoch": 0.8892225300664015, + "grad_norm": 22754.287109375, + "learning_rate": 1.4081484857535777e-05, + "loss": 0.4, + "step": 172350 + }, + { + "epoch": 0.8894805000490142, + "grad_norm": 23116.21484375, + "learning_rate": 1.405608185053735e-05, + "loss": 0.4026, + "step": 172400 + }, + { + "epoch": 0.8897384700316271, + "grad_norm": 22281.65625, + "learning_rate": 1.4030698029706423e-05, + "loss": 0.3992, + "step": 172450 + }, + { + "epoch": 0.8899964400142399, + "grad_norm": 22979.447265625, + "learning_rate": 1.400533340859237e-05, + "loss": 0.4027, + "step": 172500 + }, + { + "epoch": 0.8902544099968528, + "grad_norm": 25733.873046875, + "learning_rate": 1.3979988000734373e-05, + "loss": 0.4092, + "step": 172550 + }, + { + "epoch": 0.8905123799794656, + "grad_norm": 23825.38671875, + "learning_rate": 1.395466181966127e-05, + "loss": 0.3997, + "step": 172600 + }, + { + "epoch": 0.8907703499620784, + "grad_norm": 27504.0703125, + "learning_rate": 1.3929354878891715e-05, + "loss": 0.403, + "step": 172650 + }, + { + "epoch": 0.8910283199446912, + "grad_norm": 28201.208984375, + "learning_rate": 1.3904067191934067e-05, + "loss": 0.4029, + "step": 172700 + }, + { + "epoch": 0.8912862899273041, + "grad_norm": 24115.69140625, + "learning_rate": 1.3878798772286377e-05, + "loss": 0.3979, + "step": 172750 + }, + { + "epoch": 0.8915442599099169, + "grad_norm": 20489.552734375, + "learning_rate": 1.3853549633436491e-05, + "loss": 0.4001, + "step": 172800 + }, + { + "epoch": 0.8918022298925297, + "grad_norm": 23580.583984375, + "learning_rate": 1.3828319788861838e-05, + "loss": 0.3983, + "step": 172850 + }, + { + "epoch": 0.8920601998751425, + "grad_norm": 24172.771484375, + "learning_rate": 1.3803109252029678e-05, + "loss": 0.4081, + "step": 172900 + }, + { + "epoch": 0.8923181698577554, + "grad_norm": 26543.375, + "learning_rate": 1.3777918036396887e-05, + "loss": 0.4015, + "step": 172950 + }, + { + "epoch": 0.8925761398403682, + "grad_norm": 27849.654296875, + "learning_rate": 1.3752746155410046e-05, + "loss": 0.4045, + "step": 173000 + }, + { + "epoch": 0.892834109822981, + "grad_norm": 25752.724609375, + "learning_rate": 1.3727593622505424e-05, + "loss": 0.4022, + "step": 173050 + }, + { + "epoch": 0.8930920798055938, + "grad_norm": 22836.892578125, + "learning_rate": 1.3702460451108935e-05, + "loss": 0.4015, + "step": 173100 + }, + { + "epoch": 0.8933500497882066, + "grad_norm": 26556.62890625, + "learning_rate": 1.3677346654636208e-05, + "loss": 0.4017, + "step": 173150 + }, + { + "epoch": 0.8936080197708195, + "grad_norm": 24310.390625, + "learning_rate": 1.3652252246492492e-05, + "loss": 0.4015, + "step": 173200 + }, + { + "epoch": 0.8938659897534323, + "grad_norm": 23713.0859375, + "learning_rate": 1.3627177240072698e-05, + "loss": 0.4024, + "step": 173250 + }, + { + "epoch": 0.8941239597360451, + "grad_norm": 21189.57421875, + "learning_rate": 1.3602121648761373e-05, + "loss": 0.4012, + "step": 173300 + }, + { + "epoch": 0.8943819297186579, + "grad_norm": 24229.1484375, + "learning_rate": 1.3577085485932705e-05, + "loss": 0.4105, + "step": 173350 + }, + { + "epoch": 0.8946398997012708, + "grad_norm": 23998.22265625, + "learning_rate": 1.3552068764950504e-05, + "loss": 0.4004, + "step": 173400 + }, + { + "epoch": 0.8948978696838836, + "grad_norm": 24751.1171875, + "learning_rate": 1.3527071499168253e-05, + "loss": 0.4024, + "step": 173450 + }, + { + "epoch": 0.8951558396664964, + "grad_norm": 24872.029296875, + "learning_rate": 1.3502093701928948e-05, + "loss": 0.406, + "step": 173500 + }, + { + "epoch": 0.8954138096491092, + "grad_norm": 23180.771484375, + "learning_rate": 1.3477135386565297e-05, + "loss": 0.4041, + "step": 173550 + }, + { + "epoch": 0.8956717796317221, + "grad_norm": 23679.1484375, + "learning_rate": 1.3452196566399555e-05, + "loss": 0.4095, + "step": 173600 + }, + { + "epoch": 0.8959297496143349, + "grad_norm": 26730.537109375, + "learning_rate": 1.3427277254743565e-05, + "loss": 0.4058, + "step": 173650 + }, + { + "epoch": 0.8961877195969477, + "grad_norm": 23320.666015625, + "learning_rate": 1.3402377464898813e-05, + "loss": 0.4038, + "step": 173700 + }, + { + "epoch": 0.8964456895795605, + "grad_norm": 22802.87890625, + "learning_rate": 1.3377497210156276e-05, + "loss": 0.3977, + "step": 173750 + }, + { + "epoch": 0.8967036595621733, + "grad_norm": 21257.22265625, + "learning_rate": 1.3352636503796584e-05, + "loss": 0.4074, + "step": 173800 + }, + { + "epoch": 0.8969616295447862, + "grad_norm": 23935.412109375, + "learning_rate": 1.332779535908989e-05, + "loss": 0.4021, + "step": 173850 + }, + { + "epoch": 0.8972195995273989, + "grad_norm": 21819.267578125, + "learning_rate": 1.3302973789295925e-05, + "loss": 0.3992, + "step": 173900 + }, + { + "epoch": 0.8974775695100118, + "grad_norm": 23360.71875, + "learning_rate": 1.327817180766393e-05, + "loss": 0.4051, + "step": 173950 + }, + { + "epoch": 0.8977355394926246, + "grad_norm": 24474.685546875, + "learning_rate": 1.3253389427432772e-05, + "loss": 0.4046, + "step": 174000 + }, + { + "epoch": 0.8979935094752375, + "grad_norm": 29715.3359375, + "learning_rate": 1.3228626661830779e-05, + "loss": 0.4037, + "step": 174050 + }, + { + "epoch": 0.8982514794578503, + "grad_norm": 23241.20703125, + "learning_rate": 1.3203883524075833e-05, + "loss": 0.4003, + "step": 174100 + }, + { + "epoch": 0.8985094494404631, + "grad_norm": 26005.23828125, + "learning_rate": 1.3179160027375347e-05, + "loss": 0.3992, + "step": 174150 + }, + { + "epoch": 0.8987674194230759, + "grad_norm": 23322.212890625, + "learning_rate": 1.3154456184926234e-05, + "loss": 0.4037, + "step": 174200 + }, + { + "epoch": 0.8990253894056888, + "grad_norm": 22434.90234375, + "learning_rate": 1.3129772009914964e-05, + "loss": 0.4044, + "step": 174250 + }, + { + "epoch": 0.8992833593883016, + "grad_norm": 24753.904296875, + "learning_rate": 1.3105107515517418e-05, + "loss": 0.4034, + "step": 174300 + }, + { + "epoch": 0.8995413293709144, + "grad_norm": 23271.814453125, + "learning_rate": 1.3080462714899066e-05, + "loss": 0.3992, + "step": 174350 + }, + { + "epoch": 0.8997992993535272, + "grad_norm": 23929.7578125, + "learning_rate": 1.3055837621214811e-05, + "loss": 0.4018, + "step": 174400 + }, + { + "epoch": 0.90005726933614, + "grad_norm": 25211.7265625, + "learning_rate": 1.3031232247609037e-05, + "loss": 0.4052, + "step": 174450 + }, + { + "epoch": 0.9003152393187529, + "grad_norm": 24554.791015625, + "learning_rate": 1.300664660721566e-05, + "loss": 0.3987, + "step": 174500 + }, + { + "epoch": 0.9005732093013656, + "grad_norm": 26028.396484375, + "learning_rate": 1.2982080713157963e-05, + "loss": 0.4032, + "step": 174550 + }, + { + "epoch": 0.9008311792839785, + "grad_norm": 24228.72265625, + "learning_rate": 1.295753457854878e-05, + "loss": 0.4001, + "step": 174600 + }, + { + "epoch": 0.9010891492665913, + "grad_norm": 24043.064453125, + "learning_rate": 1.293300821649036e-05, + "loss": 0.4009, + "step": 174650 + }, + { + "epoch": 0.9013471192492042, + "grad_norm": 25628.208984375, + "learning_rate": 1.2908501640074388e-05, + "loss": 0.4058, + "step": 174700 + }, + { + "epoch": 0.901605089231817, + "grad_norm": 23927.81640625, + "learning_rate": 1.288401486238201e-05, + "loss": 0.4044, + "step": 174750 + }, + { + "epoch": 0.9018630592144298, + "grad_norm": 23615.923828125, + "learning_rate": 1.2859547896483793e-05, + "loss": 0.4042, + "step": 174800 + }, + { + "epoch": 0.9021210291970426, + "grad_norm": 24990.158203125, + "learning_rate": 1.2835100755439705e-05, + "loss": 0.4033, + "step": 174850 + }, + { + "epoch": 0.9023789991796555, + "grad_norm": 23908.240234375, + "learning_rate": 1.2810673452299194e-05, + "loss": 0.404, + "step": 174900 + }, + { + "epoch": 0.9026369691622683, + "grad_norm": 24776.828125, + "learning_rate": 1.278626600010106e-05, + "loss": 0.4017, + "step": 174950 + }, + { + "epoch": 0.9028949391448811, + "grad_norm": 23400.912109375, + "learning_rate": 1.276187841187354e-05, + "loss": 0.4007, + "step": 175000 + }, + { + "epoch": 0.9028949391448811, + "eval_loss": 0.389443963766098, + "eval_runtime": 3184.6844, + "eval_samples_per_second": 973.761, + "eval_steps_per_second": 1.902, + "step": 175000 + }, + { + "epoch": 0.9031529091274939, + "grad_norm": 23482.337890625, + "learning_rate": 1.2737510700634248e-05, + "loss": 0.4033, + "step": 175050 + }, + { + "epoch": 0.9034108791101068, + "grad_norm": 24351.23828125, + "learning_rate": 1.2713162879390183e-05, + "loss": 0.4031, + "step": 175100 + }, + { + "epoch": 0.9036688490927196, + "grad_norm": 28495.6796875, + "learning_rate": 1.2688834961137785e-05, + "loss": 0.4057, + "step": 175150 + }, + { + "epoch": 0.9039268190753323, + "grad_norm": 23276.583984375, + "learning_rate": 1.2664526958862765e-05, + "loss": 0.4036, + "step": 175200 + }, + { + "epoch": 0.9041847890579452, + "grad_norm": 22784.033203125, + "learning_rate": 1.2640238885540312e-05, + "loss": 0.4054, + "step": 175250 + }, + { + "epoch": 0.904442759040558, + "grad_norm": 22389.21484375, + "learning_rate": 1.2615970754134914e-05, + "loss": 0.4036, + "step": 175300 + }, + { + "epoch": 0.9047007290231709, + "grad_norm": 24767.59375, + "learning_rate": 1.2591722577600412e-05, + "loss": 0.4055, + "step": 175350 + }, + { + "epoch": 0.9049586990057837, + "grad_norm": 24981.552734375, + "learning_rate": 1.2567494368880056e-05, + "loss": 0.3997, + "step": 175400 + }, + { + "epoch": 0.9052166689883965, + "grad_norm": 24523.580078125, + "learning_rate": 1.254328614090634e-05, + "loss": 0.4009, + "step": 175450 + }, + { + "epoch": 0.9054746389710093, + "grad_norm": 29571.404296875, + "learning_rate": 1.251909790660119e-05, + "loss": 0.4013, + "step": 175500 + }, + { + "epoch": 0.9057326089536222, + "grad_norm": 23286.564453125, + "learning_rate": 1.24949296788758e-05, + "loss": 0.3997, + "step": 175550 + }, + { + "epoch": 0.905990578936235, + "grad_norm": 23124.205078125, + "learning_rate": 1.247078147063071e-05, + "loss": 0.4056, + "step": 175600 + }, + { + "epoch": 0.9062485489188478, + "grad_norm": 23467.775390625, + "learning_rate": 1.2446653294755755e-05, + "loss": 0.3976, + "step": 175650 + }, + { + "epoch": 0.9065065189014606, + "grad_norm": 23793.609375, + "learning_rate": 1.2422545164130096e-05, + "loss": 0.4018, + "step": 175700 + }, + { + "epoch": 0.9067644888840735, + "grad_norm": 24439.974609375, + "learning_rate": 1.2398457091622167e-05, + "loss": 0.4063, + "step": 175750 + }, + { + "epoch": 0.9070224588666863, + "grad_norm": 23925.22265625, + "learning_rate": 1.2374389090089744e-05, + "loss": 0.4039, + "step": 175800 + }, + { + "epoch": 0.907280428849299, + "grad_norm": 23174.416015625, + "learning_rate": 1.2350341172379853e-05, + "loss": 0.4031, + "step": 175850 + }, + { + "epoch": 0.9075383988319119, + "grad_norm": 26669.806640625, + "learning_rate": 1.2326313351328794e-05, + "loss": 0.4031, + "step": 175900 + }, + { + "epoch": 0.9077963688145247, + "grad_norm": 21128.041015625, + "learning_rate": 1.2302305639762168e-05, + "loss": 0.407, + "step": 175950 + }, + { + "epoch": 0.9080543387971376, + "grad_norm": 22798.111328125, + "learning_rate": 1.2278318050494797e-05, + "loss": 0.4035, + "step": 176000 + }, + { + "epoch": 0.9083123087797504, + "grad_norm": 23327.587890625, + "learning_rate": 1.2254350596330843e-05, + "loss": 0.3958, + "step": 176050 + }, + { + "epoch": 0.9085702787623632, + "grad_norm": 22225.3125, + "learning_rate": 1.2230403290063613e-05, + "loss": 0.4074, + "step": 176100 + }, + { + "epoch": 0.908828248744976, + "grad_norm": 22727.791015625, + "learning_rate": 1.2206476144475754e-05, + "loss": 0.4063, + "step": 176150 + }, + { + "epoch": 0.9090862187275889, + "grad_norm": 26138.931640625, + "learning_rate": 1.2182569172339098e-05, + "loss": 0.408, + "step": 176200 + }, + { + "epoch": 0.9093441887102017, + "grad_norm": 23436.91796875, + "learning_rate": 1.2158682386414716e-05, + "loss": 0.4038, + "step": 176250 + }, + { + "epoch": 0.9096021586928145, + "grad_norm": 23695.244140625, + "learning_rate": 1.2134815799452947e-05, + "loss": 0.4074, + "step": 176300 + }, + { + "epoch": 0.9098601286754273, + "grad_norm": 25616.240234375, + "learning_rate": 1.2110969424193263e-05, + "loss": 0.3971, + "step": 176350 + }, + { + "epoch": 0.9101180986580402, + "grad_norm": 27326.634765625, + "learning_rate": 1.2087143273364431e-05, + "loss": 0.4045, + "step": 176400 + }, + { + "epoch": 0.910376068640653, + "grad_norm": 23704.775390625, + "learning_rate": 1.2063337359684384e-05, + "loss": 0.4071, + "step": 176450 + }, + { + "epoch": 0.9106340386232658, + "grad_norm": 25532.234375, + "learning_rate": 1.2039551695860251e-05, + "loss": 0.4021, + "step": 176500 + }, + { + "epoch": 0.9108920086058786, + "grad_norm": 25247.884765625, + "learning_rate": 1.201578629458835e-05, + "loss": 0.4074, + "step": 176550 + }, + { + "epoch": 0.9111499785884914, + "grad_norm": 29377.486328125, + "learning_rate": 1.1992041168554236e-05, + "loss": 0.4064, + "step": 176600 + }, + { + "epoch": 0.9114079485711043, + "grad_norm": 22188.34375, + "learning_rate": 1.1968316330432527e-05, + "loss": 0.404, + "step": 176650 + }, + { + "epoch": 0.911665918553717, + "grad_norm": 23766.0546875, + "learning_rate": 1.194461179288714e-05, + "loss": 0.4016, + "step": 176700 + }, + { + "epoch": 0.9119238885363299, + "grad_norm": 21386.623046875, + "learning_rate": 1.1920927568571078e-05, + "loss": 0.4055, + "step": 176750 + }, + { + "epoch": 0.9121818585189427, + "grad_norm": 25873.052734375, + "learning_rate": 1.1897263670126507e-05, + "loss": 0.3978, + "step": 176800 + }, + { + "epoch": 0.9124398285015556, + "grad_norm": 25235.5390625, + "learning_rate": 1.1873620110184803e-05, + "loss": 0.3975, + "step": 176850 + }, + { + "epoch": 0.9126977984841684, + "grad_norm": 22841.5, + "learning_rate": 1.1849996901366383e-05, + "loss": 0.4031, + "step": 176900 + }, + { + "epoch": 0.9129557684667812, + "grad_norm": 21522.388671875, + "learning_rate": 1.1826394056280893e-05, + "loss": 0.4048, + "step": 176950 + }, + { + "epoch": 0.913213738449394, + "grad_norm": 27600.689453125, + "learning_rate": 1.1802811587527074e-05, + "loss": 0.3984, + "step": 177000 + }, + { + "epoch": 0.9134717084320069, + "grad_norm": 24698.60546875, + "learning_rate": 1.177924950769278e-05, + "loss": 0.406, + "step": 177050 + }, + { + "epoch": 0.9137296784146197, + "grad_norm": 27378.033203125, + "learning_rate": 1.1755707829355001e-05, + "loss": 0.3993, + "step": 177100 + }, + { + "epoch": 0.9139876483972325, + "grad_norm": 27578.4296875, + "learning_rate": 1.1732186565079805e-05, + "loss": 0.3984, + "step": 177150 + }, + { + "epoch": 0.9142456183798453, + "grad_norm": 24650.6953125, + "learning_rate": 1.1708685727422424e-05, + "loss": 0.401, + "step": 177200 + }, + { + "epoch": 0.9145035883624582, + "grad_norm": 25550.0859375, + "learning_rate": 1.1685205328927135e-05, + "loss": 0.399, + "step": 177250 + }, + { + "epoch": 0.914761558345071, + "grad_norm": 22760.77734375, + "learning_rate": 1.166174538212732e-05, + "loss": 0.403, + "step": 177300 + }, + { + "epoch": 0.9150195283276837, + "grad_norm": 22038.26171875, + "learning_rate": 1.1638305899545443e-05, + "loss": 0.4066, + "step": 177350 + }, + { + "epoch": 0.9152774983102966, + "grad_norm": 23857.66015625, + "learning_rate": 1.1614886893693044e-05, + "loss": 0.4038, + "step": 177400 + }, + { + "epoch": 0.9155354682929094, + "grad_norm": 24813.55859375, + "learning_rate": 1.1591488377070724e-05, + "loss": 0.3992, + "step": 177450 + }, + { + "epoch": 0.9157934382755223, + "grad_norm": 24467.5859375, + "learning_rate": 1.1568110362168199e-05, + "loss": 0.4, + "step": 177500 + }, + { + "epoch": 0.9160514082581351, + "grad_norm": 22464.98046875, + "learning_rate": 1.1544752861464143e-05, + "loss": 0.4069, + "step": 177550 + }, + { + "epoch": 0.9163093782407479, + "grad_norm": 26591.51171875, + "learning_rate": 1.1521415887426379e-05, + "loss": 0.4008, + "step": 177600 + }, + { + "epoch": 0.9165673482233607, + "grad_norm": 21086.318359375, + "learning_rate": 1.1498099452511724e-05, + "loss": 0.4036, + "step": 177650 + }, + { + "epoch": 0.9168253182059736, + "grad_norm": 24243.072265625, + "learning_rate": 1.147480356916602e-05, + "loss": 0.4019, + "step": 177700 + }, + { + "epoch": 0.9170832881885864, + "grad_norm": 26714.83984375, + "learning_rate": 1.1451528249824206e-05, + "loss": 0.3978, + "step": 177750 + }, + { + "epoch": 0.9173412581711992, + "grad_norm": 24799.712890625, + "learning_rate": 1.1428273506910132e-05, + "loss": 0.4078, + "step": 177800 + }, + { + "epoch": 0.917599228153812, + "grad_norm": 25010.435546875, + "learning_rate": 1.1405039352836777e-05, + "loss": 0.4054, + "step": 177850 + }, + { + "epoch": 0.9178571981364249, + "grad_norm": 23657.78125, + "learning_rate": 1.1381825800006068e-05, + "loss": 0.4001, + "step": 177900 + }, + { + "epoch": 0.9181151681190377, + "grad_norm": 23865.349609375, + "learning_rate": 1.1358632860808955e-05, + "loss": 0.4012, + "step": 177950 + }, + { + "epoch": 0.9183731381016504, + "grad_norm": 26476.04296875, + "learning_rate": 1.1335460547625365e-05, + "loss": 0.3998, + "step": 178000 + }, + { + "epoch": 0.9186311080842633, + "grad_norm": 24907.89453125, + "learning_rate": 1.1312308872824235e-05, + "loss": 0.401, + "step": 178050 + }, + { + "epoch": 0.9188890780668761, + "grad_norm": 24008.54296875, + "learning_rate": 1.1289177848763494e-05, + "loss": 0.3991, + "step": 178100 + }, + { + "epoch": 0.919147048049489, + "grad_norm": 23814.396484375, + "learning_rate": 1.1266067487790027e-05, + "loss": 0.4039, + "step": 178150 + }, + { + "epoch": 0.9194050180321018, + "grad_norm": 25892.994140625, + "learning_rate": 1.1242977802239696e-05, + "loss": 0.4015, + "step": 178200 + }, + { + "epoch": 0.9196629880147146, + "grad_norm": 24185.7265625, + "learning_rate": 1.1219908804437328e-05, + "loss": 0.3992, + "step": 178250 + }, + { + "epoch": 0.9199209579973274, + "grad_norm": 23890.54296875, + "learning_rate": 1.1196860506696705e-05, + "loss": 0.4087, + "step": 178300 + }, + { + "epoch": 0.9201789279799403, + "grad_norm": 25288.83203125, + "learning_rate": 1.1173832921320554e-05, + "loss": 0.4038, + "step": 178350 + }, + { + "epoch": 0.9204368979625531, + "grad_norm": 27609.994140625, + "learning_rate": 1.1150826060600594e-05, + "loss": 0.4047, + "step": 178400 + }, + { + "epoch": 0.9206948679451659, + "grad_norm": 25010.259765625, + "learning_rate": 1.112783993681738e-05, + "loss": 0.4037, + "step": 178450 + }, + { + "epoch": 0.9209528379277787, + "grad_norm": 23663.78515625, + "learning_rate": 1.1104874562240514e-05, + "loss": 0.396, + "step": 178500 + }, + { + "epoch": 0.9212108079103916, + "grad_norm": 24960.072265625, + "learning_rate": 1.108192994912844e-05, + "loss": 0.4024, + "step": 178550 + }, + { + "epoch": 0.9214687778930044, + "grad_norm": 22778.66796875, + "learning_rate": 1.1059006109728543e-05, + "loss": 0.4039, + "step": 178600 + }, + { + "epoch": 0.9217267478756171, + "grad_norm": 20177.640625, + "learning_rate": 1.1036103056277165e-05, + "loss": 0.4008, + "step": 178650 + }, + { + "epoch": 0.92198471785823, + "grad_norm": 25084.703125, + "learning_rate": 1.1013220800999452e-05, + "loss": 0.4082, + "step": 178700 + }, + { + "epoch": 0.9222426878408428, + "grad_norm": 23697.529296875, + "learning_rate": 1.0990359356109558e-05, + "loss": 0.4083, + "step": 178750 + }, + { + "epoch": 0.9225006578234557, + "grad_norm": 26252.25, + "learning_rate": 1.0967518733810462e-05, + "loss": 0.4114, + "step": 178800 + }, + { + "epoch": 0.9227586278060684, + "grad_norm": 25295.103515625, + "learning_rate": 1.094469894629403e-05, + "loss": 0.4062, + "step": 178850 + }, + { + "epoch": 0.9230165977886813, + "grad_norm": 24484.203125, + "learning_rate": 1.0921900005741053e-05, + "loss": 0.4008, + "step": 178900 + }, + { + "epoch": 0.9232745677712941, + "grad_norm": 23360.701171875, + "learning_rate": 1.0899121924321154e-05, + "loss": 0.405, + "step": 178950 + }, + { + "epoch": 0.923532537753907, + "grad_norm": 22507.24609375, + "learning_rate": 1.0876364714192822e-05, + "loss": 0.3968, + "step": 179000 + }, + { + "epoch": 0.9237905077365198, + "grad_norm": 26761.66015625, + "learning_rate": 1.0853628387503423e-05, + "loss": 0.4021, + "step": 179050 + }, + { + "epoch": 0.9240484777191326, + "grad_norm": 26596.376953125, + "learning_rate": 1.0830912956389166e-05, + "loss": 0.3984, + "step": 179100 + }, + { + "epoch": 0.9243064477017454, + "grad_norm": 23996.490234375, + "learning_rate": 1.0808218432975093e-05, + "loss": 0.3996, + "step": 179150 + }, + { + "epoch": 0.9245644176843583, + "grad_norm": 22681.4609375, + "learning_rate": 1.0785544829375143e-05, + "loss": 0.4021, + "step": 179200 + }, + { + "epoch": 0.9248223876669711, + "grad_norm": 25675.728515625, + "learning_rate": 1.0762892157691995e-05, + "loss": 0.3942, + "step": 179250 + }, + { + "epoch": 0.9250803576495839, + "grad_norm": 26039.25, + "learning_rate": 1.0740260430017247e-05, + "loss": 0.4014, + "step": 179300 + }, + { + "epoch": 0.9253383276321967, + "grad_norm": 21596.50390625, + "learning_rate": 1.0717649658431256e-05, + "loss": 0.4017, + "step": 179350 + }, + { + "epoch": 0.9255962976148095, + "grad_norm": 25318.3125, + "learning_rate": 1.0695059855003204e-05, + "loss": 0.3968, + "step": 179400 + }, + { + "epoch": 0.9258542675974224, + "grad_norm": 20999.10546875, + "learning_rate": 1.0672491031791137e-05, + "loss": 0.4032, + "step": 179450 + }, + { + "epoch": 0.9261122375800351, + "grad_norm": 25034.404296875, + "learning_rate": 1.0649943200841794e-05, + "loss": 0.3987, + "step": 179500 + }, + { + "epoch": 0.926370207562648, + "grad_norm": 23470.205078125, + "learning_rate": 1.0627416374190819e-05, + "loss": 0.4009, + "step": 179550 + }, + { + "epoch": 0.9266281775452608, + "grad_norm": 23667.298828125, + "learning_rate": 1.0604910563862575e-05, + "loss": 0.4022, + "step": 179600 + }, + { + "epoch": 0.9268861475278737, + "grad_norm": 25315.5390625, + "learning_rate": 1.058242578187023e-05, + "loss": 0.4023, + "step": 179650 + }, + { + "epoch": 0.9271441175104865, + "grad_norm": 23639.34375, + "learning_rate": 1.0559962040215727e-05, + "loss": 0.407, + "step": 179700 + }, + { + "epoch": 0.9274020874930993, + "grad_norm": 29350.244140625, + "learning_rate": 1.0537519350889764e-05, + "loss": 0.4063, + "step": 179750 + }, + { + "epoch": 0.9276600574757121, + "grad_norm": 26077.30859375, + "learning_rate": 1.051509772587183e-05, + "loss": 0.4011, + "step": 179800 + }, + { + "epoch": 0.927918027458325, + "grad_norm": 22387.8046875, + "learning_rate": 1.0492697177130157e-05, + "loss": 0.398, + "step": 179850 + }, + { + "epoch": 0.9281759974409378, + "grad_norm": 24023.2734375, + "learning_rate": 1.0470317716621719e-05, + "loss": 0.4026, + "step": 179900 + }, + { + "epoch": 0.9284339674235506, + "grad_norm": 24288.666015625, + "learning_rate": 1.044795935629223e-05, + "loss": 0.403, + "step": 179950 + }, + { + "epoch": 0.9286919374061634, + "grad_norm": 26163.923828125, + "learning_rate": 1.042562210807616e-05, + "loss": 0.4001, + "step": 180000 + }, + { + "epoch": 0.9286919374061634, + "eval_loss": 0.3886363208293915, + "eval_runtime": 3188.2841, + "eval_samples_per_second": 972.661, + "eval_steps_per_second": 1.9, + "step": 180000 + }, + { + "epoch": 0.9289499073887763, + "grad_norm": 24379.322265625, + "learning_rate": 1.0403305983896683e-05, + "loss": 0.3978, + "step": 180050 + }, + { + "epoch": 0.9292078773713891, + "grad_norm": 23249.939453125, + "learning_rate": 1.0381010995665752e-05, + "loss": 0.4055, + "step": 180100 + }, + { + "epoch": 0.9294658473540018, + "grad_norm": 25460.6875, + "learning_rate": 1.0358737155283942e-05, + "loss": 0.4059, + "step": 180150 + }, + { + "epoch": 0.9297238173366147, + "grad_norm": 23166.548828125, + "learning_rate": 1.0336484474640651e-05, + "loss": 0.4051, + "step": 180200 + }, + { + "epoch": 0.9299817873192275, + "grad_norm": 23631.94921875, + "learning_rate": 1.0314252965613908e-05, + "loss": 0.3974, + "step": 180250 + }, + { + "epoch": 0.9302397573018404, + "grad_norm": 26213.556640625, + "learning_rate": 1.0292042640070449e-05, + "loss": 0.3983, + "step": 180300 + }, + { + "epoch": 0.9304977272844532, + "grad_norm": 24056.875, + "learning_rate": 1.0269853509865751e-05, + "loss": 0.3979, + "step": 180350 + }, + { + "epoch": 0.930755697267066, + "grad_norm": 24793.658203125, + "learning_rate": 1.0247685586843897e-05, + "loss": 0.3993, + "step": 180400 + }, + { + "epoch": 0.9310136672496788, + "grad_norm": 25296.04296875, + "learning_rate": 1.0225538882837733e-05, + "loss": 0.4047, + "step": 180450 + }, + { + "epoch": 0.9312716372322917, + "grad_norm": 21486.990234375, + "learning_rate": 1.0203413409668722e-05, + "loss": 0.3995, + "step": 180500 + }, + { + "epoch": 0.9315296072149045, + "grad_norm": 24168.083984375, + "learning_rate": 1.018130917914702e-05, + "loss": 0.4081, + "step": 180550 + }, + { + "epoch": 0.9317875771975173, + "grad_norm": 25313.568359375, + "learning_rate": 1.0159226203071431e-05, + "loss": 0.4024, + "step": 180600 + }, + { + "epoch": 0.9320455471801301, + "grad_norm": 22535.845703125, + "learning_rate": 1.0137164493229411e-05, + "loss": 0.3974, + "step": 180650 + }, + { + "epoch": 0.932303517162743, + "grad_norm": 24480.0703125, + "learning_rate": 1.0115124061397102e-05, + "loss": 0.4031, + "step": 180700 + }, + { + "epoch": 0.9325614871453558, + "grad_norm": 29667.470703125, + "learning_rate": 1.0093104919339241e-05, + "loss": 0.3991, + "step": 180750 + }, + { + "epoch": 0.9328194571279685, + "grad_norm": 22311.767578125, + "learning_rate": 1.0071107078809228e-05, + "loss": 0.402, + "step": 180800 + }, + { + "epoch": 0.9330774271105814, + "grad_norm": 22752.642578125, + "learning_rate": 1.0049130551549068e-05, + "loss": 0.4022, + "step": 180850 + }, + { + "epoch": 0.9333353970931942, + "grad_norm": 26333.43359375, + "learning_rate": 1.0027175349289424e-05, + "loss": 0.4006, + "step": 180900 + }, + { + "epoch": 0.9335933670758071, + "grad_norm": 22951.927734375, + "learning_rate": 1.0005241483749533e-05, + "loss": 0.4022, + "step": 180950 + }, + { + "epoch": 0.9338513370584198, + "grad_norm": 24532.15625, + "learning_rate": 9.983328966637318e-06, + "loss": 0.398, + "step": 181000 + }, + { + "epoch": 0.9341093070410327, + "grad_norm": 24624.205078125, + "learning_rate": 9.961437809649188e-06, + "loss": 0.4021, + "step": 181050 + }, + { + "epoch": 0.9343672770236455, + "grad_norm": 23679.087890625, + "learning_rate": 9.93956802447027e-06, + "loss": 0.4038, + "step": 181100 + }, + { + "epoch": 0.9346252470062584, + "grad_norm": 22279.52734375, + "learning_rate": 9.917719622774219e-06, + "loss": 0.3987, + "step": 181150 + }, + { + "epoch": 0.9348832169888712, + "grad_norm": 25709.376953125, + "learning_rate": 9.895892616223268e-06, + "loss": 0.4062, + "step": 181200 + }, + { + "epoch": 0.935141186971484, + "grad_norm": 24607.25, + "learning_rate": 9.874087016468298e-06, + "loss": 0.3973, + "step": 181250 + }, + { + "epoch": 0.9353991569540968, + "grad_norm": 25458.861328125, + "learning_rate": 9.852302835148652e-06, + "loss": 0.3993, + "step": 181300 + }, + { + "epoch": 0.9356571269367097, + "grad_norm": 24070.654296875, + "learning_rate": 9.830540083892358e-06, + "loss": 0.4057, + "step": 181350 + }, + { + "epoch": 0.9359150969193225, + "grad_norm": 25323.736328125, + "learning_rate": 9.80879877431593e-06, + "loss": 0.407, + "step": 181400 + }, + { + "epoch": 0.9361730669019352, + "grad_norm": 27513.087890625, + "learning_rate": 9.787078918024455e-06, + "loss": 0.3979, + "step": 181450 + }, + { + "epoch": 0.9364310368845481, + "grad_norm": 22324.669921875, + "learning_rate": 9.765380526611568e-06, + "loss": 0.3984, + "step": 181500 + }, + { + "epoch": 0.936689006867161, + "grad_norm": 23778.37890625, + "learning_rate": 9.743703611659465e-06, + "loss": 0.4055, + "step": 181550 + }, + { + "epoch": 0.9369469768497738, + "grad_norm": 26777.255859375, + "learning_rate": 9.722048184738864e-06, + "loss": 0.4047, + "step": 181600 + }, + { + "epoch": 0.9372049468323865, + "grad_norm": 23210.876953125, + "learning_rate": 9.700414257409002e-06, + "loss": 0.393, + "step": 181650 + }, + { + "epoch": 0.9374629168149994, + "grad_norm": 22539.84765625, + "learning_rate": 9.67880184121765e-06, + "loss": 0.4069, + "step": 181700 + }, + { + "epoch": 0.9377208867976122, + "grad_norm": 25191.609375, + "learning_rate": 9.65721094770109e-06, + "loss": 0.4069, + "step": 181750 + }, + { + "epoch": 0.9379788567802251, + "grad_norm": 23813.578125, + "learning_rate": 9.63564158838416e-06, + "loss": 0.3954, + "step": 181800 + }, + { + "epoch": 0.9382368267628379, + "grad_norm": 23869.703125, + "learning_rate": 9.614093774780114e-06, + "loss": 0.3998, + "step": 181850 + }, + { + "epoch": 0.9384947967454507, + "grad_norm": 23316.384765625, + "learning_rate": 9.5925675183908e-06, + "loss": 0.3989, + "step": 181900 + }, + { + "epoch": 0.9387527667280635, + "grad_norm": 23641.65625, + "learning_rate": 9.571062830706496e-06, + "loss": 0.4017, + "step": 181950 + }, + { + "epoch": 0.9390107367106764, + "grad_norm": 23724.431640625, + "learning_rate": 9.549579723205982e-06, + "loss": 0.4042, + "step": 182000 + }, + { + "epoch": 0.9392687066932892, + "grad_norm": 24013.849609375, + "learning_rate": 9.528118207356556e-06, + "loss": 0.3966, + "step": 182050 + }, + { + "epoch": 0.939526676675902, + "grad_norm": 21843.55859375, + "learning_rate": 9.506678294613919e-06, + "loss": 0.4051, + "step": 182100 + }, + { + "epoch": 0.9397846466585148, + "grad_norm": 22000.7734375, + "learning_rate": 9.485259996422313e-06, + "loss": 0.4042, + "step": 182150 + }, + { + "epoch": 0.9400426166411276, + "grad_norm": 23307.556640625, + "learning_rate": 9.463863324214395e-06, + "loss": 0.4018, + "step": 182200 + }, + { + "epoch": 0.9403005866237405, + "grad_norm": 22961.353515625, + "learning_rate": 9.4424882894113e-06, + "loss": 0.3991, + "step": 182250 + }, + { + "epoch": 0.9405585566063532, + "grad_norm": 24167.134765625, + "learning_rate": 9.421134903422607e-06, + "loss": 0.4033, + "step": 182300 + }, + { + "epoch": 0.9408165265889661, + "grad_norm": 24116.75, + "learning_rate": 9.399803177646339e-06, + "loss": 0.3979, + "step": 182350 + }, + { + "epoch": 0.9410744965715789, + "grad_norm": 25658.6640625, + "learning_rate": 9.378493123468946e-06, + "loss": 0.4093, + "step": 182400 + }, + { + "epoch": 0.9413324665541918, + "grad_norm": 27761.8828125, + "learning_rate": 9.357204752265341e-06, + "loss": 0.3974, + "step": 182450 + }, + { + "epoch": 0.9415904365368046, + "grad_norm": 23456.90234375, + "learning_rate": 9.335938075398842e-06, + "loss": 0.4072, + "step": 182500 + }, + { + "epoch": 0.9418484065194174, + "grad_norm": 21258.984375, + "learning_rate": 9.314693104221184e-06, + "loss": 0.3952, + "step": 182550 + }, + { + "epoch": 0.9421063765020302, + "grad_norm": 22634.01953125, + "learning_rate": 9.293469850072522e-06, + "loss": 0.402, + "step": 182600 + }, + { + "epoch": 0.9423643464846431, + "grad_norm": 22349.267578125, + "learning_rate": 9.272268324281407e-06, + "loss": 0.3974, + "step": 182650 + }, + { + "epoch": 0.9426223164672559, + "grad_norm": 23658.505859375, + "learning_rate": 9.251088538164837e-06, + "loss": 0.3979, + "step": 182700 + }, + { + "epoch": 0.9428802864498687, + "grad_norm": 26879.39453125, + "learning_rate": 9.229930503028129e-06, + "loss": 0.3965, + "step": 182750 + }, + { + "epoch": 0.9431382564324815, + "grad_norm": 25313.255859375, + "learning_rate": 9.208794230165058e-06, + "loss": 0.4049, + "step": 182800 + }, + { + "epoch": 0.9433962264150944, + "grad_norm": 26135.587890625, + "learning_rate": 9.187679730857756e-06, + "loss": 0.408, + "step": 182850 + }, + { + "epoch": 0.9436541963977072, + "grad_norm": 24064.087890625, + "learning_rate": 9.166587016376715e-06, + "loss": 0.4025, + "step": 182900 + }, + { + "epoch": 0.9439121663803199, + "grad_norm": 24475.30859375, + "learning_rate": 9.145516097980856e-06, + "loss": 0.4019, + "step": 182950 + }, + { + "epoch": 0.9441701363629328, + "grad_norm": 23691.06640625, + "learning_rate": 9.12446698691738e-06, + "loss": 0.4031, + "step": 183000 + }, + { + "epoch": 0.9444281063455456, + "grad_norm": 25653.37109375, + "learning_rate": 9.103439694421928e-06, + "loss": 0.4007, + "step": 183050 + }, + { + "epoch": 0.9446860763281585, + "grad_norm": 22718.71875, + "learning_rate": 9.08243423171845e-06, + "loss": 0.3996, + "step": 183100 + }, + { + "epoch": 0.9449440463107712, + "grad_norm": 23337.986328125, + "learning_rate": 9.061450610019262e-06, + "loss": 0.4043, + "step": 183150 + }, + { + "epoch": 0.9452020162933841, + "grad_norm": 27628.021484375, + "learning_rate": 9.040488840525001e-06, + "loss": 0.409, + "step": 183200 + }, + { + "epoch": 0.9454599862759969, + "grad_norm": 22894.26953125, + "learning_rate": 9.01954893442467e-06, + "loss": 0.4026, + "step": 183250 + }, + { + "epoch": 0.9457179562586098, + "grad_norm": 27624.564453125, + "learning_rate": 8.998630902895566e-06, + "loss": 0.4011, + "step": 183300 + }, + { + "epoch": 0.9459759262412226, + "grad_norm": 25944.05859375, + "learning_rate": 8.977734757103351e-06, + "loss": 0.3995, + "step": 183350 + }, + { + "epoch": 0.9462338962238354, + "grad_norm": 27243.31640625, + "learning_rate": 8.95686050820197e-06, + "loss": 0.3983, + "step": 183400 + }, + { + "epoch": 0.9464918662064482, + "grad_norm": 24556.611328125, + "learning_rate": 8.936008167333699e-06, + "loss": 0.4041, + "step": 183450 + }, + { + "epoch": 0.9467498361890611, + "grad_norm": 22205.880859375, + "learning_rate": 8.915177745629112e-06, + "loss": 0.3973, + "step": 183500 + }, + { + "epoch": 0.9470078061716739, + "grad_norm": 26829.6328125, + "learning_rate": 8.894369254207069e-06, + "loss": 0.4023, + "step": 183550 + }, + { + "epoch": 0.9472657761542866, + "grad_norm": 24388.59765625, + "learning_rate": 8.873582704174776e-06, + "loss": 0.397, + "step": 183600 + }, + { + "epoch": 0.9475237461368995, + "grad_norm": 25665.98828125, + "learning_rate": 8.852818106627647e-06, + "loss": 0.4055, + "step": 183650 + }, + { + "epoch": 0.9477817161195123, + "grad_norm": 24880.47265625, + "learning_rate": 8.83207547264946e-06, + "loss": 0.4016, + "step": 183700 + }, + { + "epoch": 0.9480396861021252, + "grad_norm": 26516.6953125, + "learning_rate": 8.81135481331221e-06, + "loss": 0.3992, + "step": 183750 + }, + { + "epoch": 0.9482976560847379, + "grad_norm": 22604.123046875, + "learning_rate": 8.790656139676179e-06, + "loss": 0.401, + "step": 183800 + }, + { + "epoch": 0.9485556260673508, + "grad_norm": 24668.94921875, + "learning_rate": 8.769979462789957e-06, + "loss": 0.3974, + "step": 183850 + }, + { + "epoch": 0.9488135960499636, + "grad_norm": 26522.896484375, + "learning_rate": 8.749324793690295e-06, + "loss": 0.4048, + "step": 183900 + }, + { + "epoch": 0.9490715660325765, + "grad_norm": 26786.48046875, + "learning_rate": 8.728692143402295e-06, + "loss": 0.4075, + "step": 183950 + }, + { + "epoch": 0.9493295360151893, + "grad_norm": 23683.54296875, + "learning_rate": 8.708081522939265e-06, + "loss": 0.3996, + "step": 184000 + }, + { + "epoch": 0.9495875059978021, + "grad_norm": 23064.400390625, + "learning_rate": 8.687492943302739e-06, + "loss": 0.4036, + "step": 184050 + }, + { + "epoch": 0.9498454759804149, + "grad_norm": 24142.4921875, + "learning_rate": 8.666926415482501e-06, + "loss": 0.4023, + "step": 184100 + }, + { + "epoch": 0.9501034459630278, + "grad_norm": 24012.076171875, + "learning_rate": 8.6463819504566e-06, + "loss": 0.4024, + "step": 184150 + }, + { + "epoch": 0.9503614159456406, + "grad_norm": 22214.41015625, + "learning_rate": 8.625859559191224e-06, + "loss": 0.4002, + "step": 184200 + }, + { + "epoch": 0.9506193859282533, + "grad_norm": 24664.162109375, + "learning_rate": 8.60535925264086e-06, + "loss": 0.4027, + "step": 184250 + }, + { + "epoch": 0.9508773559108662, + "grad_norm": 21136.900390625, + "learning_rate": 8.584881041748171e-06, + "loss": 0.3957, + "step": 184300 + }, + { + "epoch": 0.951135325893479, + "grad_norm": 22411.33984375, + "learning_rate": 8.56442493744401e-06, + "loss": 0.3977, + "step": 184350 + }, + { + "epoch": 0.9513932958760919, + "grad_norm": 23004.173828125, + "learning_rate": 8.54399095064749e-06, + "loss": 0.4014, + "step": 184400 + }, + { + "epoch": 0.9516512658587046, + "grad_norm": 23692.26171875, + "learning_rate": 8.523579092265827e-06, + "loss": 0.4013, + "step": 184450 + }, + { + "epoch": 0.9519092358413175, + "grad_norm": 25310.919921875, + "learning_rate": 8.503189373194509e-06, + "loss": 0.3961, + "step": 184500 + }, + { + "epoch": 0.9521672058239303, + "grad_norm": 25963.943359375, + "learning_rate": 8.482821804317171e-06, + "loss": 0.4049, + "step": 184550 + }, + { + "epoch": 0.9524251758065432, + "grad_norm": 24282.115234375, + "learning_rate": 8.46247639650562e-06, + "loss": 0.4008, + "step": 184600 + }, + { + "epoch": 0.952683145789156, + "grad_norm": 24703.26953125, + "learning_rate": 8.442153160619837e-06, + "loss": 0.4063, + "step": 184650 + }, + { + "epoch": 0.9529411157717688, + "grad_norm": 23616.09375, + "learning_rate": 8.421852107507966e-06, + "loss": 0.3974, + "step": 184700 + }, + { + "epoch": 0.9531990857543816, + "grad_norm": 25447.408203125, + "learning_rate": 8.40157324800634e-06, + "loss": 0.4066, + "step": 184750 + }, + { + "epoch": 0.9534570557369945, + "grad_norm": 25534.3984375, + "learning_rate": 8.381316592939403e-06, + "loss": 0.4027, + "step": 184800 + }, + { + "epoch": 0.9537150257196073, + "grad_norm": 24251.138671875, + "learning_rate": 8.361082153119777e-06, + "loss": 0.3958, + "step": 184850 + }, + { + "epoch": 0.95397299570222, + "grad_norm": 26980.046875, + "learning_rate": 8.3408699393482e-06, + "loss": 0.4058, + "step": 184900 + }, + { + "epoch": 0.9542309656848329, + "grad_norm": 26143.732421875, + "learning_rate": 8.320679962413574e-06, + "loss": 0.4006, + "step": 184950 + }, + { + "epoch": 0.9544889356674457, + "grad_norm": 24566.15234375, + "learning_rate": 8.300512233092893e-06, + "loss": 0.405, + "step": 185000 + }, + { + "epoch": 0.9544889356674457, + "eval_loss": 0.3880694806575775, + "eval_runtime": 3197.8794, + "eval_samples_per_second": 969.743, + "eval_steps_per_second": 1.894, + "step": 185000 + }, + { + "epoch": 0.9547469056500586, + "grad_norm": 22463.359375, + "learning_rate": 8.280366762151349e-06, + "loss": 0.4035, + "step": 185050 + }, + { + "epoch": 0.9550048756326713, + "grad_norm": 23964.845703125, + "learning_rate": 8.260243560342146e-06, + "loss": 0.399, + "step": 185100 + }, + { + "epoch": 0.9552628456152842, + "grad_norm": 22267.978515625, + "learning_rate": 8.2401426384067e-06, + "loss": 0.4065, + "step": 185150 + }, + { + "epoch": 0.955520815597897, + "grad_norm": 23959.732421875, + "learning_rate": 8.220064007074485e-06, + "loss": 0.3988, + "step": 185200 + }, + { + "epoch": 0.9557787855805099, + "grad_norm": 22042.95703125, + "learning_rate": 8.200007677063066e-06, + "loss": 0.4005, + "step": 185250 + }, + { + "epoch": 0.9560367555631226, + "grad_norm": 23760.798828125, + "learning_rate": 8.17997365907816e-06, + "loss": 0.4043, + "step": 185300 + }, + { + "epoch": 0.9562947255457355, + "grad_norm": 23235.8828125, + "learning_rate": 8.1599619638135e-06, + "loss": 0.3999, + "step": 185350 + }, + { + "epoch": 0.9565526955283483, + "grad_norm": 22637.701171875, + "learning_rate": 8.139972601950967e-06, + "loss": 0.4004, + "step": 185400 + }, + { + "epoch": 0.9568106655109612, + "grad_norm": 28806.810546875, + "learning_rate": 8.120005584160489e-06, + "loss": 0.4022, + "step": 185450 + }, + { + "epoch": 0.957068635493574, + "grad_norm": 22143.8203125, + "learning_rate": 8.100060921100067e-06, + "loss": 0.3977, + "step": 185500 + }, + { + "epoch": 0.9573266054761868, + "grad_norm": 22921.810546875, + "learning_rate": 8.080138623415783e-06, + "loss": 0.4, + "step": 185550 + }, + { + "epoch": 0.9575845754587996, + "grad_norm": 25425.640625, + "learning_rate": 8.060238701741762e-06, + "loss": 0.4021, + "step": 185600 + }, + { + "epoch": 0.9578425454414125, + "grad_norm": 27279.6796875, + "learning_rate": 8.040361166700216e-06, + "loss": 0.4064, + "step": 185650 + }, + { + "epoch": 0.9581005154240253, + "grad_norm": 25144.322265625, + "learning_rate": 8.020506028901376e-06, + "loss": 0.4031, + "step": 185700 + }, + { + "epoch": 0.958358485406638, + "grad_norm": 21046.607421875, + "learning_rate": 8.000673298943534e-06, + "loss": 0.4041, + "step": 185750 + }, + { + "epoch": 0.9586164553892509, + "grad_norm": 23166.087890625, + "learning_rate": 7.980862987413018e-06, + "loss": 0.3996, + "step": 185800 + }, + { + "epoch": 0.9588744253718637, + "grad_norm": 23506.693359375, + "learning_rate": 7.961075104884186e-06, + "loss": 0.3973, + "step": 185850 + }, + { + "epoch": 0.9591323953544766, + "grad_norm": 25975.408203125, + "learning_rate": 7.94130966191941e-06, + "loss": 0.4048, + "step": 185900 + }, + { + "epoch": 0.9593903653370893, + "grad_norm": 23704.638671875, + "learning_rate": 7.921566669069147e-06, + "loss": 0.4045, + "step": 185950 + }, + { + "epoch": 0.9596483353197022, + "grad_norm": 27402.2421875, + "learning_rate": 7.901846136871766e-06, + "loss": 0.4007, + "step": 186000 + }, + { + "epoch": 0.959906305302315, + "grad_norm": 23186.658203125, + "learning_rate": 7.882148075853752e-06, + "loss": 0.4072, + "step": 186050 + }, + { + "epoch": 0.9601642752849279, + "grad_norm": 24789.619140625, + "learning_rate": 7.862472496529528e-06, + "loss": 0.4056, + "step": 186100 + }, + { + "epoch": 0.9604222452675407, + "grad_norm": 23849.71875, + "learning_rate": 7.842819409401524e-06, + "loss": 0.4067, + "step": 186150 + }, + { + "epoch": 0.9606802152501535, + "grad_norm": 24820.765625, + "learning_rate": 7.823188824960221e-06, + "loss": 0.4071, + "step": 186200 + }, + { + "epoch": 0.9609381852327663, + "grad_norm": 23276.568359375, + "learning_rate": 7.803580753683992e-06, + "loss": 0.3989, + "step": 186250 + }, + { + "epoch": 0.9611961552153792, + "grad_norm": 21064.8984375, + "learning_rate": 7.783995206039279e-06, + "loss": 0.3994, + "step": 186300 + }, + { + "epoch": 0.961454125197992, + "grad_norm": 27310.30078125, + "learning_rate": 7.764432192480464e-06, + "loss": 0.4015, + "step": 186350 + }, + { + "epoch": 0.9617120951806047, + "grad_norm": 24786.1796875, + "learning_rate": 7.744891723449888e-06, + "loss": 0.4042, + "step": 186400 + }, + { + "epoch": 0.9619700651632176, + "grad_norm": 22362.47265625, + "learning_rate": 7.725373809377911e-06, + "loss": 0.3991, + "step": 186450 + }, + { + "epoch": 0.9622280351458304, + "grad_norm": 23751.4296875, + "learning_rate": 7.705878460682775e-06, + "loss": 0.3988, + "step": 186500 + }, + { + "epoch": 0.9624860051284433, + "grad_norm": 22956.935546875, + "learning_rate": 7.686405687770748e-06, + "loss": 0.4049, + "step": 186550 + }, + { + "epoch": 0.962743975111056, + "grad_norm": 25276.861328125, + "learning_rate": 7.666955501036006e-06, + "loss": 0.4005, + "step": 186600 + }, + { + "epoch": 0.9630019450936689, + "grad_norm": 22390.625, + "learning_rate": 7.647527910860691e-06, + "loss": 0.4008, + "step": 186650 + }, + { + "epoch": 0.9632599150762817, + "grad_norm": 28946.125, + "learning_rate": 7.628122927614856e-06, + "loss": 0.3987, + "step": 186700 + }, + { + "epoch": 0.9635178850588946, + "grad_norm": 23663.3125, + "learning_rate": 7.608740561656541e-06, + "loss": 0.4006, + "step": 186750 + }, + { + "epoch": 0.9637758550415074, + "grad_norm": 21705.16015625, + "learning_rate": 7.589380823331632e-06, + "loss": 0.4023, + "step": 186800 + }, + { + "epoch": 0.9640338250241202, + "grad_norm": 25353.228515625, + "learning_rate": 7.570043722974019e-06, + "loss": 0.4006, + "step": 186850 + }, + { + "epoch": 0.964291795006733, + "grad_norm": 26046.412109375, + "learning_rate": 7.55072927090546e-06, + "loss": 0.3931, + "step": 186900 + }, + { + "epoch": 0.9645497649893459, + "grad_norm": 25989.2578125, + "learning_rate": 7.531437477435621e-06, + "loss": 0.3989, + "step": 186950 + }, + { + "epoch": 0.9648077349719587, + "grad_norm": 22714.423828125, + "learning_rate": 7.51216835286212e-06, + "loss": 0.4018, + "step": 187000 + }, + { + "epoch": 0.9650657049545714, + "grad_norm": 26353.42578125, + "learning_rate": 7.492921907470407e-06, + "loss": 0.4056, + "step": 187050 + }, + { + "epoch": 0.9653236749371843, + "grad_norm": 23085.212890625, + "learning_rate": 7.4736981515338864e-06, + "loss": 0.3995, + "step": 187100 + }, + { + "epoch": 0.9655816449197971, + "grad_norm": 23125.970703125, + "learning_rate": 7.454497095313817e-06, + "loss": 0.4069, + "step": 187150 + }, + { + "epoch": 0.96583961490241, + "grad_norm": 23488.2265625, + "learning_rate": 7.435318749059356e-06, + "loss": 0.4039, + "step": 187200 + }, + { + "epoch": 0.9660975848850227, + "grad_norm": 22577.46875, + "learning_rate": 7.4161631230075305e-06, + "loss": 0.4051, + "step": 187250 + }, + { + "epoch": 0.9663555548676356, + "grad_norm": 22637.890625, + "learning_rate": 7.397030227383228e-06, + "loss": 0.3986, + "step": 187300 + }, + { + "epoch": 0.9666135248502484, + "grad_norm": 26084.412109375, + "learning_rate": 7.377920072399247e-06, + "loss": 0.398, + "step": 187350 + }, + { + "epoch": 0.9668714948328613, + "grad_norm": 25263.6328125, + "learning_rate": 7.3588326682562e-06, + "loss": 0.4035, + "step": 187400 + }, + { + "epoch": 0.9671294648154741, + "grad_norm": 22348.236328125, + "learning_rate": 7.339768025142573e-06, + "loss": 0.4003, + "step": 187450 + }, + { + "epoch": 0.9673874347980869, + "grad_norm": 23006.091796875, + "learning_rate": 7.320726153234714e-06, + "loss": 0.399, + "step": 187500 + }, + { + "epoch": 0.9676454047806997, + "grad_norm": 24137.44921875, + "learning_rate": 7.301707062696794e-06, + "loss": 0.3999, + "step": 187550 + }, + { + "epoch": 0.9679033747633126, + "grad_norm": 26101.837890625, + "learning_rate": 7.282710763680828e-06, + "loss": 0.4007, + "step": 187600 + }, + { + "epoch": 0.9681613447459254, + "grad_norm": 21417.814453125, + "learning_rate": 7.263737266326709e-06, + "loss": 0.3994, + "step": 187650 + }, + { + "epoch": 0.9684193147285381, + "grad_norm": 25831.45703125, + "learning_rate": 7.244786580762075e-06, + "loss": 0.3925, + "step": 187700 + }, + { + "epoch": 0.968677284711151, + "grad_norm": 24546.84765625, + "learning_rate": 7.225858717102474e-06, + "loss": 0.4004, + "step": 187750 + }, + { + "epoch": 0.9689352546937638, + "grad_norm": 23773.09765625, + "learning_rate": 7.206953685451212e-06, + "loss": 0.4041, + "step": 187800 + }, + { + "epoch": 0.9691932246763767, + "grad_norm": 23538.923828125, + "learning_rate": 7.188071495899423e-06, + "loss": 0.3971, + "step": 187850 + }, + { + "epoch": 0.9694511946589894, + "grad_norm": 24968.310546875, + "learning_rate": 7.169212158526084e-06, + "loss": 0.4047, + "step": 187900 + }, + { + "epoch": 0.9697091646416023, + "grad_norm": 24379.23828125, + "learning_rate": 7.150375683397908e-06, + "loss": 0.3983, + "step": 187950 + }, + { + "epoch": 0.9699671346242151, + "grad_norm": 25501.638671875, + "learning_rate": 7.131562080569465e-06, + "loss": 0.4024, + "step": 188000 + }, + { + "epoch": 0.970225104606828, + "grad_norm": 24917.73046875, + "learning_rate": 7.112771360083087e-06, + "loss": 0.3998, + "step": 188050 + }, + { + "epoch": 0.9704830745894407, + "grad_norm": 24725.638671875, + "learning_rate": 7.094003531968896e-06, + "loss": 0.3964, + "step": 188100 + }, + { + "epoch": 0.9707410445720536, + "grad_norm": 23913.5703125, + "learning_rate": 7.075258606244789e-06, + "loss": 0.3987, + "step": 188150 + }, + { + "epoch": 0.9709990145546664, + "grad_norm": 25010.09375, + "learning_rate": 7.05653659291644e-06, + "loss": 0.4021, + "step": 188200 + }, + { + "epoch": 0.9712569845372793, + "grad_norm": 25357.556640625, + "learning_rate": 7.037837501977318e-06, + "loss": 0.4007, + "step": 188250 + }, + { + "epoch": 0.9715149545198921, + "grad_norm": 24599.890625, + "learning_rate": 7.019161343408625e-06, + "loss": 0.3962, + "step": 188300 + }, + { + "epoch": 0.9717729245025049, + "grad_norm": 25866.2734375, + "learning_rate": 7.000508127179328e-06, + "loss": 0.3983, + "step": 188350 + }, + { + "epoch": 0.9720308944851177, + "grad_norm": 22591.40625, + "learning_rate": 6.981877863246161e-06, + "loss": 0.3971, + "step": 188400 + }, + { + "epoch": 0.9722888644677306, + "grad_norm": 20752.091796875, + "learning_rate": 6.963270561553586e-06, + "loss": 0.3946, + "step": 188450 + }, + { + "epoch": 0.9725468344503434, + "grad_norm": 22927.109375, + "learning_rate": 6.94468623203382e-06, + "loss": 0.4036, + "step": 188500 + }, + { + "epoch": 0.9728048044329561, + "grad_norm": 27096.041015625, + "learning_rate": 6.92612488460685e-06, + "loss": 0.3982, + "step": 188550 + }, + { + "epoch": 0.973062774415569, + "grad_norm": 24426.93359375, + "learning_rate": 6.907586529180321e-06, + "loss": 0.4054, + "step": 188600 + }, + { + "epoch": 0.9733207443981818, + "grad_norm": 25097.658203125, + "learning_rate": 6.889071175649669e-06, + "loss": 0.4015, + "step": 188650 + }, + { + "epoch": 0.9735787143807947, + "grad_norm": 24646.548828125, + "learning_rate": 6.870578833898033e-06, + "loss": 0.3977, + "step": 188700 + }, + { + "epoch": 0.9738366843634074, + "grad_norm": 23465.357421875, + "learning_rate": 6.852109513796257e-06, + "loss": 0.396, + "step": 188750 + }, + { + "epoch": 0.9740946543460203, + "grad_norm": 22382.603515625, + "learning_rate": 6.83366322520293e-06, + "loss": 0.4018, + "step": 188800 + }, + { + "epoch": 0.9743526243286331, + "grad_norm": 24666.61328125, + "learning_rate": 6.815239977964283e-06, + "loss": 0.4046, + "step": 188850 + }, + { + "epoch": 0.974610594311246, + "grad_norm": 25308.685546875, + "learning_rate": 6.796839781914321e-06, + "loss": 0.3998, + "step": 188900 + }, + { + "epoch": 0.9748685642938588, + "grad_norm": 24856.64453125, + "learning_rate": 6.778462646874706e-06, + "loss": 0.4014, + "step": 188950 + }, + { + "epoch": 0.9751265342764716, + "grad_norm": 27452.50390625, + "learning_rate": 6.760108582654795e-06, + "loss": 0.4008, + "step": 189000 + }, + { + "epoch": 0.9753845042590844, + "grad_norm": 25027.416015625, + "learning_rate": 6.741777599051629e-06, + "loss": 0.4006, + "step": 189050 + }, + { + "epoch": 0.9756424742416973, + "grad_norm": 24687.740234375, + "learning_rate": 6.723469705849927e-06, + "loss": 0.4056, + "step": 189100 + }, + { + "epoch": 0.9759004442243101, + "grad_norm": 24812.55078125, + "learning_rate": 6.705184912822105e-06, + "loss": 0.4043, + "step": 189150 + }, + { + "epoch": 0.9761584142069228, + "grad_norm": 25776.005859375, + "learning_rate": 6.686923229728214e-06, + "loss": 0.4052, + "step": 189200 + }, + { + "epoch": 0.9764163841895357, + "grad_norm": 24319.34765625, + "learning_rate": 6.668684666316005e-06, + "loss": 0.4014, + "step": 189250 + }, + { + "epoch": 0.9766743541721485, + "grad_norm": 28024.419921875, + "learning_rate": 6.650469232320839e-06, + "loss": 0.3991, + "step": 189300 + }, + { + "epoch": 0.9769323241547614, + "grad_norm": 25074.068359375, + "learning_rate": 6.6322769374658085e-06, + "loss": 0.4034, + "step": 189350 + }, + { + "epoch": 0.9771902941373741, + "grad_norm": 21126.572265625, + "learning_rate": 6.61410779146156e-06, + "loss": 0.3998, + "step": 189400 + }, + { + "epoch": 0.977448264119987, + "grad_norm": 25041.337890625, + "learning_rate": 6.595961804006467e-06, + "loss": 0.4012, + "step": 189450 + }, + { + "epoch": 0.9777062341025998, + "grad_norm": 25474.263671875, + "learning_rate": 6.577838984786489e-06, + "loss": 0.3991, + "step": 189500 + }, + { + "epoch": 0.9779642040852127, + "grad_norm": 22192.98828125, + "learning_rate": 6.55973934347523e-06, + "loss": 0.3965, + "step": 189550 + }, + { + "epoch": 0.9782221740678255, + "grad_norm": 24587.9453125, + "learning_rate": 6.5416628897339625e-06, + "loss": 0.4008, + "step": 189600 + }, + { + "epoch": 0.9784801440504383, + "grad_norm": 23246.314453125, + "learning_rate": 6.523609633211497e-06, + "loss": 0.4036, + "step": 189650 + }, + { + "epoch": 0.9787381140330511, + "grad_norm": 24233.033203125, + "learning_rate": 6.505579583544353e-06, + "loss": 0.4002, + "step": 189700 + }, + { + "epoch": 0.978996084015664, + "grad_norm": 24149.6953125, + "learning_rate": 6.487572750356602e-06, + "loss": 0.4043, + "step": 189750 + }, + { + "epoch": 0.9792540539982768, + "grad_norm": 25376.3046875, + "learning_rate": 6.469589143259952e-06, + "loss": 0.3997, + "step": 189800 + }, + { + "epoch": 0.9795120239808895, + "grad_norm": 25878.90625, + "learning_rate": 6.451628771853696e-06, + "loss": 0.3936, + "step": 189850 + }, + { + "epoch": 0.9797699939635024, + "grad_norm": 24123.169921875, + "learning_rate": 6.433691645724743e-06, + "loss": 0.3976, + "step": 189900 + }, + { + "epoch": 0.9800279639461152, + "grad_norm": 23894.5625, + "learning_rate": 6.4157777744475626e-06, + "loss": 0.4025, + "step": 189950 + }, + { + "epoch": 0.9802859339287281, + "grad_norm": 27271.9609375, + "learning_rate": 6.3978871675842544e-06, + "loss": 0.4007, + "step": 190000 + }, + { + "epoch": 0.9802859339287281, + "eval_loss": 0.3872862458229065, + "eval_runtime": 3184.1416, + "eval_samples_per_second": 973.927, + "eval_steps_per_second": 1.902, + "step": 190000 + }, + { + "epoch": 0.9805439039113408, + "grad_norm": 25592.9296875, + "learning_rate": 6.380019834684475e-06, + "loss": 0.4041, + "step": 190050 + }, + { + "epoch": 0.9808018738939537, + "grad_norm": 22425.51953125, + "learning_rate": 6.362175785285457e-06, + "loss": 0.4028, + "step": 190100 + }, + { + "epoch": 0.9810598438765665, + "grad_norm": 25178.28125, + "learning_rate": 6.344355028912008e-06, + "loss": 0.3972, + "step": 190150 + }, + { + "epoch": 0.9813178138591794, + "grad_norm": 25157.537109375, + "learning_rate": 6.326557575076486e-06, + "loss": 0.3989, + "step": 190200 + }, + { + "epoch": 0.9815757838417921, + "grad_norm": 23774.67578125, + "learning_rate": 6.3087834332788695e-06, + "loss": 0.4057, + "step": 190250 + }, + { + "epoch": 0.981833753824405, + "grad_norm": 25307.736328125, + "learning_rate": 6.2910326130066035e-06, + "loss": 0.3946, + "step": 190300 + }, + { + "epoch": 0.9820917238070178, + "grad_norm": 28657.8125, + "learning_rate": 6.273305123734769e-06, + "loss": 0.4006, + "step": 190350 + }, + { + "epoch": 0.9823496937896307, + "grad_norm": 24404.603515625, + "learning_rate": 6.255600974925935e-06, + "loss": 0.3998, + "step": 190400 + }, + { + "epoch": 0.9826076637722435, + "grad_norm": 22460.1640625, + "learning_rate": 6.237920176030232e-06, + "loss": 0.4039, + "step": 190450 + }, + { + "epoch": 0.9828656337548562, + "grad_norm": 27335.625, + "learning_rate": 6.220262736485355e-06, + "loss": 0.3937, + "step": 190500 + }, + { + "epoch": 0.9831236037374691, + "grad_norm": 27996.9765625, + "learning_rate": 6.202628665716464e-06, + "loss": 0.4025, + "step": 190550 + }, + { + "epoch": 0.983381573720082, + "grad_norm": 23532.66796875, + "learning_rate": 6.18501797313632e-06, + "loss": 0.4007, + "step": 190600 + }, + { + "epoch": 0.9836395437026948, + "grad_norm": 27360.333984375, + "learning_rate": 6.167430668145146e-06, + "loss": 0.3994, + "step": 190650 + }, + { + "epoch": 0.9838975136853075, + "grad_norm": 23754.23828125, + "learning_rate": 6.149866760130718e-06, + "loss": 0.4043, + "step": 190700 + }, + { + "epoch": 0.9841554836679204, + "grad_norm": 24313.943359375, + "learning_rate": 6.1323262584683075e-06, + "loss": 0.4039, + "step": 190750 + }, + { + "epoch": 0.9844134536505332, + "grad_norm": 22932.11328125, + "learning_rate": 6.114809172520686e-06, + "loss": 0.3977, + "step": 190800 + }, + { + "epoch": 0.9846714236331461, + "grad_norm": 27614.103515625, + "learning_rate": 6.097315511638135e-06, + "loss": 0.405, + "step": 190850 + }, + { + "epoch": 0.9849293936157588, + "grad_norm": 21648.470703125, + "learning_rate": 6.079845285158447e-06, + "loss": 0.403, + "step": 190900 + }, + { + "epoch": 0.9851873635983717, + "grad_norm": 25720.76953125, + "learning_rate": 6.0623985024068854e-06, + "loss": 0.4069, + "step": 190950 + }, + { + "epoch": 0.9854453335809845, + "grad_norm": 22051.30078125, + "learning_rate": 6.044975172696199e-06, + "loss": 0.4062, + "step": 191000 + }, + { + "epoch": 0.9857033035635974, + "grad_norm": 27862.138671875, + "learning_rate": 6.027575305326621e-06, + "loss": 0.4029, + "step": 191050 + }, + { + "epoch": 0.9859612735462102, + "grad_norm": 24624.951171875, + "learning_rate": 6.010198909585862e-06, + "loss": 0.3995, + "step": 191100 + }, + { + "epoch": 0.986219243528823, + "grad_norm": 23278.45703125, + "learning_rate": 5.992845994749136e-06, + "loss": 0.3981, + "step": 191150 + }, + { + "epoch": 0.9864772135114358, + "grad_norm": 27549.26953125, + "learning_rate": 5.975516570079048e-06, + "loss": 0.3999, + "step": 191200 + }, + { + "epoch": 0.9867351834940487, + "grad_norm": 24570.40625, + "learning_rate": 5.95821064482574e-06, + "loss": 0.4052, + "step": 191250 + }, + { + "epoch": 0.9869931534766615, + "grad_norm": 23672.029296875, + "learning_rate": 5.9409282282267665e-06, + "loss": 0.4045, + "step": 191300 + }, + { + "epoch": 0.9872511234592742, + "grad_norm": 22627.697265625, + "learning_rate": 5.923669329507148e-06, + "loss": 0.4017, + "step": 191350 + }, + { + "epoch": 0.9875090934418871, + "grad_norm": 22583.0390625, + "learning_rate": 5.906433957879365e-06, + "loss": 0.399, + "step": 191400 + }, + { + "epoch": 0.9877670634244999, + "grad_norm": 22665.984375, + "learning_rate": 5.889222122543298e-06, + "loss": 0.3989, + "step": 191450 + }, + { + "epoch": 0.9880250334071128, + "grad_norm": 25125.6640625, + "learning_rate": 5.872033832686319e-06, + "loss": 0.4001, + "step": 191500 + }, + { + "epoch": 0.9882830033897255, + "grad_norm": 24863.34375, + "learning_rate": 5.8548690974831845e-06, + "loss": 0.3991, + "step": 191550 + }, + { + "epoch": 0.9885409733723384, + "grad_norm": 23538.44921875, + "learning_rate": 5.837727926096109e-06, + "loss": 0.3979, + "step": 191600 + }, + { + "epoch": 0.9887989433549512, + "grad_norm": 23396.3203125, + "learning_rate": 5.820610327674708e-06, + "loss": 0.4049, + "step": 191650 + }, + { + "epoch": 0.9890569133375641, + "grad_norm": 22553.01171875, + "learning_rate": 5.803516311356044e-06, + "loss": 0.3983, + "step": 191700 + }, + { + "epoch": 0.9893148833201769, + "grad_norm": 25163.04296875, + "learning_rate": 5.786445886264541e-06, + "loss": 0.3969, + "step": 191750 + }, + { + "epoch": 0.9895728533027897, + "grad_norm": 22826.181640625, + "learning_rate": 5.769399061512093e-06, + "loss": 0.4016, + "step": 191800 + }, + { + "epoch": 0.9898308232854025, + "grad_norm": 22302.7265625, + "learning_rate": 5.752375846197944e-06, + "loss": 0.3988, + "step": 191850 + }, + { + "epoch": 0.9900887932680154, + "grad_norm": 20985.990234375, + "learning_rate": 5.735376249408753e-06, + "loss": 0.3952, + "step": 191900 + }, + { + "epoch": 0.9903467632506282, + "grad_norm": 23513.19921875, + "learning_rate": 5.718400280218611e-06, + "loss": 0.4052, + "step": 191950 + }, + { + "epoch": 0.9906047332332409, + "grad_norm": 23184.818359375, + "learning_rate": 5.7014479476889145e-06, + "loss": 0.399, + "step": 192000 + }, + { + "epoch": 0.9908627032158538, + "grad_norm": 23472.9453125, + "learning_rate": 5.684519260868521e-06, + "loss": 0.3946, + "step": 192050 + }, + { + "epoch": 0.9911206731984666, + "grad_norm": 26255.388671875, + "learning_rate": 5.667614228793622e-06, + "loss": 0.3964, + "step": 192100 + }, + { + "epoch": 0.9913786431810795, + "grad_norm": 23894.54296875, + "learning_rate": 5.650732860487806e-06, + "loss": 0.3928, + "step": 192150 + }, + { + "epoch": 0.9916366131636922, + "grad_norm": 24135.478515625, + "learning_rate": 5.633875164962016e-06, + "loss": 0.4019, + "step": 192200 + }, + { + "epoch": 0.9918945831463051, + "grad_norm": 26928.08984375, + "learning_rate": 5.617041151214553e-06, + "loss": 0.3958, + "step": 192250 + }, + { + "epoch": 0.9921525531289179, + "grad_norm": 22469.884765625, + "learning_rate": 5.600230828231107e-06, + "loss": 0.4031, + "step": 192300 + }, + { + "epoch": 0.9924105231115308, + "grad_norm": 23694.59765625, + "learning_rate": 5.583444204984695e-06, + "loss": 0.3926, + "step": 192350 + }, + { + "epoch": 0.9926684930941435, + "grad_norm": 23482.986328125, + "learning_rate": 5.566681290435688e-06, + "loss": 0.4112, + "step": 192400 + }, + { + "epoch": 0.9929264630767564, + "grad_norm": 22524.994140625, + "learning_rate": 5.549942093531812e-06, + "loss": 0.3981, + "step": 192450 + }, + { + "epoch": 0.9931844330593692, + "grad_norm": 27258.35546875, + "learning_rate": 5.5332266232081155e-06, + "loss": 0.4024, + "step": 192500 + }, + { + "epoch": 0.9934424030419821, + "grad_norm": 19928.40625, + "learning_rate": 5.516534888386992e-06, + "loss": 0.4028, + "step": 192550 + }, + { + "epoch": 0.9937003730245949, + "grad_norm": 21809.205078125, + "learning_rate": 5.499866897978189e-06, + "loss": 0.3996, + "step": 192600 + }, + { + "epoch": 0.9939583430072076, + "grad_norm": 22132.6171875, + "learning_rate": 5.483222660878729e-06, + "loss": 0.4012, + "step": 192650 + }, + { + "epoch": 0.9942163129898205, + "grad_norm": 25306.728515625, + "learning_rate": 5.466602185973002e-06, + "loss": 0.3987, + "step": 192700 + }, + { + "epoch": 0.9944742829724333, + "grad_norm": 29266.78515625, + "learning_rate": 5.4500054821326865e-06, + "loss": 0.4028, + "step": 192750 + }, + { + "epoch": 0.9947322529550462, + "grad_norm": 23506.931640625, + "learning_rate": 5.433432558216778e-06, + "loss": 0.3948, + "step": 192800 + }, + { + "epoch": 0.9949902229376589, + "grad_norm": 22564.177734375, + "learning_rate": 5.416883423071606e-06, + "loss": 0.4015, + "step": 192850 + }, + { + "epoch": 0.9952481929202718, + "grad_norm": 24564.380859375, + "learning_rate": 5.400358085530738e-06, + "loss": 0.4046, + "step": 192900 + }, + { + "epoch": 0.9955061629028846, + "grad_norm": 24793.91796875, + "learning_rate": 5.383856554415117e-06, + "loss": 0.4003, + "step": 192950 + }, + { + "epoch": 0.9957641328854975, + "grad_norm": 23798.228515625, + "learning_rate": 5.367378838532927e-06, + "loss": 0.3982, + "step": 193000 + }, + { + "epoch": 0.9960221028681102, + "grad_norm": 23164.642578125, + "learning_rate": 5.350924946679653e-06, + "loss": 0.3977, + "step": 193050 + }, + { + "epoch": 0.9962800728507231, + "grad_norm": 25646.29296875, + "learning_rate": 5.334494887638058e-06, + "loss": 0.3992, + "step": 193100 + }, + { + "epoch": 0.9965380428333359, + "grad_norm": 24146.2421875, + "learning_rate": 5.318088670178189e-06, + "loss": 0.4037, + "step": 193150 + }, + { + "epoch": 0.9967960128159488, + "grad_norm": 22594.72265625, + "learning_rate": 5.301706303057386e-06, + "loss": 0.4004, + "step": 193200 + }, + { + "epoch": 0.9970539827985616, + "grad_norm": 23395.515625, + "learning_rate": 5.285347795020224e-06, + "loss": 0.3958, + "step": 193250 + }, + { + "epoch": 0.9973119527811743, + "grad_norm": 23383.431640625, + "learning_rate": 5.269013154798558e-06, + "loss": 0.3998, + "step": 193300 + }, + { + "epoch": 0.9975699227637872, + "grad_norm": 20586.341796875, + "learning_rate": 5.252702391111508e-06, + "loss": 0.3979, + "step": 193350 + }, + { + "epoch": 0.9978278927464, + "grad_norm": 26526.83203125, + "learning_rate": 5.236415512665438e-06, + "loss": 0.4036, + "step": 193400 + }, + { + "epoch": 0.9980858627290129, + "grad_norm": 25045.224609375, + "learning_rate": 5.220152528153965e-06, + "loss": 0.4028, + "step": 193450 + }, + { + "epoch": 0.9983438327116256, + "grad_norm": 23480.755859375, + "learning_rate": 5.20391344625798e-06, + "loss": 0.4053, + "step": 193500 + }, + { + "epoch": 0.9986018026942385, + "grad_norm": 25235.927734375, + "learning_rate": 5.187698275645553e-06, + "loss": 0.3964, + "step": 193550 + }, + { + "epoch": 0.9988597726768513, + "grad_norm": 24883.29296875, + "learning_rate": 5.1715070249720555e-06, + "loss": 0.3978, + "step": 193600 + }, + { + "epoch": 0.9991177426594642, + "grad_norm": 25161.71484375, + "learning_rate": 5.155339702880052e-06, + "loss": 0.3998, + "step": 193650 + }, + { + "epoch": 0.9993757126420769, + "grad_norm": 21524.724609375, + "learning_rate": 5.13919631799934e-06, + "loss": 0.3955, + "step": 193700 + }, + { + "epoch": 0.9996336826246898, + "grad_norm": 23394.1015625, + "learning_rate": 5.123076878946981e-06, + "loss": 0.3962, + "step": 193750 + }, + { + "epoch": 0.9998916526073026, + "grad_norm": 24562.419921875, + "learning_rate": 5.106981394327165e-06, + "loss": 0.4, + "step": 193800 + }, + { + "epoch": 1.0001496225899154, + "grad_norm": 23818.201171875, + "learning_rate": 5.090909872731392e-06, + "loss": 0.4065, + "step": 193850 + }, + { + "epoch": 1.0004075925725282, + "grad_norm": 25973.83984375, + "learning_rate": 5.074862322738316e-06, + "loss": 0.4015, + "step": 193900 + }, + { + "epoch": 1.000665562555141, + "grad_norm": 26476.041015625, + "learning_rate": 5.0588387529138085e-06, + "loss": 0.401, + "step": 193950 + }, + { + "epoch": 1.000923532537754, + "grad_norm": 22776.267578125, + "learning_rate": 5.042839171810937e-06, + "loss": 0.4021, + "step": 194000 + }, + { + "epoch": 1.0011815025203668, + "grad_norm": 22484.884765625, + "learning_rate": 5.026863587969966e-06, + "loss": 0.4013, + "step": 194050 + }, + { + "epoch": 1.0014394725029796, + "grad_norm": 21445.009765625, + "learning_rate": 5.010912009918361e-06, + "loss": 0.4001, + "step": 194100 + }, + { + "epoch": 1.0016974424855924, + "grad_norm": 23748.365234375, + "learning_rate": 4.994984446170764e-06, + "loss": 0.3985, + "step": 194150 + }, + { + "epoch": 1.0019554124682053, + "grad_norm": 25007.73828125, + "learning_rate": 4.9790809052289996e-06, + "loss": 0.403, + "step": 194200 + }, + { + "epoch": 1.002213382450818, + "grad_norm": 26824.900390625, + "learning_rate": 4.963201395582062e-06, + "loss": 0.3966, + "step": 194250 + }, + { + "epoch": 1.0024713524334308, + "grad_norm": 21838.662109375, + "learning_rate": 4.947345925706148e-06, + "loss": 0.3955, + "step": 194300 + }, + { + "epoch": 1.0027293224160436, + "grad_norm": 20830.59375, + "learning_rate": 4.931514504064566e-06, + "loss": 0.3976, + "step": 194350 + }, + { + "epoch": 1.0029872923986565, + "grad_norm": 24187.484375, + "learning_rate": 4.915707139107856e-06, + "loss": 0.4009, + "step": 194400 + }, + { + "epoch": 1.0032452623812693, + "grad_norm": 23026.99609375, + "learning_rate": 4.899923839273662e-06, + "loss": 0.4017, + "step": 194450 + }, + { + "epoch": 1.0035032323638822, + "grad_norm": 25855.919921875, + "learning_rate": 4.884164612986808e-06, + "loss": 0.3966, + "step": 194500 + }, + { + "epoch": 1.003761202346495, + "grad_norm": 23424.58984375, + "learning_rate": 4.86842946865928e-06, + "loss": 0.4007, + "step": 194550 + }, + { + "epoch": 1.0040191723291079, + "grad_norm": 20644.318359375, + "learning_rate": 4.852718414690166e-06, + "loss": 0.405, + "step": 194600 + }, + { + "epoch": 1.0042771423117207, + "grad_norm": 24923.30078125, + "learning_rate": 4.8370314594657405e-06, + "loss": 0.3961, + "step": 194650 + }, + { + "epoch": 1.0045351122943333, + "grad_norm": 23334.19921875, + "learning_rate": 4.821368611359395e-06, + "loss": 0.3981, + "step": 194700 + }, + { + "epoch": 1.0047930822769462, + "grad_norm": 24258.54296875, + "learning_rate": 4.8057298787316516e-06, + "loss": 0.3998, + "step": 194750 + }, + { + "epoch": 1.005051052259559, + "grad_norm": 23366.234375, + "learning_rate": 4.790115269930162e-06, + "loss": 0.3998, + "step": 194800 + }, + { + "epoch": 1.005309022242172, + "grad_norm": 22389.498046875, + "learning_rate": 4.774524793289692e-06, + "loss": 0.4025, + "step": 194850 + }, + { + "epoch": 1.0055669922247847, + "grad_norm": 25497.361328125, + "learning_rate": 4.758958457132157e-06, + "loss": 0.3979, + "step": 194900 + }, + { + "epoch": 1.0058249622073976, + "grad_norm": 24179.626953125, + "learning_rate": 4.7434162697665595e-06, + "loss": 0.3984, + "step": 194950 + }, + { + "epoch": 1.0060829321900104, + "grad_norm": 24002.955078125, + "learning_rate": 4.727898239489015e-06, + "loss": 0.398, + "step": 195000 + }, + { + "epoch": 1.0060829321900104, + "eval_loss": 0.3868441879749298, + "eval_runtime": 3205.6792, + "eval_samples_per_second": 967.383, + "eval_steps_per_second": 1.889, + "step": 195000 + }, + { + "epoch": 1.0063409021726233, + "grad_norm": 26567.27734375, + "learning_rate": 4.712404374582741e-06, + "loss": 0.399, + "step": 195050 + }, + { + "epoch": 1.006598872155236, + "grad_norm": 25244.615234375, + "learning_rate": 4.696934683318077e-06, + "loss": 0.3998, + "step": 195100 + }, + { + "epoch": 1.0068568421378488, + "grad_norm": 23278.265625, + "learning_rate": 4.6814891739524195e-06, + "loss": 0.4002, + "step": 195150 + }, + { + "epoch": 1.0071148121204616, + "grad_norm": 23141.138671875, + "learning_rate": 4.666067854730322e-06, + "loss": 0.3965, + "step": 195200 + }, + { + "epoch": 1.0073727821030745, + "grad_norm": 23506.640625, + "learning_rate": 4.650670733883344e-06, + "loss": 0.3962, + "step": 195250 + }, + { + "epoch": 1.0076307520856873, + "grad_norm": 26591.212890625, + "learning_rate": 4.635297819630202e-06, + "loss": 0.3992, + "step": 195300 + }, + { + "epoch": 1.0078887220683002, + "grad_norm": 22111.640625, + "learning_rate": 4.619949120176642e-06, + "loss": 0.401, + "step": 195350 + }, + { + "epoch": 1.008146692050913, + "grad_norm": 25048.17578125, + "learning_rate": 4.604624643715505e-06, + "loss": 0.4016, + "step": 195400 + }, + { + "epoch": 1.0084046620335259, + "grad_norm": 23263.23828125, + "learning_rate": 4.589324398426714e-06, + "loss": 0.3942, + "step": 195450 + }, + { + "epoch": 1.0086626320161387, + "grad_norm": 23640.9296875, + "learning_rate": 4.57404839247722e-06, + "loss": 0.4039, + "step": 195500 + }, + { + "epoch": 1.0089206019987513, + "grad_norm": 25680.390625, + "learning_rate": 4.558796634021079e-06, + "loss": 0.3986, + "step": 195550 + }, + { + "epoch": 1.0091785719813642, + "grad_norm": 23321.78125, + "learning_rate": 4.543569131199382e-06, + "loss": 0.4039, + "step": 195600 + }, + { + "epoch": 1.009436541963977, + "grad_norm": 24123.205078125, + "learning_rate": 4.528365892140263e-06, + "loss": 0.397, + "step": 195650 + }, + { + "epoch": 1.0096945119465899, + "grad_norm": 23332.673828125, + "learning_rate": 4.513186924958928e-06, + "loss": 0.3941, + "step": 195700 + }, + { + "epoch": 1.0099524819292027, + "grad_norm": 25583.609375, + "learning_rate": 4.498032237757605e-06, + "loss": 0.4046, + "step": 195750 + }, + { + "epoch": 1.0102104519118156, + "grad_norm": 25230.3515625, + "learning_rate": 4.482901838625586e-06, + "loss": 0.4012, + "step": 195800 + }, + { + "epoch": 1.0104684218944284, + "grad_norm": 24376.5859375, + "learning_rate": 4.46779573563918e-06, + "loss": 0.3911, + "step": 195850 + }, + { + "epoch": 1.0107263918770413, + "grad_norm": 23978.17578125, + "learning_rate": 4.452713936861724e-06, + "loss": 0.4031, + "step": 195900 + }, + { + "epoch": 1.010984361859654, + "grad_norm": 23535.03515625, + "learning_rate": 4.437656450343602e-06, + "loss": 0.3933, + "step": 195950 + }, + { + "epoch": 1.0112423318422668, + "grad_norm": 24465.794921875, + "learning_rate": 4.422623284122207e-06, + "loss": 0.4027, + "step": 196000 + }, + { + "epoch": 1.0115003018248796, + "grad_norm": 23942.03125, + "learning_rate": 4.407614446221936e-06, + "loss": 0.4024, + "step": 196050 + }, + { + "epoch": 1.0117582718074924, + "grad_norm": 23610.720703125, + "learning_rate": 4.392629944654248e-06, + "loss": 0.3982, + "step": 196100 + }, + { + "epoch": 1.0120162417901053, + "grad_norm": 25937.53125, + "learning_rate": 4.3776697874175375e-06, + "loss": 0.3991, + "step": 196150 + }, + { + "epoch": 1.0122742117727181, + "grad_norm": 24008.5234375, + "learning_rate": 4.362733982497286e-06, + "loss": 0.3968, + "step": 196200 + }, + { + "epoch": 1.012532181755331, + "grad_norm": 23377.744140625, + "learning_rate": 4.347822537865914e-06, + "loss": 0.3958, + "step": 196250 + }, + { + "epoch": 1.0127901517379438, + "grad_norm": 23768.7421875, + "learning_rate": 4.332935461482862e-06, + "loss": 0.4004, + "step": 196300 + }, + { + "epoch": 1.0130481217205567, + "grad_norm": 25974.603515625, + "learning_rate": 4.3180727612945896e-06, + "loss": 0.4038, + "step": 196350 + }, + { + "epoch": 1.0133060917031693, + "grad_norm": 22376.34765625, + "learning_rate": 4.303234445234477e-06, + "loss": 0.3991, + "step": 196400 + }, + { + "epoch": 1.0135640616857822, + "grad_norm": 22145.03515625, + "learning_rate": 4.288420521222963e-06, + "loss": 0.3971, + "step": 196450 + }, + { + "epoch": 1.013822031668395, + "grad_norm": 21512.77734375, + "learning_rate": 4.273630997167422e-06, + "loss": 0.399, + "step": 196500 + }, + { + "epoch": 1.0140800016510079, + "grad_norm": 22957.626953125, + "learning_rate": 4.258865880962215e-06, + "loss": 0.3995, + "step": 196550 + }, + { + "epoch": 1.0143379716336207, + "grad_norm": 21951.89453125, + "learning_rate": 4.244125180488673e-06, + "loss": 0.3961, + "step": 196600 + }, + { + "epoch": 1.0145959416162336, + "grad_norm": 23440.005859375, + "learning_rate": 4.229408903615095e-06, + "loss": 0.4057, + "step": 196650 + }, + { + "epoch": 1.0148539115988464, + "grad_norm": 23987.21484375, + "learning_rate": 4.214717058196754e-06, + "loss": 0.3999, + "step": 196700 + }, + { + "epoch": 1.0151118815814593, + "grad_norm": 24526.482421875, + "learning_rate": 4.200049652075866e-06, + "loss": 0.3964, + "step": 196750 + }, + { + "epoch": 1.0153698515640721, + "grad_norm": 23351.193359375, + "learning_rate": 4.185406693081612e-06, + "loss": 0.3978, + "step": 196800 + }, + { + "epoch": 1.0156278215466847, + "grad_norm": 25014.873046875, + "learning_rate": 4.170788189030106e-06, + "loss": 0.3963, + "step": 196850 + }, + { + "epoch": 1.0158857915292976, + "grad_norm": 21085.181640625, + "learning_rate": 4.156194147724451e-06, + "loss": 0.4015, + "step": 196900 + }, + { + "epoch": 1.0161437615119104, + "grad_norm": 20203.427734375, + "learning_rate": 4.141624576954634e-06, + "loss": 0.4037, + "step": 196950 + }, + { + "epoch": 1.0164017314945233, + "grad_norm": 23869.416015625, + "learning_rate": 4.1270794844976255e-06, + "loss": 0.4038, + "step": 197000 + }, + { + "epoch": 1.0166597014771361, + "grad_norm": 24936.158203125, + "learning_rate": 4.112558878117318e-06, + "loss": 0.4073, + "step": 197050 + }, + { + "epoch": 1.016917671459749, + "grad_norm": 23021.921875, + "learning_rate": 4.098062765564509e-06, + "loss": 0.4056, + "step": 197100 + }, + { + "epoch": 1.0171756414423618, + "grad_norm": 21626.19921875, + "learning_rate": 4.083591154576971e-06, + "loss": 0.3989, + "step": 197150 + }, + { + "epoch": 1.0174336114249747, + "grad_norm": 25556.169921875, + "learning_rate": 4.069144052879342e-06, + "loss": 0.3975, + "step": 197200 + }, + { + "epoch": 1.0176915814075873, + "grad_norm": 23286.365234375, + "learning_rate": 4.054721468183226e-06, + "loss": 0.3974, + "step": 197250 + }, + { + "epoch": 1.0179495513902002, + "grad_norm": 24497.57421875, + "learning_rate": 4.040323408187113e-06, + "loss": 0.4028, + "step": 197300 + }, + { + "epoch": 1.018207521372813, + "grad_norm": 26279.40625, + "learning_rate": 4.025949880576407e-06, + "loss": 0.4034, + "step": 197350 + }, + { + "epoch": 1.0184654913554259, + "grad_norm": 22679.267578125, + "learning_rate": 4.011600893023421e-06, + "loss": 0.3991, + "step": 197400 + }, + { + "epoch": 1.0187234613380387, + "grad_norm": 25421.83984375, + "learning_rate": 3.997276453187365e-06, + "loss": 0.4023, + "step": 197450 + }, + { + "epoch": 1.0189814313206516, + "grad_norm": 25313.75, + "learning_rate": 3.982976568714336e-06, + "loss": 0.4018, + "step": 197500 + }, + { + "epoch": 1.0192394013032644, + "grad_norm": 24318.505859375, + "learning_rate": 3.96870124723736e-06, + "loss": 0.4027, + "step": 197550 + }, + { + "epoch": 1.0194973712858773, + "grad_norm": 22409.70703125, + "learning_rate": 3.9544504963763105e-06, + "loss": 0.3982, + "step": 197600 + }, + { + "epoch": 1.01975534126849, + "grad_norm": 25028.7265625, + "learning_rate": 3.9402243237379675e-06, + "loss": 0.4037, + "step": 197650 + }, + { + "epoch": 1.0200133112511027, + "grad_norm": 21235.19140625, + "learning_rate": 3.926022736915985e-06, + "loss": 0.3972, + "step": 197700 + }, + { + "epoch": 1.0202712812337156, + "grad_norm": 24214.41015625, + "learning_rate": 3.911845743490889e-06, + "loss": 0.3984, + "step": 197750 + }, + { + "epoch": 1.0205292512163284, + "grad_norm": 24445.375, + "learning_rate": 3.897693351030102e-06, + "loss": 0.4025, + "step": 197800 + }, + { + "epoch": 1.0207872211989413, + "grad_norm": 25233.3515625, + "learning_rate": 3.883565567087871e-06, + "loss": 0.3993, + "step": 197850 + }, + { + "epoch": 1.0210451911815541, + "grad_norm": 23982.43359375, + "learning_rate": 3.8694623992053534e-06, + "loss": 0.4023, + "step": 197900 + }, + { + "epoch": 1.021303161164167, + "grad_norm": 28533.689453125, + "learning_rate": 3.855383854910549e-06, + "loss": 0.3917, + "step": 197950 + }, + { + "epoch": 1.0215611311467798, + "grad_norm": 26334.77734375, + "learning_rate": 3.841329941718286e-06, + "loss": 0.3989, + "step": 198000 + }, + { + "epoch": 1.0218191011293927, + "grad_norm": 24765.802734375, + "learning_rate": 3.827300667130312e-06, + "loss": 0.398, + "step": 198050 + }, + { + "epoch": 1.0220770711120055, + "grad_norm": 25089.34765625, + "learning_rate": 3.8132960386351445e-06, + "loss": 0.4049, + "step": 198100 + }, + { + "epoch": 1.0223350410946181, + "grad_norm": 23840.72265625, + "learning_rate": 3.7993160637082027e-06, + "loss": 0.3998, + "step": 198150 + }, + { + "epoch": 1.022593011077231, + "grad_norm": 21590.1328125, + "learning_rate": 3.7853607498117282e-06, + "loss": 0.404, + "step": 198200 + }, + { + "epoch": 1.0228509810598438, + "grad_norm": 24620.478515625, + "learning_rate": 3.7714301043947855e-06, + "loss": 0.3958, + "step": 198250 + }, + { + "epoch": 1.0231089510424567, + "grad_norm": 22476.82421875, + "learning_rate": 3.757524134893292e-06, + "loss": 0.3993, + "step": 198300 + }, + { + "epoch": 1.0233669210250695, + "grad_norm": 22550.45703125, + "learning_rate": 3.7436428487299836e-06, + "loss": 0.3983, + "step": 198350 + }, + { + "epoch": 1.0236248910076824, + "grad_norm": 23764.958984375, + "learning_rate": 3.7297862533144045e-06, + "loss": 0.4005, + "step": 198400 + }, + { + "epoch": 1.0238828609902952, + "grad_norm": 23600.103515625, + "learning_rate": 3.7159543560429667e-06, + "loss": 0.3976, + "step": 198450 + }, + { + "epoch": 1.024140830972908, + "grad_norm": 24258.537109375, + "learning_rate": 3.7021471642988583e-06, + "loss": 0.4015, + "step": 198500 + }, + { + "epoch": 1.0243988009555207, + "grad_norm": 22559.609375, + "learning_rate": 3.6883646854520837e-06, + "loss": 0.4028, + "step": 198550 + }, + { + "epoch": 1.0246567709381336, + "grad_norm": 20827.234375, + "learning_rate": 3.67460692685947e-06, + "loss": 0.3954, + "step": 198600 + }, + { + "epoch": 1.0249147409207464, + "grad_norm": 24864.171875, + "learning_rate": 3.6608738958646303e-06, + "loss": 0.3919, + "step": 198650 + }, + { + "epoch": 1.0251727109033593, + "grad_norm": 25603.6796875, + "learning_rate": 3.647165599798019e-06, + "loss": 0.3984, + "step": 198700 + }, + { + "epoch": 1.0254306808859721, + "grad_norm": 21448.0234375, + "learning_rate": 3.6334820459768217e-06, + "loss": 0.4031, + "step": 198750 + }, + { + "epoch": 1.025688650868585, + "grad_norm": 24923.51953125, + "learning_rate": 3.6198232417050782e-06, + "loss": 0.4023, + "step": 198800 + }, + { + "epoch": 1.0259466208511978, + "grad_norm": 21672.09765625, + "learning_rate": 3.6061891942735957e-06, + "loss": 0.4027, + "step": 198850 + }, + { + "epoch": 1.0262045908338107, + "grad_norm": 24733.31640625, + "learning_rate": 3.5925799109599423e-06, + "loss": 0.401, + "step": 198900 + }, + { + "epoch": 1.0264625608164235, + "grad_norm": 25941.05859375, + "learning_rate": 3.5789953990285284e-06, + "loss": 0.3944, + "step": 198950 + }, + { + "epoch": 1.0267205307990361, + "grad_norm": 25462.96875, + "learning_rate": 3.56543566573046e-06, + "loss": 0.4021, + "step": 199000 + }, + { + "epoch": 1.026978500781649, + "grad_norm": 24243.462890625, + "learning_rate": 3.5519007183036856e-06, + "loss": 0.4009, + "step": 199050 + }, + { + "epoch": 1.0272364707642618, + "grad_norm": 22507.208984375, + "learning_rate": 3.5383905639728987e-06, + "loss": 0.3968, + "step": 199100 + }, + { + "epoch": 1.0274944407468747, + "grad_norm": 22496.060546875, + "learning_rate": 3.524905209949553e-06, + "loss": 0.3988, + "step": 199150 + }, + { + "epoch": 1.0277524107294875, + "grad_norm": 22755.974609375, + "learning_rate": 3.511444663431862e-06, + "loss": 0.3944, + "step": 199200 + }, + { + "epoch": 1.0280103807121004, + "grad_norm": 24945.93359375, + "learning_rate": 3.498008931604818e-06, + "loss": 0.4015, + "step": 199250 + }, + { + "epoch": 1.0282683506947132, + "grad_norm": 23216.15625, + "learning_rate": 3.484598021640134e-06, + "loss": 0.3982, + "step": 199300 + }, + { + "epoch": 1.028526320677326, + "grad_norm": 24690.8203125, + "learning_rate": 3.4712119406963174e-06, + "loss": 0.4, + "step": 199350 + }, + { + "epoch": 1.0287842906599387, + "grad_norm": 23324.27734375, + "learning_rate": 3.4578506959185907e-06, + "loss": 0.4005, + "step": 199400 + }, + { + "epoch": 1.0290422606425516, + "grad_norm": 22831.544921875, + "learning_rate": 3.444514294438922e-06, + "loss": 0.3987, + "step": 199450 + }, + { + "epoch": 1.0293002306251644, + "grad_norm": 22126.681640625, + "learning_rate": 3.4312027433760383e-06, + "loss": 0.4044, + "step": 199500 + }, + { + "epoch": 1.0295582006077773, + "grad_norm": 22105.94140625, + "learning_rate": 3.417916049835368e-06, + "loss": 0.4023, + "step": 199550 + }, + { + "epoch": 1.02981617059039, + "grad_norm": 24164.646484375, + "learning_rate": 3.4046542209091037e-06, + "loss": 0.3968, + "step": 199600 + }, + { + "epoch": 1.030074140573003, + "grad_norm": 23752.33203125, + "learning_rate": 3.3914172636761554e-06, + "loss": 0.3974, + "step": 199650 + }, + { + "epoch": 1.0303321105556158, + "grad_norm": 21793.787109375, + "learning_rate": 3.3782051852021433e-06, + "loss": 0.3981, + "step": 199700 + }, + { + "epoch": 1.0305900805382286, + "grad_norm": 26727.91796875, + "learning_rate": 3.365017992539432e-06, + "loss": 0.4025, + "step": 199750 + }, + { + "epoch": 1.0308480505208415, + "grad_norm": 21089.958984375, + "learning_rate": 3.3518556927270683e-06, + "loss": 0.4001, + "step": 199800 + }, + { + "epoch": 1.0311060205034541, + "grad_norm": 23690.0390625, + "learning_rate": 3.33871829279086e-06, + "loss": 0.3956, + "step": 199850 + }, + { + "epoch": 1.031363990486067, + "grad_norm": 24266.84375, + "learning_rate": 3.325605799743281e-06, + "loss": 0.3966, + "step": 199900 + }, + { + "epoch": 1.0316219604686798, + "grad_norm": 22199.455078125, + "learning_rate": 3.312518220583527e-06, + "loss": 0.4058, + "step": 199950 + }, + { + "epoch": 1.0318799304512927, + "grad_norm": 21272.033203125, + "learning_rate": 3.299455562297504e-06, + "loss": 0.3969, + "step": 200000 + }, + { + "epoch": 1.0318799304512927, + "eval_loss": 0.38684460520744324, + "eval_runtime": 3230.0057, + "eval_samples_per_second": 960.097, + "eval_steps_per_second": 1.875, + "step": 200000 + }, + { + "epoch": 1.0321379004339055, + "grad_norm": 23089.7578125, + "learning_rate": 3.286417831857791e-06, + "loss": 0.4011, + "step": 200050 + }, + { + "epoch": 1.0323958704165184, + "grad_norm": 27875.5859375, + "learning_rate": 3.2734050362236814e-06, + "loss": 0.4014, + "step": 200100 + }, + { + "epoch": 1.0326538403991312, + "grad_norm": 22023.40234375, + "learning_rate": 3.260417182341169e-06, + "loss": 0.398, + "step": 200150 + }, + { + "epoch": 1.032911810381744, + "grad_norm": 23899.208984375, + "learning_rate": 3.247454277142892e-06, + "loss": 0.3976, + "step": 200200 + }, + { + "epoch": 1.0331697803643567, + "grad_norm": 22874.44921875, + "learning_rate": 3.2345163275482147e-06, + "loss": 0.4014, + "step": 200250 + }, + { + "epoch": 1.0334277503469695, + "grad_norm": 21650.296875, + "learning_rate": 3.221603340463164e-06, + "loss": 0.4012, + "step": 200300 + }, + { + "epoch": 1.0336857203295824, + "grad_norm": 24189.89453125, + "learning_rate": 3.2087153227804314e-06, + "loss": 0.401, + "step": 200350 + }, + { + "epoch": 1.0339436903121952, + "grad_norm": 21525.12109375, + "learning_rate": 3.1958522813794134e-06, + "loss": 0.4016, + "step": 200400 + }, + { + "epoch": 1.034201660294808, + "grad_norm": 23732.640625, + "learning_rate": 3.1830142231261294e-06, + "loss": 0.4021, + "step": 200450 + }, + { + "epoch": 1.034459630277421, + "grad_norm": 24911.607421875, + "learning_rate": 3.170201154873298e-06, + "loss": 0.3943, + "step": 200500 + }, + { + "epoch": 1.0347176002600338, + "grad_norm": 25295.861328125, + "learning_rate": 3.1574130834602813e-06, + "loss": 0.401, + "step": 200550 + }, + { + "epoch": 1.0349755702426466, + "grad_norm": 23536.498046875, + "learning_rate": 3.1446500157131075e-06, + "loss": 0.3964, + "step": 200600 + }, + { + "epoch": 1.0352335402252595, + "grad_norm": 26484.287109375, + "learning_rate": 3.131911958444461e-06, + "loss": 0.4068, + "step": 200650 + }, + { + "epoch": 1.0354915102078721, + "grad_norm": 24330.001953125, + "learning_rate": 3.1191989184536474e-06, + "loss": 0.3911, + "step": 200700 + }, + { + "epoch": 1.035749480190485, + "grad_norm": 21095.994140625, + "learning_rate": 3.1065109025266713e-06, + "loss": 0.4, + "step": 200750 + }, + { + "epoch": 1.0360074501730978, + "grad_norm": 21829.64453125, + "learning_rate": 3.093847917436132e-06, + "loss": 0.4016, + "step": 200800 + }, + { + "epoch": 1.0362654201557107, + "grad_norm": 25772.79296875, + "learning_rate": 3.0812099699412953e-06, + "loss": 0.4032, + "step": 200850 + }, + { + "epoch": 1.0365233901383235, + "grad_norm": 25614.240234375, + "learning_rate": 3.0685970667880425e-06, + "loss": 0.3976, + "step": 200900 + }, + { + "epoch": 1.0367813601209364, + "grad_norm": 26170.455078125, + "learning_rate": 3.056009214708905e-06, + "loss": 0.4001, + "step": 200950 + }, + { + "epoch": 1.0370393301035492, + "grad_norm": 24801.76171875, + "learning_rate": 3.0434464204230186e-06, + "loss": 0.3924, + "step": 201000 + }, + { + "epoch": 1.037297300086162, + "grad_norm": 28940.640625, + "learning_rate": 3.0309086906361917e-06, + "loss": 0.3998, + "step": 201050 + }, + { + "epoch": 1.037555270068775, + "grad_norm": 23856.90625, + "learning_rate": 3.018396032040788e-06, + "loss": 0.397, + "step": 201100 + }, + { + "epoch": 1.0378132400513875, + "grad_norm": 23309.861328125, + "learning_rate": 3.005908451315842e-06, + "loss": 0.4026, + "step": 201150 + }, + { + "epoch": 1.0380712100340004, + "grad_norm": 23592.7265625, + "learning_rate": 2.993445955126978e-06, + "loss": 0.3971, + "step": 201200 + }, + { + "epoch": 1.0383291800166132, + "grad_norm": 23301.861328125, + "learning_rate": 2.9810085501264296e-06, + "loss": 0.403, + "step": 201250 + }, + { + "epoch": 1.038587149999226, + "grad_norm": 23200.0859375, + "learning_rate": 2.968596242953059e-06, + "loss": 0.4001, + "step": 201300 + }, + { + "epoch": 1.038845119981839, + "grad_norm": 26894.70703125, + "learning_rate": 2.956209040232294e-06, + "loss": 0.3988, + "step": 201350 + }, + { + "epoch": 1.0391030899644518, + "grad_norm": 22423.931640625, + "learning_rate": 2.9438469485761956e-06, + "loss": 0.3981, + "step": 201400 + }, + { + "epoch": 1.0393610599470646, + "grad_norm": 24167.068359375, + "learning_rate": 2.9315099745834073e-06, + "loss": 0.4024, + "step": 201450 + }, + { + "epoch": 1.0396190299296775, + "grad_norm": 25832.712890625, + "learning_rate": 2.9191981248391677e-06, + "loss": 0.3937, + "step": 201500 + }, + { + "epoch": 1.03987699991229, + "grad_norm": 26923.005859375, + "learning_rate": 2.9069114059153024e-06, + "loss": 0.3922, + "step": 201550 + }, + { + "epoch": 1.040134969894903, + "grad_norm": 23295.380859375, + "learning_rate": 2.8946498243702158e-06, + "loss": 0.4011, + "step": 201600 + }, + { + "epoch": 1.0403929398775158, + "grad_norm": 23378.5234375, + "learning_rate": 2.882413386748922e-06, + "loss": 0.4033, + "step": 201650 + }, + { + "epoch": 1.0406509098601286, + "grad_norm": 24349.9140625, + "learning_rate": 2.8702020995829803e-06, + "loss": 0.3964, + "step": 201700 + }, + { + "epoch": 1.0409088798427415, + "grad_norm": 24178.61328125, + "learning_rate": 2.8580159693905485e-06, + "loss": 0.3978, + "step": 201750 + }, + { + "epoch": 1.0411668498253543, + "grad_norm": 24998.189453125, + "learning_rate": 2.8458550026763344e-06, + "loss": 0.3943, + "step": 201800 + }, + { + "epoch": 1.0414248198079672, + "grad_norm": 28928.828125, + "learning_rate": 2.8337192059316344e-06, + "loss": 0.3998, + "step": 201850 + }, + { + "epoch": 1.04168278979058, + "grad_norm": 24329.37890625, + "learning_rate": 2.8216085856342946e-06, + "loss": 0.3976, + "step": 201900 + }, + { + "epoch": 1.041940759773193, + "grad_norm": 24121.482421875, + "learning_rate": 2.809523148248744e-06, + "loss": 0.3952, + "step": 201950 + }, + { + "epoch": 1.0421987297558055, + "grad_norm": 23812.671875, + "learning_rate": 2.7974629002259443e-06, + "loss": 0.4052, + "step": 202000 + }, + { + "epoch": 1.0424566997384184, + "grad_norm": 25162.40234375, + "learning_rate": 2.785427848003419e-06, + "loss": 0.3948, + "step": 202050 + }, + { + "epoch": 1.0427146697210312, + "grad_norm": 23631.462890625, + "learning_rate": 2.773417998005262e-06, + "loss": 0.3982, + "step": 202100 + }, + { + "epoch": 1.042972639703644, + "grad_norm": 24178.177734375, + "learning_rate": 2.761433356642079e-06, + "loss": 0.4012, + "step": 202150 + }, + { + "epoch": 1.043230609686257, + "grad_norm": 24726.37890625, + "learning_rate": 2.7494739303110527e-06, + "loss": 0.3926, + "step": 202200 + }, + { + "epoch": 1.0434885796688698, + "grad_norm": 23798.73828125, + "learning_rate": 2.7375397253958935e-06, + "loss": 0.3998, + "step": 202250 + }, + { + "epoch": 1.0437465496514826, + "grad_norm": 25162.677734375, + "learning_rate": 2.725630748266844e-06, + "loss": 0.4038, + "step": 202300 + }, + { + "epoch": 1.0440045196340955, + "grad_norm": 28668.78515625, + "learning_rate": 2.7137470052806814e-06, + "loss": 0.3989, + "step": 202350 + }, + { + "epoch": 1.0442624896167083, + "grad_norm": 22550.810546875, + "learning_rate": 2.7018885027807195e-06, + "loss": 0.3994, + "step": 202400 + }, + { + "epoch": 1.044520459599321, + "grad_norm": 26758.71484375, + "learning_rate": 2.6900552470968064e-06, + "loss": 0.4063, + "step": 202450 + }, + { + "epoch": 1.0447784295819338, + "grad_norm": 24895.77734375, + "learning_rate": 2.678247244545301e-06, + "loss": 0.3968, + "step": 202500 + }, + { + "epoch": 1.0450363995645466, + "grad_norm": 22442.416015625, + "learning_rate": 2.6664645014290833e-06, + "loss": 0.4009, + "step": 202550 + }, + { + "epoch": 1.0452943695471595, + "grad_norm": 24647.232421875, + "learning_rate": 2.654707024037556e-06, + "loss": 0.3984, + "step": 202600 + }, + { + "epoch": 1.0455523395297723, + "grad_norm": 24156.189453125, + "learning_rate": 2.6429748186466265e-06, + "loss": 0.3983, + "step": 202650 + }, + { + "epoch": 1.0458103095123852, + "grad_norm": 24131.658203125, + "learning_rate": 2.6312678915187185e-06, + "loss": 0.3941, + "step": 202700 + }, + { + "epoch": 1.046068279494998, + "grad_norm": 24890.5625, + "learning_rate": 2.6195862489027833e-06, + "loss": 0.3936, + "step": 202750 + }, + { + "epoch": 1.0463262494776109, + "grad_norm": 26486.58203125, + "learning_rate": 2.607929897034228e-06, + "loss": 0.4073, + "step": 202800 + }, + { + "epoch": 1.0465842194602235, + "grad_norm": 24554.09375, + "learning_rate": 2.5962988421350033e-06, + "loss": 0.3985, + "step": 202850 + }, + { + "epoch": 1.0468421894428364, + "grad_norm": 24964.349609375, + "learning_rate": 2.584693090413537e-06, + "loss": 0.3974, + "step": 202900 + }, + { + "epoch": 1.0471001594254492, + "grad_norm": 21256.87890625, + "learning_rate": 2.5731126480647516e-06, + "loss": 0.3969, + "step": 202950 + }, + { + "epoch": 1.047358129408062, + "grad_norm": 23721.197265625, + "learning_rate": 2.5615575212700804e-06, + "loss": 0.4039, + "step": 203000 + }, + { + "epoch": 1.047616099390675, + "grad_norm": 25096.4609375, + "learning_rate": 2.550027716197395e-06, + "loss": 0.3953, + "step": 203050 + }, + { + "epoch": 1.0478740693732878, + "grad_norm": 22199.11328125, + "learning_rate": 2.5385232390011114e-06, + "loss": 0.3979, + "step": 203100 + }, + { + "epoch": 1.0481320393559006, + "grad_norm": 24967.4609375, + "learning_rate": 2.527044095822084e-06, + "loss": 0.4023, + "step": 203150 + }, + { + "epoch": 1.0483900093385135, + "grad_norm": 28301.302734375, + "learning_rate": 2.5155902927876564e-06, + "loss": 0.4047, + "step": 203200 + }, + { + "epoch": 1.0486479793211263, + "grad_norm": 22268.037109375, + "learning_rate": 2.504161836011648e-06, + "loss": 0.4032, + "step": 203250 + }, + { + "epoch": 1.048905949303739, + "grad_norm": 28254.658203125, + "learning_rate": 2.4927587315943414e-06, + "loss": 0.3915, + "step": 203300 + }, + { + "epoch": 1.0491639192863518, + "grad_norm": 24471.462890625, + "learning_rate": 2.4813809856225112e-06, + "loss": 0.3986, + "step": 203350 + }, + { + "epoch": 1.0494218892689646, + "grad_norm": 24208.7578125, + "learning_rate": 2.470028604169361e-06, + "loss": 0.3969, + "step": 203400 + }, + { + "epoch": 1.0496798592515775, + "grad_norm": 23962.025390625, + "learning_rate": 2.4587015932945824e-06, + "loss": 0.3992, + "step": 203450 + }, + { + "epoch": 1.0499378292341903, + "grad_norm": 24777.421875, + "learning_rate": 2.4473999590443054e-06, + "loss": 0.4042, + "step": 203500 + }, + { + "epoch": 1.0501957992168032, + "grad_norm": 26705.40234375, + "learning_rate": 2.4361237074511323e-06, + "loss": 0.3985, + "step": 203550 + }, + { + "epoch": 1.050453769199416, + "grad_norm": 22508.51171875, + "learning_rate": 2.424872844534093e-06, + "loss": 0.3967, + "step": 203600 + }, + { + "epoch": 1.0507117391820289, + "grad_norm": 24678.62109375, + "learning_rate": 2.4136473762987057e-06, + "loss": 0.4002, + "step": 203650 + }, + { + "epoch": 1.0509697091646415, + "grad_norm": 24190.259765625, + "learning_rate": 2.402447308736883e-06, + "loss": 0.4002, + "step": 203700 + }, + { + "epoch": 1.0512276791472543, + "grad_norm": 27986.912109375, + "learning_rate": 2.391272647827014e-06, + "loss": 0.406, + "step": 203750 + }, + { + "epoch": 1.0514856491298672, + "grad_norm": 23664.740234375, + "learning_rate": 2.3801233995339236e-06, + "loss": 0.3988, + "step": 203800 + }, + { + "epoch": 1.05174361911248, + "grad_norm": 32503.17578125, + "learning_rate": 2.368999569808844e-06, + "loss": 0.3996, + "step": 203850 + }, + { + "epoch": 1.052001589095093, + "grad_norm": 24140.591796875, + "learning_rate": 2.3579011645894933e-06, + "loss": 0.4021, + "step": 203900 + }, + { + "epoch": 1.0522595590777057, + "grad_norm": 24920.033203125, + "learning_rate": 2.3468281897999487e-06, + "loss": 0.4038, + "step": 203950 + }, + { + "epoch": 1.0525175290603186, + "grad_norm": 20836.1796875, + "learning_rate": 2.335780651350772e-06, + "loss": 0.3929, + "step": 204000 + }, + { + "epoch": 1.0527754990429314, + "grad_norm": 22305.021484375, + "learning_rate": 2.324758555138923e-06, + "loss": 0.3963, + "step": 204050 + }, + { + "epoch": 1.0530334690255443, + "grad_norm": 22536.13671875, + "learning_rate": 2.3137619070477788e-06, + "loss": 0.3923, + "step": 204100 + }, + { + "epoch": 1.053291439008157, + "grad_norm": 23319.326171875, + "learning_rate": 2.3027907129471395e-06, + "loss": 0.4034, + "step": 204150 + }, + { + "epoch": 1.0535494089907698, + "grad_norm": 25774.677734375, + "learning_rate": 2.2918449786932085e-06, + "loss": 0.4015, + "step": 204200 + }, + { + "epoch": 1.0538073789733826, + "grad_norm": 23130.119140625, + "learning_rate": 2.280924710128618e-06, + "loss": 0.3971, + "step": 204250 + }, + { + "epoch": 1.0540653489559955, + "grad_norm": 23122.1875, + "learning_rate": 2.270029913082394e-06, + "loss": 0.3969, + "step": 204300 + }, + { + "epoch": 1.0543233189386083, + "grad_norm": 21518.763671875, + "learning_rate": 2.2591605933699632e-06, + "loss": 0.3992, + "step": 204350 + }, + { + "epoch": 1.0545812889212212, + "grad_norm": 25077.322265625, + "learning_rate": 2.248316756793156e-06, + "loss": 0.405, + "step": 204400 + }, + { + "epoch": 1.054839258903834, + "grad_norm": 23907.869140625, + "learning_rate": 2.237498409140215e-06, + "loss": 0.4009, + "step": 204450 + }, + { + "epoch": 1.0550972288864469, + "grad_norm": 22796.865234375, + "learning_rate": 2.2267055561857484e-06, + "loss": 0.4044, + "step": 204500 + }, + { + "epoch": 1.0553551988690595, + "grad_norm": 33471.05859375, + "learning_rate": 2.2159382036907927e-06, + "loss": 0.4021, + "step": 204550 + }, + { + "epoch": 1.0556131688516723, + "grad_norm": 23975.6640625, + "learning_rate": 2.2051963574027225e-06, + "loss": 0.3922, + "step": 204600 + }, + { + "epoch": 1.0558711388342852, + "grad_norm": 24563.220703125, + "learning_rate": 2.194480023055351e-06, + "loss": 0.3952, + "step": 204650 + }, + { + "epoch": 1.056129108816898, + "grad_norm": 24479.20703125, + "learning_rate": 2.1837892063688525e-06, + "loss": 0.4005, + "step": 204700 + }, + { + "epoch": 1.0563870787995109, + "grad_norm": 24895.6640625, + "learning_rate": 2.173123913049757e-06, + "loss": 0.3985, + "step": 204750 + }, + { + "epoch": 1.0566450487821237, + "grad_norm": 25606.34765625, + "learning_rate": 2.1624841487910052e-06, + "loss": 0.4019, + "step": 204800 + }, + { + "epoch": 1.0569030187647366, + "grad_norm": 23026.8828125, + "learning_rate": 2.151869919271904e-06, + "loss": 0.4023, + "step": 204850 + }, + { + "epoch": 1.0571609887473494, + "grad_norm": 24365.9609375, + "learning_rate": 2.1412812301581097e-06, + "loss": 0.3992, + "step": 204900 + }, + { + "epoch": 1.0574189587299623, + "grad_norm": 25374.990234375, + "learning_rate": 2.130718087101663e-06, + "loss": 0.4009, + "step": 204950 + }, + { + "epoch": 1.057676928712575, + "grad_norm": 23697.388671875, + "learning_rate": 2.1201804957409697e-06, + "loss": 0.4042, + "step": 205000 + }, + { + "epoch": 1.057676928712575, + "eval_loss": 0.386392205953598, + "eval_runtime": 3213.2768, + "eval_samples_per_second": 965.096, + "eval_steps_per_second": 1.885, + "step": 205000 + }, + { + "epoch": 1.0579348986951878, + "grad_norm": 23768.669921875, + "learning_rate": 2.109668461700781e-06, + "loss": 0.4058, + "step": 205050 + }, + { + "epoch": 1.0581928686778006, + "grad_norm": 24203.693359375, + "learning_rate": 2.099181990592236e-06, + "loss": 0.3971, + "step": 205100 + }, + { + "epoch": 1.0584508386604135, + "grad_norm": 26739.72265625, + "learning_rate": 2.088721088012796e-06, + "loss": 0.4008, + "step": 205150 + }, + { + "epoch": 1.0587088086430263, + "grad_norm": 25664.5, + "learning_rate": 2.078285759546289e-06, + "loss": 0.4001, + "step": 205200 + }, + { + "epoch": 1.0589667786256391, + "grad_norm": 22887.986328125, + "learning_rate": 2.067876010762898e-06, + "loss": 0.3946, + "step": 205250 + }, + { + "epoch": 1.059224748608252, + "grad_norm": 24908.890625, + "learning_rate": 2.057491847219134e-06, + "loss": 0.3997, + "step": 205300 + }, + { + "epoch": 1.0594827185908648, + "grad_norm": 26352.986328125, + "learning_rate": 2.0471332744578853e-06, + "loss": 0.4022, + "step": 205350 + }, + { + "epoch": 1.0597406885734777, + "grad_norm": 23545.6640625, + "learning_rate": 2.0368002980083235e-06, + "loss": 0.3976, + "step": 205400 + }, + { + "epoch": 1.0599986585560903, + "grad_norm": 24206.896484375, + "learning_rate": 2.02649292338602e-06, + "loss": 0.3934, + "step": 205450 + }, + { + "epoch": 1.0602566285387032, + "grad_norm": 22331.580078125, + "learning_rate": 2.0162111560928345e-06, + "loss": 0.3969, + "step": 205500 + }, + { + "epoch": 1.060514598521316, + "grad_norm": 24358.099609375, + "learning_rate": 2.0059550016169827e-06, + "loss": 0.3934, + "step": 205550 + }, + { + "epoch": 1.0607725685039289, + "grad_norm": 23970.693359375, + "learning_rate": 1.9957244654330133e-06, + "loss": 0.4012, + "step": 205600 + }, + { + "epoch": 1.0610305384865417, + "grad_norm": 23980.03125, + "learning_rate": 1.985519553001758e-06, + "loss": 0.3979, + "step": 205650 + }, + { + "epoch": 1.0612885084691546, + "grad_norm": 25418.708984375, + "learning_rate": 1.9753402697704313e-06, + "loss": 0.3988, + "step": 205700 + }, + { + "epoch": 1.0615464784517674, + "grad_norm": 22902.38671875, + "learning_rate": 1.965186621172521e-06, + "loss": 0.393, + "step": 205750 + }, + { + "epoch": 1.0618044484343803, + "grad_norm": 24547.4375, + "learning_rate": 1.9550586126278525e-06, + "loss": 0.4, + "step": 205800 + }, + { + "epoch": 1.062062418416993, + "grad_norm": 24038.619140625, + "learning_rate": 1.9449562495425623e-06, + "loss": 0.3995, + "step": 205850 + }, + { + "epoch": 1.0623203883996057, + "grad_norm": 22873.3515625, + "learning_rate": 1.9348795373090977e-06, + "loss": 0.4028, + "step": 205900 + }, + { + "epoch": 1.0625783583822186, + "grad_norm": 22140.7890625, + "learning_rate": 1.9248284813061957e-06, + "loss": 0.4036, + "step": 205950 + }, + { + "epoch": 1.0628363283648314, + "grad_norm": 23617.9140625, + "learning_rate": 1.914803086898942e-06, + "loss": 0.4005, + "step": 206000 + }, + { + "epoch": 1.0630942983474443, + "grad_norm": 22808.267578125, + "learning_rate": 1.9048033594386838e-06, + "loss": 0.3989, + "step": 206050 + }, + { + "epoch": 1.0633522683300571, + "grad_norm": 23189.298828125, + "learning_rate": 1.8948293042630794e-06, + "loss": 0.3982, + "step": 206100 + }, + { + "epoch": 1.06361023831267, + "grad_norm": 23994.052734375, + "learning_rate": 1.884880926696092e-06, + "loss": 0.4023, + "step": 206150 + }, + { + "epoch": 1.0638682082952828, + "grad_norm": 25587.49609375, + "learning_rate": 1.8749582320479687e-06, + "loss": 0.4056, + "step": 206200 + }, + { + "epoch": 1.0641261782778957, + "grad_norm": 22929.3984375, + "learning_rate": 1.865061225615261e-06, + "loss": 0.3987, + "step": 206250 + }, + { + "epoch": 1.0643841482605083, + "grad_norm": 24747.65234375, + "learning_rate": 1.8551899126807825e-06, + "loss": 0.3959, + "step": 206300 + }, + { + "epoch": 1.0646421182431212, + "grad_norm": 24856.77734375, + "learning_rate": 1.8453442985136682e-06, + "loss": 0.3989, + "step": 206350 + }, + { + "epoch": 1.064900088225734, + "grad_norm": 28136.97265625, + "learning_rate": 1.835524388369303e-06, + "loss": 0.396, + "step": 206400 + }, + { + "epoch": 1.0651580582083469, + "grad_norm": 25035.076171875, + "learning_rate": 1.8257301874893607e-06, + "loss": 0.4, + "step": 206450 + }, + { + "epoch": 1.0654160281909597, + "grad_norm": 23690.525390625, + "learning_rate": 1.8159617011018205e-06, + "loss": 0.3982, + "step": 206500 + }, + { + "epoch": 1.0656739981735726, + "grad_norm": 25419.333984375, + "learning_rate": 1.8062189344208835e-06, + "loss": 0.401, + "step": 206550 + }, + { + "epoch": 1.0659319681561854, + "grad_norm": 24045.5703125, + "learning_rate": 1.7965018926470622e-06, + "loss": 0.3974, + "step": 206600 + }, + { + "epoch": 1.0661899381387983, + "grad_norm": 23030.5625, + "learning_rate": 1.7868105809671298e-06, + "loss": 0.4049, + "step": 206650 + }, + { + "epoch": 1.066447908121411, + "grad_norm": 26036.546875, + "learning_rate": 1.7771450045541149e-06, + "loss": 0.3948, + "step": 206700 + }, + { + "epoch": 1.0667058781040237, + "grad_norm": 25025.001953125, + "learning_rate": 1.7675051685673127e-06, + "loss": 0.404, + "step": 206750 + }, + { + "epoch": 1.0669638480866366, + "grad_norm": 24296.775390625, + "learning_rate": 1.757891078152285e-06, + "loss": 0.3953, + "step": 206800 + }, + { + "epoch": 1.0672218180692494, + "grad_norm": 22506.177734375, + "learning_rate": 1.748302738440838e-06, + "loss": 0.4, + "step": 206850 + }, + { + "epoch": 1.0674797880518623, + "grad_norm": 26741.724609375, + "learning_rate": 1.738740154551055e-06, + "loss": 0.3971, + "step": 206900 + }, + { + "epoch": 1.0677377580344751, + "grad_norm": 24435.2421875, + "learning_rate": 1.7292033315872592e-06, + "loss": 0.3955, + "step": 206950 + }, + { + "epoch": 1.067995728017088, + "grad_norm": 21010.521484375, + "learning_rate": 1.7196922746400058e-06, + "loss": 0.3997, + "step": 207000 + }, + { + "epoch": 1.0682536979997008, + "grad_norm": 21215.384765625, + "learning_rate": 1.710206988786134e-06, + "loss": 0.3971, + "step": 207050 + }, + { + "epoch": 1.0685116679823137, + "grad_norm": 23713.73046875, + "learning_rate": 1.7007474790886823e-06, + "loss": 0.4022, + "step": 207100 + }, + { + "epoch": 1.0687696379649263, + "grad_norm": 26804.658203125, + "learning_rate": 1.691313750596979e-06, + "loss": 0.4028, + "step": 207150 + }, + { + "epoch": 1.0690276079475391, + "grad_norm": 24873.318359375, + "learning_rate": 1.68190580834654e-06, + "loss": 0.3995, + "step": 207200 + }, + { + "epoch": 1.069285577930152, + "grad_norm": 23567.91796875, + "learning_rate": 1.6725236573591596e-06, + "loss": 0.3988, + "step": 207250 + }, + { + "epoch": 1.0695435479127648, + "grad_norm": 23659.513671875, + "learning_rate": 1.6631673026428484e-06, + "loss": 0.4036, + "step": 207300 + }, + { + "epoch": 1.0698015178953777, + "grad_norm": 22014.623046875, + "learning_rate": 1.6538367491918339e-06, + "loss": 0.4003, + "step": 207350 + }, + { + "epoch": 1.0700594878779905, + "grad_norm": 26985.240234375, + "learning_rate": 1.6445320019865984e-06, + "loss": 0.3949, + "step": 207400 + }, + { + "epoch": 1.0703174578606034, + "grad_norm": 25032.328125, + "learning_rate": 1.635253065993836e-06, + "loss": 0.4072, + "step": 207450 + }, + { + "epoch": 1.0705754278432162, + "grad_norm": 23999.62890625, + "learning_rate": 1.6259999461664566e-06, + "loss": 0.4018, + "step": 207500 + }, + { + "epoch": 1.070833397825829, + "grad_norm": 24842.439453125, + "learning_rate": 1.616772647443593e-06, + "loss": 0.3992, + "step": 207550 + }, + { + "epoch": 1.0710913678084417, + "grad_norm": 26740.083984375, + "learning_rate": 1.6075711747506106e-06, + "loss": 0.3954, + "step": 207600 + }, + { + "epoch": 1.0713493377910546, + "grad_norm": 25067.95703125, + "learning_rate": 1.598395532999064e-06, + "loss": 0.4008, + "step": 207650 + }, + { + "epoch": 1.0716073077736674, + "grad_norm": 22218.814453125, + "learning_rate": 1.5892457270867467e-06, + "loss": 0.4005, + "step": 207700 + }, + { + "epoch": 1.0718652777562803, + "grad_norm": 25727.36328125, + "learning_rate": 1.5801217618976294e-06, + "loss": 0.402, + "step": 207750 + }, + { + "epoch": 1.0721232477388931, + "grad_norm": 24692.19921875, + "learning_rate": 1.5710236423019275e-06, + "loss": 0.4035, + "step": 207800 + }, + { + "epoch": 1.072381217721506, + "grad_norm": 25514.009765625, + "learning_rate": 1.5619513731560342e-06, + "loss": 0.3964, + "step": 207850 + }, + { + "epoch": 1.0726391877041188, + "grad_norm": 24503.408203125, + "learning_rate": 1.5529049593025425e-06, + "loss": 0.4036, + "step": 207900 + }, + { + "epoch": 1.0728971576867317, + "grad_norm": 27466.498046875, + "learning_rate": 1.5438844055702728e-06, + "loss": 0.4019, + "step": 207950 + }, + { + "epoch": 1.0731551276693443, + "grad_norm": 24170.1171875, + "learning_rate": 1.5348897167742015e-06, + "loss": 0.4005, + "step": 208000 + }, + { + "epoch": 1.0734130976519571, + "grad_norm": 24094.044921875, + "learning_rate": 1.525920897715527e-06, + "loss": 0.402, + "step": 208050 + }, + { + "epoch": 1.07367106763457, + "grad_norm": 22958.8125, + "learning_rate": 1.5169779531816365e-06, + "loss": 0.4041, + "step": 208100 + }, + { + "epoch": 1.0739290376171828, + "grad_norm": 24056.849609375, + "learning_rate": 1.508060887946089e-06, + "loss": 0.4017, + "step": 208150 + }, + { + "epoch": 1.0741870075997957, + "grad_norm": 22397.435546875, + "learning_rate": 1.499169706768655e-06, + "loss": 0.4036, + "step": 208200 + }, + { + "epoch": 1.0744449775824085, + "grad_norm": 22317.74609375, + "learning_rate": 1.4903044143952604e-06, + "loss": 0.3992, + "step": 208250 + }, + { + "epoch": 1.0747029475650214, + "grad_norm": 22923.57421875, + "learning_rate": 1.4814650155580367e-06, + "loss": 0.3953, + "step": 208300 + }, + { + "epoch": 1.0749609175476342, + "grad_norm": 24276.650390625, + "learning_rate": 1.4726515149752818e-06, + "loss": 0.4052, + "step": 208350 + }, + { + "epoch": 1.075218887530247, + "grad_norm": 27791.369140625, + "learning_rate": 1.4638639173514712e-06, + "loss": 0.4027, + "step": 208400 + }, + { + "epoch": 1.0754768575128597, + "grad_norm": 22683.73046875, + "learning_rate": 1.4551022273772585e-06, + "loss": 0.4036, + "step": 208450 + }, + { + "epoch": 1.0757348274954726, + "grad_norm": 26474.087890625, + "learning_rate": 1.4463664497294527e-06, + "loss": 0.3966, + "step": 208500 + }, + { + "epoch": 1.0759927974780854, + "grad_norm": 25933.25390625, + "learning_rate": 1.4376565890710514e-06, + "loss": 0.4042, + "step": 208550 + }, + { + "epoch": 1.0762507674606983, + "grad_norm": 23373.078125, + "learning_rate": 1.4289726500512134e-06, + "loss": 0.3971, + "step": 208600 + }, + { + "epoch": 1.076508737443311, + "grad_norm": 23282.916015625, + "learning_rate": 1.4203146373052423e-06, + "loss": 0.4038, + "step": 208650 + }, + { + "epoch": 1.076766707425924, + "grad_norm": 26307.12109375, + "learning_rate": 1.4116825554546353e-06, + "loss": 0.3959, + "step": 208700 + }, + { + "epoch": 1.0770246774085368, + "grad_norm": 24472.884765625, + "learning_rate": 1.4030764091070237e-06, + "loss": 0.3954, + "step": 208750 + }, + { + "epoch": 1.0772826473911497, + "grad_norm": 25234.9375, + "learning_rate": 1.394496202856188e-06, + "loss": 0.4003, + "step": 208800 + }, + { + "epoch": 1.0775406173737623, + "grad_norm": 31742.607421875, + "learning_rate": 1.385941941282104e-06, + "loss": 0.3963, + "step": 208850 + }, + { + "epoch": 1.0777985873563751, + "grad_norm": 22577.599609375, + "learning_rate": 1.3774136289508466e-06, + "loss": 0.4003, + "step": 208900 + }, + { + "epoch": 1.078056557338988, + "grad_norm": 24765.111328125, + "learning_rate": 1.3689112704146745e-06, + "loss": 0.3965, + "step": 208950 + }, + { + "epoch": 1.0783145273216008, + "grad_norm": 24935.205078125, + "learning_rate": 1.3604348702119795e-06, + "loss": 0.4001, + "step": 209000 + }, + { + "epoch": 1.0785724973042137, + "grad_norm": 25825.361328125, + "learning_rate": 1.3519844328673037e-06, + "loss": 0.4014, + "step": 209050 + }, + { + "epoch": 1.0788304672868265, + "grad_norm": 23713.068359375, + "learning_rate": 1.343559962891322e-06, + "loss": 0.3961, + "step": 209100 + }, + { + "epoch": 1.0790884372694394, + "grad_norm": 24578.435546875, + "learning_rate": 1.3351614647808542e-06, + "loss": 0.3983, + "step": 209150 + }, + { + "epoch": 1.0793464072520522, + "grad_norm": 22323.19140625, + "learning_rate": 1.3267889430188585e-06, + "loss": 0.4005, + "step": 209200 + }, + { + "epoch": 1.079604377234665, + "grad_norm": 22834.76953125, + "learning_rate": 1.3184424020744212e-06, + "loss": 0.3964, + "step": 209250 + }, + { + "epoch": 1.0798623472172777, + "grad_norm": 22097.615234375, + "learning_rate": 1.3101218464027676e-06, + "loss": 0.3932, + "step": 209300 + }, + { + "epoch": 1.0801203171998905, + "grad_norm": 23564.677734375, + "learning_rate": 1.3018272804452503e-06, + "loss": 0.3996, + "step": 209350 + }, + { + "epoch": 1.0803782871825034, + "grad_norm": 25264.150390625, + "learning_rate": 1.2935587086293443e-06, + "loss": 0.3975, + "step": 209400 + }, + { + "epoch": 1.0806362571651162, + "grad_norm": 22622.1015625, + "learning_rate": 1.2853161353686526e-06, + "loss": 0.4028, + "step": 209450 + }, + { + "epoch": 1.080894227147729, + "grad_norm": 25768.478515625, + "learning_rate": 1.2770995650629058e-06, + "loss": 0.395, + "step": 209500 + }, + { + "epoch": 1.081152197130342, + "grad_norm": 21997.23828125, + "learning_rate": 1.2689090020979455e-06, + "loss": 0.3997, + "step": 209550 + }, + { + "epoch": 1.0814101671129548, + "grad_norm": 24653.796875, + "learning_rate": 1.26074445084573e-06, + "loss": 0.3992, + "step": 209600 + }, + { + "epoch": 1.0816681370955676, + "grad_norm": 25631.18359375, + "learning_rate": 1.252605915664362e-06, + "loss": 0.4006, + "step": 209650 + }, + { + "epoch": 1.0819261070781803, + "grad_norm": 25373.162109375, + "learning_rate": 1.2444934008980058e-06, + "loss": 0.4047, + "step": 209700 + }, + { + "epoch": 1.0821840770607931, + "grad_norm": 23108.03125, + "learning_rate": 1.2364069108769804e-06, + "loss": 0.3994, + "step": 209750 + }, + { + "epoch": 1.082442047043406, + "grad_norm": 23362.0546875, + "learning_rate": 1.2283464499177e-06, + "loss": 0.4059, + "step": 209800 + }, + { + "epoch": 1.0827000170260188, + "grad_norm": 19350.4609375, + "learning_rate": 1.2203120223226727e-06, + "loss": 0.4012, + "step": 209850 + }, + { + "epoch": 1.0829579870086317, + "grad_norm": 24877.921875, + "learning_rate": 1.2123036323805237e-06, + "loss": 0.3989, + "step": 209900 + }, + { + "epoch": 1.0832159569912445, + "grad_norm": 25544.15625, + "learning_rate": 1.2043212843659724e-06, + "loss": 0.3963, + "step": 209950 + }, + { + "epoch": 1.0834739269738574, + "grad_norm": 24271.380859375, + "learning_rate": 1.1963649825398494e-06, + "loss": 0.4026, + "step": 210000 + }, + { + "epoch": 1.0834739269738574, + "eval_loss": 0.3863469064235687, + "eval_runtime": 3245.2698, + "eval_samples_per_second": 955.582, + "eval_steps_per_second": 1.866, + "step": 210000 + } + ], + "logging_steps": 50, + "max_steps": 225000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 5000, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 5, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.825838697690079e+17, + "train_batch_size": 128, + "trial_name": null, + "trial_params": null +} diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/training_args.bin b/pretrain_glome_nano_model_tiny/checkpoint-210000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..afd49ab13e1adc210b7ee9755ab768f1bc6434dc --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c41aa9c6023a3a9650c2ca731b440abde601b316b41906bb1dab8748c3c13ed +size 5304 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/vocab.json b/pretrain_glome_nano_model_tiny/checkpoint-210000/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..54045330cccae0d703647b73183868a84aa6c91f --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/vocab.json @@ -0,0 +1 @@ +{"A":0,"R":1,"N":2,"D":3,"C":4,"Q":5,"E":6,"G":7,"H":8,"I":9,"L":10,"K":11,"M":12,"F":13,"P":14,"S":15,"T":16,"W":17,"Y":18,"V":19,"X":20,"B":21,"U":22,"Z":23,"O":24,".":25,"-":26,"":27,"":28,"":29,"":30,"":31} \ No newline at end of file diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/config.json b/pretrain_glome_nano_model_tiny/checkpoint-215000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b4e7a54fdf8bdda8d2a7ac6356523b75cecb2eb5 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/config.json @@ -0,0 +1,44 @@ +{ + "architectures": [ + "GloMeModelForMaskedLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "bos_token_id": 28, + "classifier_activation": "gelu", + "classifier_bias": false, + "classifier_dropout": 0.1, + "classifier_pooling": "cls", + "cls_token_id": 28, + "compress_block_size": 16, + "compress_block_sliding_stride": 16, + "decoder_bias": true, + "dice_weight": 0.0, + "embedding_dropout": 0.1, + "eos_token_id": 29, + "hidden_activation": "gelu", + "hidden_size": 320, + "inner_rank": 32, + "intermediate_size": 1280, + "kv_heads": 10, + "mask_token_id": 31, + "mlp_bias": false, + "mlp_dropout": 0.1, + "model_size": "tiny", + "model_type": "glome", + "norm_bias": false, + "norm_eps": 1e-05, + "num_attention_heads": 20, + "num_hidden_layers": 6, + "num_selected_blocks": 8, + "num_slots": 64, + "pad_token_id": 30, + "reference_compile": null, + "selection_block_size": 16, + "sep_token_id": 29, + "sliding_window_size": 0, + "torch_dtype": "float32", + "transformers_version": "4.52.3", + "unk_token_id": 27, + "vocab_size": 36 +} diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/merges.txt b/pretrain_glome_nano_model_tiny/checkpoint-215000/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..5e7f1fd94996c8e2b65adea828af1b398eace61f --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/merges.txt @@ -0,0 +1 @@ +#version: 0.2 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/model.safetensors b/pretrain_glome_nano_model_tiny/checkpoint-215000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af986ededba173b18731c67bb33527bc0c72d958 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:725530a204fe419bdd8e2a4edec8a015eba100ba76e0afb7a0d980312b63ddcd +size 61429032 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/optimizer.pt b/pretrain_glome_nano_model_tiny/checkpoint-215000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7852f4ac1775b4a124f8b01b6dec472823c8360e --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52fa15f82ebc4b95c156cc950e35d3872a0ad0324045a251738e568568579b51 +size 122968954 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/rng_state.pth b/pretrain_glome_nano_model_tiny/checkpoint-215000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7cf378f6781eee516e16f9aaed3717745152038c --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:604a00dce62859f6b063100785ea5aed25010c438c435155b45a6eccf452b0a9 +size 14244 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/scaler.pt b/pretrain_glome_nano_model_tiny/checkpoint-215000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363 +size 988 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/scheduler.pt b/pretrain_glome_nano_model_tiny/checkpoint-215000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e7eae53f649c018ceae4f7a20dad67f78793923 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa362a246a873fcb06a969bc3dc63f8cef55ddbb99c9ce75ae91e3bd8e19ed28 +size 1064 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/special_tokens_map.json b/pretrain_glome_nano_model_tiny/checkpoint-215000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..14f7c9ed7b0bde6d23ee7b6a24ac2996789d1a0b --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/special_tokens_map.json @@ -0,0 +1,51 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "cls_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "sep_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } +} diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/tokenizer.json b/pretrain_glome_nano_model_tiny/checkpoint-215000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..0ece9b8e6fa70a006c5c10c47e30c9cff4ff95f0 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/tokenizer.json @@ -0,0 +1,123 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 27, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": true + }, + { + "id": 28, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": true + }, + { + "id": 29, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": true + }, + { + "id": 30, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": true + }, + { + "id": 31, + "content": "", + "single_word": false, + "lstrip": true, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": true + }, + "post_processor": { + "type": "RobertaProcessing", + "sep": [ + "", + 29 + ], + "cls": [ + "", + 28 + ], + "trim_offsets": true, + "add_prefix_space": false + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": "", + "end_of_word_suffix": "", + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "A": 0, + "R": 1, + "N": 2, + "D": 3, + "C": 4, + "Q": 5, + "E": 6, + "G": 7, + "H": 8, + "I": 9, + "L": 10, + "K": 11, + "M": 12, + "F": 13, + "P": 14, + "S": 15, + "T": 16, + "W": 17, + "Y": 18, + "V": 19, + "X": 20, + "B": 21, + "U": 22, + "Z": 23, + "O": 24, + ".": 25, + "-": 26, + "": 27, + "": 28, + "": 29, + "": 30, + "": 31 + }, + "merges": [] + } +} \ No newline at end of file diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/tokenizer_config.json b/pretrain_glome_nano_model_tiny/checkpoint-215000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1c77f0533c6d3bd60b0a23b8adfacc351923d671 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/tokenizer_config.json @@ -0,0 +1,58 @@ +{ + "add_prefix_space": false, + "added_tokens_decoder": { + "27": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "28": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "29": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "30": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "31": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "errors": "replace", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "tokenizer_class": "RobertaTokenizer", + "trim_offsets": true, + "unk_token": "" +} diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/trainer_state.json b/pretrain_glome_nano_model_tiny/checkpoint-215000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..73974255fb8ff73559573fcbdd6aad8d1adc28b8 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/trainer_state.json @@ -0,0 +1,30487 @@ +{ + "best_global_step": null, + "best_metric": 0.3860665559768677, + "best_model_checkpoint": null, + "epoch": 1.1092709252351396, + "eval_steps": 5000, + "global_step": 215000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0002579699826128232, + "grad_norm": 314643.03125, + "learning_rate": 4.9e-07, + "loss": 3.7018, + "step": 50 + }, + { + "epoch": 0.0005159399652256464, + "grad_norm": 286448.65625, + "learning_rate": 9.9e-07, + "loss": 3.6146, + "step": 100 + }, + { + "epoch": 0.0007739099478384695, + "grad_norm": 214582.28125, + "learning_rate": 1.4900000000000001e-06, + "loss": 3.4562, + "step": 150 + }, + { + "epoch": 0.0010318799304512927, + "grad_norm": 137193.9375, + "learning_rate": 1.99e-06, + "loss": 3.2558, + "step": 200 + }, + { + "epoch": 0.0012898499130641159, + "grad_norm": 82222.84375, + "learning_rate": 2.49e-06, + "loss": 3.0641, + "step": 250 + }, + { + "epoch": 0.001547819895676939, + "grad_norm": 56772.109375, + "learning_rate": 2.99e-06, + "loss": 2.9123, + "step": 300 + }, + { + "epoch": 0.0018057898782897622, + "grad_norm": 46737.6796875, + "learning_rate": 3.49e-06, + "loss": 2.8131, + "step": 350 + }, + { + "epoch": 0.0020637598609025854, + "grad_norm": 38769.04296875, + "learning_rate": 3.99e-06, + "loss": 2.7526, + "step": 400 + }, + { + "epoch": 0.0023217298435154084, + "grad_norm": 34701.5546875, + "learning_rate": 4.49e-06, + "loss": 2.7178, + "step": 450 + }, + { + "epoch": 0.0025796998261282318, + "grad_norm": 30332.826171875, + "learning_rate": 4.9900000000000005e-06, + "loss": 2.6967, + "step": 500 + }, + { + "epoch": 0.0028376698087410547, + "grad_norm": 27192.7421875, + "learning_rate": 5.49e-06, + "loss": 2.6798, + "step": 550 + }, + { + "epoch": 0.003095639791353878, + "grad_norm": 36959.33984375, + "learning_rate": 5.99e-06, + "loss": 2.668, + "step": 600 + }, + { + "epoch": 0.003353609773966701, + "grad_norm": 30939.365234375, + "learning_rate": 6.4900000000000005e-06, + "loss": 2.6572, + "step": 650 + }, + { + "epoch": 0.0036115797565795245, + "grad_norm": 26976.78125, + "learning_rate": 6.990000000000001e-06, + "loss": 2.6397, + "step": 700 + }, + { + "epoch": 0.0038695497391923474, + "grad_norm": 32308.447265625, + "learning_rate": 7.4899999999999994e-06, + "loss": 2.6021, + "step": 750 + }, + { + "epoch": 0.004127519721805171, + "grad_norm": 33838.8046875, + "learning_rate": 7.99e-06, + "loss": 2.5058, + "step": 800 + }, + { + "epoch": 0.004385489704417994, + "grad_norm": 49298.84765625, + "learning_rate": 8.49e-06, + "loss": 2.4095, + "step": 850 + }, + { + "epoch": 0.004643459687030817, + "grad_norm": 36241.56640625, + "learning_rate": 8.99e-06, + "loss": 2.2961, + "step": 900 + }, + { + "epoch": 0.004901429669643641, + "grad_norm": 37884.82421875, + "learning_rate": 9.49e-06, + "loss": 2.1526, + "step": 950 + }, + { + "epoch": 0.0051593996522564635, + "grad_norm": 36827.66796875, + "learning_rate": 9.990000000000001e-06, + "loss": 2.0338, + "step": 1000 + }, + { + "epoch": 0.0054173696348692865, + "grad_norm": 41398.421875, + "learning_rate": 1.049e-05, + "loss": 1.939, + "step": 1050 + }, + { + "epoch": 0.0056753396174821094, + "grad_norm": 42648.38671875, + "learning_rate": 1.099e-05, + "loss": 1.8325, + "step": 1100 + }, + { + "epoch": 0.005933309600094933, + "grad_norm": 40172.9453125, + "learning_rate": 1.149e-05, + "loss": 1.7643, + "step": 1150 + }, + { + "epoch": 0.006191279582707756, + "grad_norm": 35860.8515625, + "learning_rate": 1.199e-05, + "loss": 1.6742, + "step": 1200 + }, + { + "epoch": 0.006449249565320579, + "grad_norm": 44456.93359375, + "learning_rate": 1.249e-05, + "loss": 1.6026, + "step": 1250 + }, + { + "epoch": 0.006707219547933402, + "grad_norm": 36839.08984375, + "learning_rate": 1.299e-05, + "loss": 1.521, + "step": 1300 + }, + { + "epoch": 0.006965189530546226, + "grad_norm": 44026.68359375, + "learning_rate": 1.349e-05, + "loss": 1.4436, + "step": 1350 + }, + { + "epoch": 0.007223159513159049, + "grad_norm": 35557.578125, + "learning_rate": 1.399e-05, + "loss": 1.3773, + "step": 1400 + }, + { + "epoch": 0.007481129495771872, + "grad_norm": 38767.60546875, + "learning_rate": 1.449e-05, + "loss": 1.3023, + "step": 1450 + }, + { + "epoch": 0.007739099478384695, + "grad_norm": 36654.796875, + "learning_rate": 1.499e-05, + "loss": 1.2627, + "step": 1500 + }, + { + "epoch": 0.007997069460997519, + "grad_norm": 41690.328125, + "learning_rate": 1.5490000000000002e-05, + "loss": 1.2063, + "step": 1550 + }, + { + "epoch": 0.008255039443610342, + "grad_norm": 38743.59375, + "learning_rate": 1.599e-05, + "loss": 1.1626, + "step": 1600 + }, + { + "epoch": 0.008513009426223165, + "grad_norm": 41839.7890625, + "learning_rate": 1.649e-05, + "loss": 1.1225, + "step": 1650 + }, + { + "epoch": 0.008770979408835988, + "grad_norm": 42897.0703125, + "learning_rate": 1.699e-05, + "loss": 1.0864, + "step": 1700 + }, + { + "epoch": 0.00902894939144881, + "grad_norm": 37412.30859375, + "learning_rate": 1.749e-05, + "loss": 1.0613, + "step": 1750 + }, + { + "epoch": 0.009286919374061633, + "grad_norm": 37235.484375, + "learning_rate": 1.7990000000000002e-05, + "loss": 1.0354, + "step": 1800 + }, + { + "epoch": 0.009544889356674458, + "grad_norm": 39117.6328125, + "learning_rate": 1.849e-05, + "loss": 1.0059, + "step": 1850 + }, + { + "epoch": 0.009802859339287281, + "grad_norm": 37297.6875, + "learning_rate": 1.8990000000000003e-05, + "loss": 0.9795, + "step": 1900 + }, + { + "epoch": 0.010060829321900104, + "grad_norm": 33772.24609375, + "learning_rate": 1.949e-05, + "loss": 0.9639, + "step": 1950 + }, + { + "epoch": 0.010318799304512927, + "grad_norm": 39775.046875, + "learning_rate": 1.999e-05, + "loss": 0.9386, + "step": 2000 + }, + { + "epoch": 0.01057676928712575, + "grad_norm": 38412.2109375, + "learning_rate": 2.0490000000000002e-05, + "loss": 0.9212, + "step": 2050 + }, + { + "epoch": 0.010834739269738573, + "grad_norm": 39548.98046875, + "learning_rate": 2.099e-05, + "loss": 0.9112, + "step": 2100 + }, + { + "epoch": 0.011092709252351396, + "grad_norm": 38127.77734375, + "learning_rate": 2.1490000000000003e-05, + "loss": 0.8866, + "step": 2150 + }, + { + "epoch": 0.011350679234964219, + "grad_norm": 39877.0390625, + "learning_rate": 2.199e-05, + "loss": 0.8806, + "step": 2200 + }, + { + "epoch": 0.011608649217577044, + "grad_norm": 34642.28515625, + "learning_rate": 2.249e-05, + "loss": 0.8645, + "step": 2250 + }, + { + "epoch": 0.011866619200189867, + "grad_norm": 38508.0078125, + "learning_rate": 2.2990000000000002e-05, + "loss": 0.8609, + "step": 2300 + }, + { + "epoch": 0.01212458918280269, + "grad_norm": 33287.765625, + "learning_rate": 2.349e-05, + "loss": 0.8443, + "step": 2350 + }, + { + "epoch": 0.012382559165415512, + "grad_norm": 35477.5546875, + "learning_rate": 2.3990000000000002e-05, + "loss": 0.839, + "step": 2400 + }, + { + "epoch": 0.012640529148028335, + "grad_norm": 32204.408203125, + "learning_rate": 2.449e-05, + "loss": 0.8204, + "step": 2450 + }, + { + "epoch": 0.012898499130641158, + "grad_norm": 35113.59765625, + "learning_rate": 2.4990000000000003e-05, + "loss": 0.8214, + "step": 2500 + }, + { + "epoch": 0.013156469113253981, + "grad_norm": 36591.2421875, + "learning_rate": 2.549e-05, + "loss": 0.8066, + "step": 2550 + }, + { + "epoch": 0.013414439095866804, + "grad_norm": 37926.3125, + "learning_rate": 2.5990000000000004e-05, + "loss": 0.7993, + "step": 2600 + }, + { + "epoch": 0.013672409078479627, + "grad_norm": 35413.01171875, + "learning_rate": 2.6490000000000002e-05, + "loss": 0.8012, + "step": 2650 + }, + { + "epoch": 0.013930379061092452, + "grad_norm": 33275.1796875, + "learning_rate": 2.6989999999999997e-05, + "loss": 0.7879, + "step": 2700 + }, + { + "epoch": 0.014188349043705275, + "grad_norm": 35463.87109375, + "learning_rate": 2.749e-05, + "loss": 0.7808, + "step": 2750 + }, + { + "epoch": 0.014446319026318098, + "grad_norm": 33143.234375, + "learning_rate": 2.7989999999999998e-05, + "loss": 0.7813, + "step": 2800 + }, + { + "epoch": 0.01470428900893092, + "grad_norm": 32908.71484375, + "learning_rate": 2.849e-05, + "loss": 0.7725, + "step": 2850 + }, + { + "epoch": 0.014962258991543744, + "grad_norm": 36443.578125, + "learning_rate": 2.8990000000000002e-05, + "loss": 0.761, + "step": 2900 + }, + { + "epoch": 0.015220228974156567, + "grad_norm": 32331.728515625, + "learning_rate": 2.949e-05, + "loss": 0.7588, + "step": 2950 + }, + { + "epoch": 0.01547819895676939, + "grad_norm": 33401.546875, + "learning_rate": 2.9990000000000003e-05, + "loss": 0.7462, + "step": 3000 + }, + { + "epoch": 0.015736168939382213, + "grad_norm": 32041.26171875, + "learning_rate": 3.049e-05, + "loss": 0.7449, + "step": 3050 + }, + { + "epoch": 0.015994138921995037, + "grad_norm": 32035.814453125, + "learning_rate": 3.099e-05, + "loss": 0.7373, + "step": 3100 + }, + { + "epoch": 0.01625210890460786, + "grad_norm": 31430.421875, + "learning_rate": 3.1490000000000005e-05, + "loss": 0.7371, + "step": 3150 + }, + { + "epoch": 0.016510078887220683, + "grad_norm": 30911.267578125, + "learning_rate": 3.1990000000000004e-05, + "loss": 0.7315, + "step": 3200 + }, + { + "epoch": 0.016768048869833505, + "grad_norm": 31906.193359375, + "learning_rate": 3.249e-05, + "loss": 0.7405, + "step": 3250 + }, + { + "epoch": 0.01702601885244633, + "grad_norm": 30320.1640625, + "learning_rate": 3.299e-05, + "loss": 0.7323, + "step": 3300 + }, + { + "epoch": 0.017283988835059154, + "grad_norm": 32357.072265625, + "learning_rate": 3.349e-05, + "loss": 0.7244, + "step": 3350 + }, + { + "epoch": 0.017541958817671975, + "grad_norm": 34023.2109375, + "learning_rate": 3.399e-05, + "loss": 0.7214, + "step": 3400 + }, + { + "epoch": 0.0177999288002848, + "grad_norm": 33940.8046875, + "learning_rate": 3.449e-05, + "loss": 0.7158, + "step": 3450 + }, + { + "epoch": 0.01805789878289762, + "grad_norm": 31701.14453125, + "learning_rate": 3.499e-05, + "loss": 0.7102, + "step": 3500 + }, + { + "epoch": 0.018315868765510446, + "grad_norm": 32291.861328125, + "learning_rate": 3.549e-05, + "loss": 0.7104, + "step": 3550 + }, + { + "epoch": 0.018573838748123267, + "grad_norm": 28074.177734375, + "learning_rate": 3.599e-05, + "loss": 0.7001, + "step": 3600 + }, + { + "epoch": 0.01883180873073609, + "grad_norm": 29823.787109375, + "learning_rate": 3.6490000000000005e-05, + "loss": 0.7029, + "step": 3650 + }, + { + "epoch": 0.019089778713348916, + "grad_norm": 29792.24609375, + "learning_rate": 3.699e-05, + "loss": 0.6949, + "step": 3700 + }, + { + "epoch": 0.019347748695961738, + "grad_norm": 31345.296875, + "learning_rate": 3.749e-05, + "loss": 0.6989, + "step": 3750 + }, + { + "epoch": 0.019605718678574562, + "grad_norm": 33923.0625, + "learning_rate": 3.799e-05, + "loss": 0.6984, + "step": 3800 + }, + { + "epoch": 0.019863688661187383, + "grad_norm": 30762.97265625, + "learning_rate": 3.8490000000000006e-05, + "loss": 0.6931, + "step": 3850 + }, + { + "epoch": 0.020121658643800208, + "grad_norm": 30794.13671875, + "learning_rate": 3.8990000000000004e-05, + "loss": 0.6923, + "step": 3900 + }, + { + "epoch": 0.02037962862641303, + "grad_norm": 29854.923828125, + "learning_rate": 3.9489999999999996e-05, + "loss": 0.6895, + "step": 3950 + }, + { + "epoch": 0.020637598609025854, + "grad_norm": 27336.958984375, + "learning_rate": 3.999e-05, + "loss": 0.6853, + "step": 4000 + }, + { + "epoch": 0.020895568591638675, + "grad_norm": 31836.81640625, + "learning_rate": 4.049e-05, + "loss": 0.6821, + "step": 4050 + }, + { + "epoch": 0.0211535385742515, + "grad_norm": 28508.548828125, + "learning_rate": 4.099e-05, + "loss": 0.6857, + "step": 4100 + }, + { + "epoch": 0.021411508556864325, + "grad_norm": 30309.2421875, + "learning_rate": 4.1490000000000004e-05, + "loss": 0.6791, + "step": 4150 + }, + { + "epoch": 0.021669478539477146, + "grad_norm": 31035.0703125, + "learning_rate": 4.199e-05, + "loss": 0.6762, + "step": 4200 + }, + { + "epoch": 0.02192744852208997, + "grad_norm": 30893.951171875, + "learning_rate": 4.249e-05, + "loss": 0.6739, + "step": 4250 + }, + { + "epoch": 0.022185418504702792, + "grad_norm": 28317.12890625, + "learning_rate": 4.299e-05, + "loss": 0.6635, + "step": 4300 + }, + { + "epoch": 0.022443388487315617, + "grad_norm": 27140.29296875, + "learning_rate": 4.3490000000000005e-05, + "loss": 0.6694, + "step": 4350 + }, + { + "epoch": 0.022701358469928438, + "grad_norm": 27948.32421875, + "learning_rate": 4.3990000000000004e-05, + "loss": 0.6667, + "step": 4400 + }, + { + "epoch": 0.022959328452541262, + "grad_norm": 27243.44140625, + "learning_rate": 4.449e-05, + "loss": 0.6689, + "step": 4450 + }, + { + "epoch": 0.023217298435154087, + "grad_norm": 29163.98828125, + "learning_rate": 4.499e-05, + "loss": 0.6639, + "step": 4500 + }, + { + "epoch": 0.02347526841776691, + "grad_norm": 27801.79296875, + "learning_rate": 4.549000000000001e-05, + "loss": 0.6612, + "step": 4550 + }, + { + "epoch": 0.023733238400379733, + "grad_norm": 28201.7265625, + "learning_rate": 4.599e-05, + "loss": 0.6608, + "step": 4600 + }, + { + "epoch": 0.023991208382992554, + "grad_norm": 28875.06640625, + "learning_rate": 4.649e-05, + "loss": 0.6642, + "step": 4650 + }, + { + "epoch": 0.02424917836560538, + "grad_norm": 25467.376953125, + "learning_rate": 4.699e-05, + "loss": 0.6513, + "step": 4700 + }, + { + "epoch": 0.0245071483482182, + "grad_norm": 27359.97265625, + "learning_rate": 4.749e-05, + "loss": 0.6554, + "step": 4750 + }, + { + "epoch": 0.024765118330831025, + "grad_norm": 30614.15234375, + "learning_rate": 4.799e-05, + "loss": 0.6574, + "step": 4800 + }, + { + "epoch": 0.025023088313443846, + "grad_norm": 29069.677734375, + "learning_rate": 4.8490000000000005e-05, + "loss": 0.6562, + "step": 4850 + }, + { + "epoch": 0.02528105829605667, + "grad_norm": 27337.37109375, + "learning_rate": 4.8990000000000004e-05, + "loss": 0.6507, + "step": 4900 + }, + { + "epoch": 0.025539028278669496, + "grad_norm": 26784.7265625, + "learning_rate": 4.949e-05, + "loss": 0.64, + "step": 4950 + }, + { + "epoch": 0.025796998261282317, + "grad_norm": 27480.509765625, + "learning_rate": 4.999e-05, + "loss": 0.6515, + "step": 5000 + }, + { + "epoch": 0.025796998261282317, + "eval_loss": 0.6312834024429321, + "eval_runtime": 3280.995, + "eval_samples_per_second": 945.177, + "eval_steps_per_second": 1.846, + "step": 5000 + }, + { + "epoch": 0.02605496824389514, + "grad_norm": 27871.740234375, + "learning_rate": 5.0490000000000006e-05, + "loss": 0.6424, + "step": 5050 + }, + { + "epoch": 0.026312938226507963, + "grad_norm": 31187.00390625, + "learning_rate": 5.0990000000000005e-05, + "loss": 0.643, + "step": 5100 + }, + { + "epoch": 0.026570908209120787, + "grad_norm": 25956.521484375, + "learning_rate": 5.149e-05, + "loss": 0.65, + "step": 5150 + }, + { + "epoch": 0.02682887819173361, + "grad_norm": 25967.70703125, + "learning_rate": 5.199000000000001e-05, + "loss": 0.6466, + "step": 5200 + }, + { + "epoch": 0.027086848174346433, + "grad_norm": 25310.275390625, + "learning_rate": 5.249000000000001e-05, + "loss": 0.6429, + "step": 5250 + }, + { + "epoch": 0.027344818156959255, + "grad_norm": 24740.033203125, + "learning_rate": 5.2990000000000006e-05, + "loss": 0.6415, + "step": 5300 + }, + { + "epoch": 0.02760278813957208, + "grad_norm": 30795.58984375, + "learning_rate": 5.3490000000000005e-05, + "loss": 0.6424, + "step": 5350 + }, + { + "epoch": 0.027860758122184904, + "grad_norm": 30625.59375, + "learning_rate": 5.399000000000001e-05, + "loss": 0.6361, + "step": 5400 + }, + { + "epoch": 0.028118728104797725, + "grad_norm": 27036.14453125, + "learning_rate": 5.449000000000001e-05, + "loss": 0.6351, + "step": 5450 + }, + { + "epoch": 0.02837669808741055, + "grad_norm": 26934.447265625, + "learning_rate": 5.499000000000001e-05, + "loss": 0.6304, + "step": 5500 + }, + { + "epoch": 0.02863466807002337, + "grad_norm": 25540.291015625, + "learning_rate": 5.549e-05, + "loss": 0.6304, + "step": 5550 + }, + { + "epoch": 0.028892638052636196, + "grad_norm": 26574.9375, + "learning_rate": 5.599e-05, + "loss": 0.6444, + "step": 5600 + }, + { + "epoch": 0.029150608035249017, + "grad_norm": 26941.955078125, + "learning_rate": 5.6489999999999996e-05, + "loss": 0.6373, + "step": 5650 + }, + { + "epoch": 0.02940857801786184, + "grad_norm": 26957.7734375, + "learning_rate": 5.699e-05, + "loss": 0.6363, + "step": 5700 + }, + { + "epoch": 0.029666548000474666, + "grad_norm": 24377.55859375, + "learning_rate": 5.749e-05, + "loss": 0.6213, + "step": 5750 + }, + { + "epoch": 0.029924517983087488, + "grad_norm": 25600.697265625, + "learning_rate": 5.799e-05, + "loss": 0.6362, + "step": 5800 + }, + { + "epoch": 0.030182487965700312, + "grad_norm": 23841.47265625, + "learning_rate": 5.849e-05, + "loss": 0.6274, + "step": 5850 + }, + { + "epoch": 0.030440457948313134, + "grad_norm": 23847.73046875, + "learning_rate": 5.899e-05, + "loss": 0.624, + "step": 5900 + }, + { + "epoch": 0.030698427930925958, + "grad_norm": 25549.033203125, + "learning_rate": 5.949e-05, + "loss": 0.627, + "step": 5950 + }, + { + "epoch": 0.03095639791353878, + "grad_norm": 25286.8046875, + "learning_rate": 5.999e-05, + "loss": 0.6272, + "step": 6000 + }, + { + "epoch": 0.031214367896151604, + "grad_norm": 25137.384765625, + "learning_rate": 6.0490000000000005e-05, + "loss": 0.622, + "step": 6050 + }, + { + "epoch": 0.031472337878764425, + "grad_norm": 23606.23828125, + "learning_rate": 6.0990000000000004e-05, + "loss": 0.6262, + "step": 6100 + }, + { + "epoch": 0.031730307861377254, + "grad_norm": 32101.404296875, + "learning_rate": 6.149000000000001e-05, + "loss": 0.619, + "step": 6150 + }, + { + "epoch": 0.031988277843990075, + "grad_norm": 23683.73046875, + "learning_rate": 6.199000000000001e-05, + "loss": 0.6129, + "step": 6200 + }, + { + "epoch": 0.032246247826602896, + "grad_norm": 25243.49609375, + "learning_rate": 6.249e-05, + "loss": 0.6194, + "step": 6250 + }, + { + "epoch": 0.03250421780921572, + "grad_norm": 28690.10546875, + "learning_rate": 6.299e-05, + "loss": 0.6199, + "step": 6300 + }, + { + "epoch": 0.032762187791828545, + "grad_norm": 24198.47265625, + "learning_rate": 6.349e-05, + "loss": 0.6077, + "step": 6350 + }, + { + "epoch": 0.03302015777444137, + "grad_norm": 24742.998046875, + "learning_rate": 6.399e-05, + "loss": 0.6168, + "step": 6400 + }, + { + "epoch": 0.03327812775705419, + "grad_norm": 27489.93359375, + "learning_rate": 6.449e-05, + "loss": 0.6136, + "step": 6450 + }, + { + "epoch": 0.03353609773966701, + "grad_norm": 28733.7265625, + "learning_rate": 6.499000000000001e-05, + "loss": 0.6184, + "step": 6500 + }, + { + "epoch": 0.03379406772227984, + "grad_norm": 23810.544921875, + "learning_rate": 6.549000000000001e-05, + "loss": 0.6167, + "step": 6550 + }, + { + "epoch": 0.03405203770489266, + "grad_norm": 25503.98828125, + "learning_rate": 6.599000000000001e-05, + "loss": 0.6184, + "step": 6600 + }, + { + "epoch": 0.03431000768750548, + "grad_norm": 24550.26171875, + "learning_rate": 6.649000000000001e-05, + "loss": 0.6146, + "step": 6650 + }, + { + "epoch": 0.03456797767011831, + "grad_norm": 22774.71875, + "learning_rate": 6.699000000000001e-05, + "loss": 0.6132, + "step": 6700 + }, + { + "epoch": 0.03482594765273113, + "grad_norm": 23878.90625, + "learning_rate": 6.749e-05, + "loss": 0.6127, + "step": 6750 + }, + { + "epoch": 0.03508391763534395, + "grad_norm": 28744.9921875, + "learning_rate": 6.799e-05, + "loss": 0.6203, + "step": 6800 + }, + { + "epoch": 0.03534188761795677, + "grad_norm": 24239.826171875, + "learning_rate": 6.849e-05, + "loss": 0.6069, + "step": 6850 + }, + { + "epoch": 0.0355998576005696, + "grad_norm": 27030.513671875, + "learning_rate": 6.899e-05, + "loss": 0.614, + "step": 6900 + }, + { + "epoch": 0.03585782758318242, + "grad_norm": 22872.59375, + "learning_rate": 6.949e-05, + "loss": 0.6068, + "step": 6950 + }, + { + "epoch": 0.03611579756579524, + "grad_norm": 23280.333984375, + "learning_rate": 6.999e-05, + "loss": 0.6064, + "step": 7000 + }, + { + "epoch": 0.03637376754840807, + "grad_norm": 24819.060546875, + "learning_rate": 7.049e-05, + "loss": 0.606, + "step": 7050 + }, + { + "epoch": 0.03663173753102089, + "grad_norm": 23739.595703125, + "learning_rate": 7.099e-05, + "loss": 0.6065, + "step": 7100 + }, + { + "epoch": 0.03688970751363371, + "grad_norm": 24261.28515625, + "learning_rate": 7.149e-05, + "loss": 0.6037, + "step": 7150 + }, + { + "epoch": 0.037147677496246534, + "grad_norm": 24133.744140625, + "learning_rate": 7.199000000000001e-05, + "loss": 0.6097, + "step": 7200 + }, + { + "epoch": 0.03740564747885936, + "grad_norm": 22903.197265625, + "learning_rate": 7.249e-05, + "loss": 0.6048, + "step": 7250 + }, + { + "epoch": 0.03766361746147218, + "grad_norm": 23503.970703125, + "learning_rate": 7.299e-05, + "loss": 0.6039, + "step": 7300 + }, + { + "epoch": 0.037921587444085005, + "grad_norm": 20935.388671875, + "learning_rate": 7.349e-05, + "loss": 0.6016, + "step": 7350 + }, + { + "epoch": 0.03817955742669783, + "grad_norm": 22991.720703125, + "learning_rate": 7.399e-05, + "loss": 0.6111, + "step": 7400 + }, + { + "epoch": 0.038437527409310654, + "grad_norm": 21915.90234375, + "learning_rate": 7.449e-05, + "loss": 0.5969, + "step": 7450 + }, + { + "epoch": 0.038695497391923475, + "grad_norm": 22474.25390625, + "learning_rate": 7.499e-05, + "loss": 0.6068, + "step": 7500 + }, + { + "epoch": 0.038953467374536296, + "grad_norm": 24122.150390625, + "learning_rate": 7.549000000000001e-05, + "loss": 0.6037, + "step": 7550 + }, + { + "epoch": 0.039211437357149125, + "grad_norm": 22262.220703125, + "learning_rate": 7.599000000000001e-05, + "loss": 0.5946, + "step": 7600 + }, + { + "epoch": 0.039469407339761946, + "grad_norm": 23959.7265625, + "learning_rate": 7.649000000000001e-05, + "loss": 0.598, + "step": 7650 + }, + { + "epoch": 0.03972737732237477, + "grad_norm": 21918.5859375, + "learning_rate": 7.699e-05, + "loss": 0.5959, + "step": 7700 + }, + { + "epoch": 0.03998534730498759, + "grad_norm": 23740.5390625, + "learning_rate": 7.749e-05, + "loss": 0.594, + "step": 7750 + }, + { + "epoch": 0.040243317287600416, + "grad_norm": 23406.4296875, + "learning_rate": 7.799e-05, + "loss": 0.6048, + "step": 7800 + }, + { + "epoch": 0.04050128727021324, + "grad_norm": 23423.201171875, + "learning_rate": 7.849e-05, + "loss": 0.5944, + "step": 7850 + }, + { + "epoch": 0.04075925725282606, + "grad_norm": 23187.76171875, + "learning_rate": 7.899000000000001e-05, + "loss": 0.5944, + "step": 7900 + }, + { + "epoch": 0.04101722723543889, + "grad_norm": 25532.4375, + "learning_rate": 7.949000000000001e-05, + "loss": 0.5978, + "step": 7950 + }, + { + "epoch": 0.04127519721805171, + "grad_norm": 23045.28515625, + "learning_rate": 7.999000000000001e-05, + "loss": 0.5968, + "step": 8000 + }, + { + "epoch": 0.04153316720066453, + "grad_norm": 22853.826171875, + "learning_rate": 8.049e-05, + "loss": 0.5915, + "step": 8050 + }, + { + "epoch": 0.04179113718327735, + "grad_norm": 21853.658203125, + "learning_rate": 8.099e-05, + "loss": 0.5932, + "step": 8100 + }, + { + "epoch": 0.04204910716589018, + "grad_norm": 22395.74609375, + "learning_rate": 8.149e-05, + "loss": 0.5925, + "step": 8150 + }, + { + "epoch": 0.042307077148503, + "grad_norm": 23933.40625, + "learning_rate": 8.199e-05, + "loss": 0.5878, + "step": 8200 + }, + { + "epoch": 0.04256504713111582, + "grad_norm": 21773.087890625, + "learning_rate": 8.249e-05, + "loss": 0.5916, + "step": 8250 + }, + { + "epoch": 0.04282301711372865, + "grad_norm": 22665.11328125, + "learning_rate": 8.299e-05, + "loss": 0.5906, + "step": 8300 + }, + { + "epoch": 0.04308098709634147, + "grad_norm": 22157.091796875, + "learning_rate": 8.349e-05, + "loss": 0.5873, + "step": 8350 + }, + { + "epoch": 0.04333895707895429, + "grad_norm": 21506.8125, + "learning_rate": 8.399e-05, + "loss": 0.5927, + "step": 8400 + }, + { + "epoch": 0.04359692706156711, + "grad_norm": 22143.341796875, + "learning_rate": 8.449e-05, + "loss": 0.5828, + "step": 8450 + }, + { + "epoch": 0.04385489704417994, + "grad_norm": 23341.23828125, + "learning_rate": 8.499e-05, + "loss": 0.5885, + "step": 8500 + }, + { + "epoch": 0.04411286702679276, + "grad_norm": 21876.96484375, + "learning_rate": 8.549000000000001e-05, + "loss": 0.5913, + "step": 8550 + }, + { + "epoch": 0.044370837009405584, + "grad_norm": 22307.29296875, + "learning_rate": 8.599000000000001e-05, + "loss": 0.583, + "step": 8600 + }, + { + "epoch": 0.04462880699201841, + "grad_norm": 22859.017578125, + "learning_rate": 8.649000000000001e-05, + "loss": 0.5889, + "step": 8650 + }, + { + "epoch": 0.04488677697463123, + "grad_norm": 22058.24609375, + "learning_rate": 8.699e-05, + "loss": 0.5848, + "step": 8700 + }, + { + "epoch": 0.045144746957244054, + "grad_norm": 22116.837890625, + "learning_rate": 8.749e-05, + "loss": 0.5858, + "step": 8750 + }, + { + "epoch": 0.045402716939856876, + "grad_norm": 23110.17578125, + "learning_rate": 8.799e-05, + "loss": 0.5855, + "step": 8800 + }, + { + "epoch": 0.045660686922469704, + "grad_norm": 24173.064453125, + "learning_rate": 8.849e-05, + "loss": 0.5878, + "step": 8850 + }, + { + "epoch": 0.045918656905082525, + "grad_norm": 21521.48046875, + "learning_rate": 8.899e-05, + "loss": 0.5914, + "step": 8900 + }, + { + "epoch": 0.046176626887695346, + "grad_norm": 24516.0, + "learning_rate": 8.949000000000001e-05, + "loss": 0.5849, + "step": 8950 + }, + { + "epoch": 0.046434596870308174, + "grad_norm": 22074.9609375, + "learning_rate": 8.999000000000001e-05, + "loss": 0.5848, + "step": 9000 + }, + { + "epoch": 0.046692566852920996, + "grad_norm": 21495.4140625, + "learning_rate": 9.049000000000001e-05, + "loss": 0.579, + "step": 9050 + }, + { + "epoch": 0.04695053683553382, + "grad_norm": 23548.224609375, + "learning_rate": 9.099000000000001e-05, + "loss": 0.5826, + "step": 9100 + }, + { + "epoch": 0.04720850681814664, + "grad_norm": 22144.51953125, + "learning_rate": 9.149e-05, + "loss": 0.5879, + "step": 9150 + }, + { + "epoch": 0.047466476800759466, + "grad_norm": 20656.185546875, + "learning_rate": 9.199e-05, + "loss": 0.5806, + "step": 9200 + }, + { + "epoch": 0.04772444678337229, + "grad_norm": 21228.814453125, + "learning_rate": 9.249e-05, + "loss": 0.5858, + "step": 9250 + }, + { + "epoch": 0.04798241676598511, + "grad_norm": 20801.869140625, + "learning_rate": 9.299e-05, + "loss": 0.5816, + "step": 9300 + }, + { + "epoch": 0.04824038674859793, + "grad_norm": 24044.283203125, + "learning_rate": 9.349e-05, + "loss": 0.5811, + "step": 9350 + }, + { + "epoch": 0.04849835673121076, + "grad_norm": 22395.47265625, + "learning_rate": 9.399e-05, + "loss": 0.5782, + "step": 9400 + }, + { + "epoch": 0.04875632671382358, + "grad_norm": 22353.078125, + "learning_rate": 9.449e-05, + "loss": 0.5758, + "step": 9450 + }, + { + "epoch": 0.0490142966964364, + "grad_norm": 22520.72265625, + "learning_rate": 9.499e-05, + "loss": 0.5752, + "step": 9500 + }, + { + "epoch": 0.04927226667904923, + "grad_norm": 22016.951171875, + "learning_rate": 9.549e-05, + "loss": 0.5764, + "step": 9550 + }, + { + "epoch": 0.04953023666166205, + "grad_norm": 20046.615234375, + "learning_rate": 9.599000000000001e-05, + "loss": 0.5759, + "step": 9600 + }, + { + "epoch": 0.04978820664427487, + "grad_norm": 21346.029296875, + "learning_rate": 9.649e-05, + "loss": 0.5798, + "step": 9650 + }, + { + "epoch": 0.05004617662688769, + "grad_norm": 22449.796875, + "learning_rate": 9.699e-05, + "loss": 0.5829, + "step": 9700 + }, + { + "epoch": 0.05030414660950052, + "grad_norm": 20538.751953125, + "learning_rate": 9.749e-05, + "loss": 0.5809, + "step": 9750 + }, + { + "epoch": 0.05056211659211334, + "grad_norm": 21123.19921875, + "learning_rate": 9.799e-05, + "loss": 0.5726, + "step": 9800 + }, + { + "epoch": 0.05082008657472616, + "grad_norm": 20853.08203125, + "learning_rate": 9.849e-05, + "loss": 0.5726, + "step": 9850 + }, + { + "epoch": 0.05107805655733899, + "grad_norm": 22160.841796875, + "learning_rate": 9.899e-05, + "loss": 0.5783, + "step": 9900 + }, + { + "epoch": 0.05133602653995181, + "grad_norm": 19711.109375, + "learning_rate": 9.949000000000001e-05, + "loss": 0.5722, + "step": 9950 + }, + { + "epoch": 0.051593996522564634, + "grad_norm": 21442.310546875, + "learning_rate": 9.999000000000001e-05, + "loss": 0.5773, + "step": 10000 + }, + { + "epoch": 0.051593996522564634, + "eval_loss": 0.5661358833312988, + "eval_runtime": 3272.6524, + "eval_samples_per_second": 947.586, + "eval_steps_per_second": 1.851, + "step": 10000 + }, + { + "epoch": 0.051851966505177455, + "grad_norm": 21442.943359375, + "learning_rate": 9.999998718392692e-05, + "loss": 0.5727, + "step": 10050 + }, + { + "epoch": 0.05210993648779028, + "grad_norm": 21711.177734375, + "learning_rate": 9.999994768416664e-05, + "loss": 0.5707, + "step": 10100 + }, + { + "epoch": 0.052367906470403104, + "grad_norm": 21793.666015625, + "learning_rate": 9.999988149540251e-05, + "loss": 0.5727, + "step": 10150 + }, + { + "epoch": 0.052625876453015925, + "grad_norm": 18847.970703125, + "learning_rate": 9.999978861766983e-05, + "loss": 0.5726, + "step": 10200 + }, + { + "epoch": 0.052883846435628754, + "grad_norm": 22870.91796875, + "learning_rate": 9.999966905101816e-05, + "loss": 0.5751, + "step": 10250 + }, + { + "epoch": 0.053141816418241575, + "grad_norm": 23970.431640625, + "learning_rate": 9.999952279551135e-05, + "loss": 0.5745, + "step": 10300 + }, + { + "epoch": 0.053399786400854396, + "grad_norm": 19482.65625, + "learning_rate": 9.999934985122746e-05, + "loss": 0.5734, + "step": 10350 + }, + { + "epoch": 0.05365775638346722, + "grad_norm": 19720.65625, + "learning_rate": 9.999915021825879e-05, + "loss": 0.5697, + "step": 10400 + }, + { + "epoch": 0.053915726366080045, + "grad_norm": 21484.8203125, + "learning_rate": 9.99989238967119e-05, + "loss": 0.5678, + "step": 10450 + }, + { + "epoch": 0.05417369634869287, + "grad_norm": 20198.669921875, + "learning_rate": 9.999867088670762e-05, + "loss": 0.5731, + "step": 10500 + }, + { + "epoch": 0.05443166633130569, + "grad_norm": 19887.86328125, + "learning_rate": 9.999839118838099e-05, + "loss": 0.5711, + "step": 10550 + }, + { + "epoch": 0.05468963631391851, + "grad_norm": 21250.41796875, + "learning_rate": 9.999808480188131e-05, + "loss": 0.5653, + "step": 10600 + }, + { + "epoch": 0.05494760629653134, + "grad_norm": 21179.904296875, + "learning_rate": 9.999775172737211e-05, + "loss": 0.5666, + "step": 10650 + }, + { + "epoch": 0.05520557627914416, + "grad_norm": 21106.083984375, + "learning_rate": 9.999739196503119e-05, + "loss": 0.5656, + "step": 10700 + }, + { + "epoch": 0.05546354626175698, + "grad_norm": 19393.994140625, + "learning_rate": 9.999700551505057e-05, + "loss": 0.566, + "step": 10750 + }, + { + "epoch": 0.05572151624436981, + "grad_norm": 22788.060546875, + "learning_rate": 9.999659237763656e-05, + "loss": 0.5681, + "step": 10800 + }, + { + "epoch": 0.05597948622698263, + "grad_norm": 20106.75390625, + "learning_rate": 9.999615255300966e-05, + "loss": 0.5668, + "step": 10850 + }, + { + "epoch": 0.05623745620959545, + "grad_norm": 22390.466796875, + "learning_rate": 9.999568604140464e-05, + "loss": 0.5665, + "step": 10900 + }, + { + "epoch": 0.05649542619220827, + "grad_norm": 21145.044921875, + "learning_rate": 9.999519284307053e-05, + "loss": 0.5645, + "step": 10950 + }, + { + "epoch": 0.0567533961748211, + "grad_norm": 22501.64453125, + "learning_rate": 9.999467295827059e-05, + "loss": 0.5663, + "step": 11000 + }, + { + "epoch": 0.05701136615743392, + "grad_norm": 21079.431640625, + "learning_rate": 9.999412638728229e-05, + "loss": 0.5605, + "step": 11050 + }, + { + "epoch": 0.05726933614004674, + "grad_norm": 21501.4375, + "learning_rate": 9.999355313039742e-05, + "loss": 0.5643, + "step": 11100 + }, + { + "epoch": 0.05752730612265957, + "grad_norm": 22092.6328125, + "learning_rate": 9.999295318792194e-05, + "loss": 0.5602, + "step": 11150 + }, + { + "epoch": 0.05778527610527239, + "grad_norm": 19948.81640625, + "learning_rate": 9.999232656017613e-05, + "loss": 0.5649, + "step": 11200 + }, + { + "epoch": 0.05804324608788521, + "grad_norm": 20543.5859375, + "learning_rate": 9.999167324749443e-05, + "loss": 0.5598, + "step": 11250 + }, + { + "epoch": 0.058301216070498034, + "grad_norm": 20948.060546875, + "learning_rate": 9.99909932502256e-05, + "loss": 0.5631, + "step": 11300 + }, + { + "epoch": 0.05855918605311086, + "grad_norm": 20384.732421875, + "learning_rate": 9.999028656873257e-05, + "loss": 0.5592, + "step": 11350 + }, + { + "epoch": 0.05881715603572368, + "grad_norm": 20027.615234375, + "learning_rate": 9.99895532033926e-05, + "loss": 0.5658, + "step": 11400 + }, + { + "epoch": 0.059075126018336505, + "grad_norm": 20702.263671875, + "learning_rate": 9.99887931545971e-05, + "loss": 0.56, + "step": 11450 + }, + { + "epoch": 0.05933309600094933, + "grad_norm": 21589.52734375, + "learning_rate": 9.99880064227518e-05, + "loss": 0.5595, + "step": 11500 + }, + { + "epoch": 0.059591065983562154, + "grad_norm": 20375.181640625, + "learning_rate": 9.998719300827663e-05, + "loss": 0.5627, + "step": 11550 + }, + { + "epoch": 0.059849035966174975, + "grad_norm": 20207.677734375, + "learning_rate": 9.998635291160577e-05, + "loss": 0.5615, + "step": 11600 + }, + { + "epoch": 0.060107005948787796, + "grad_norm": 20898.291015625, + "learning_rate": 9.998548613318767e-05, + "loss": 0.5594, + "step": 11650 + }, + { + "epoch": 0.060364975931400625, + "grad_norm": 20133.822265625, + "learning_rate": 9.998459267348497e-05, + "loss": 0.5631, + "step": 11700 + }, + { + "epoch": 0.060622945914013446, + "grad_norm": 19021.533203125, + "learning_rate": 9.99836725329746e-05, + "loss": 0.5576, + "step": 11750 + }, + { + "epoch": 0.06088091589662627, + "grad_norm": 19088.32421875, + "learning_rate": 9.998272571214772e-05, + "loss": 0.5619, + "step": 11800 + }, + { + "epoch": 0.061138885879239095, + "grad_norm": 19742.841796875, + "learning_rate": 9.99817522115097e-05, + "loss": 0.5626, + "step": 11850 + }, + { + "epoch": 0.061396855861851916, + "grad_norm": 21584.271484375, + "learning_rate": 9.99807520315802e-05, + "loss": 0.555, + "step": 11900 + }, + { + "epoch": 0.06165482584446474, + "grad_norm": 19766.76953125, + "learning_rate": 9.997972517289309e-05, + "loss": 0.5584, + "step": 11950 + }, + { + "epoch": 0.06191279582707756, + "grad_norm": 19821.556640625, + "learning_rate": 9.997867163599646e-05, + "loss": 0.5623, + "step": 12000 + }, + { + "epoch": 0.06217076580969039, + "grad_norm": 19488.490234375, + "learning_rate": 9.997759142145271e-05, + "loss": 0.5591, + "step": 12050 + }, + { + "epoch": 0.06242873579230321, + "grad_norm": 20093.806640625, + "learning_rate": 9.997648452983842e-05, + "loss": 0.5597, + "step": 12100 + }, + { + "epoch": 0.06268670577491603, + "grad_norm": 20202.154296875, + "learning_rate": 9.997535096174441e-05, + "loss": 0.5542, + "step": 12150 + }, + { + "epoch": 0.06294467575752885, + "grad_norm": 19978.154296875, + "learning_rate": 9.99741907177758e-05, + "loss": 0.5629, + "step": 12200 + }, + { + "epoch": 0.06320264574014167, + "grad_norm": 19697.005859375, + "learning_rate": 9.997300379855186e-05, + "loss": 0.5571, + "step": 12250 + }, + { + "epoch": 0.06346061572275451, + "grad_norm": 20384.287109375, + "learning_rate": 9.997179020470618e-05, + "loss": 0.5526, + "step": 12300 + }, + { + "epoch": 0.06371858570536733, + "grad_norm": 18652.044921875, + "learning_rate": 9.997054993688651e-05, + "loss": 0.5531, + "step": 12350 + }, + { + "epoch": 0.06397655568798015, + "grad_norm": 20133.990234375, + "learning_rate": 9.996928299575493e-05, + "loss": 0.5561, + "step": 12400 + }, + { + "epoch": 0.06423452567059297, + "grad_norm": 20575.875, + "learning_rate": 9.996798938198766e-05, + "loss": 0.5559, + "step": 12450 + }, + { + "epoch": 0.06449249565320579, + "grad_norm": 19524.828125, + "learning_rate": 9.996666909627525e-05, + "loss": 0.5437, + "step": 12500 + }, + { + "epoch": 0.06475046563581861, + "grad_norm": 22106.927734375, + "learning_rate": 9.996532213932242e-05, + "loss": 0.5691, + "step": 12550 + }, + { + "epoch": 0.06500843561843143, + "grad_norm": 18443.4609375, + "learning_rate": 9.996394851184814e-05, + "loss": 0.553, + "step": 12600 + }, + { + "epoch": 0.06526640560104426, + "grad_norm": 21786.943359375, + "learning_rate": 9.996254821458565e-05, + "loss": 0.562, + "step": 12650 + }, + { + "epoch": 0.06552437558365709, + "grad_norm": 22699.578125, + "learning_rate": 9.996112124828241e-05, + "loss": 0.5526, + "step": 12700 + }, + { + "epoch": 0.06578234556626991, + "grad_norm": 18522.822265625, + "learning_rate": 9.995966761370006e-05, + "loss": 0.5525, + "step": 12750 + }, + { + "epoch": 0.06604031554888273, + "grad_norm": 19723.44140625, + "learning_rate": 9.995818731161458e-05, + "loss": 0.5555, + "step": 12800 + }, + { + "epoch": 0.06629828553149555, + "grad_norm": 20643.173828125, + "learning_rate": 9.995668034281606e-05, + "loss": 0.5506, + "step": 12850 + }, + { + "epoch": 0.06655625551410838, + "grad_norm": 19303.68359375, + "learning_rate": 9.995514670810896e-05, + "loss": 0.5599, + "step": 12900 + }, + { + "epoch": 0.0668142254967212, + "grad_norm": 19837.240234375, + "learning_rate": 9.995358640831187e-05, + "loss": 0.5514, + "step": 12950 + }, + { + "epoch": 0.06707219547933402, + "grad_norm": 19212.25390625, + "learning_rate": 9.995199944425764e-05, + "loss": 0.5542, + "step": 13000 + }, + { + "epoch": 0.06733016546194685, + "grad_norm": 19908.70703125, + "learning_rate": 9.995038581679337e-05, + "loss": 0.5421, + "step": 13050 + }, + { + "epoch": 0.06758813544455967, + "grad_norm": 18933.306640625, + "learning_rate": 9.994874552678038e-05, + "loss": 0.549, + "step": 13100 + }, + { + "epoch": 0.0678461054271725, + "grad_norm": 19313.990234375, + "learning_rate": 9.994707857509422e-05, + "loss": 0.5569, + "step": 13150 + }, + { + "epoch": 0.06810407540978532, + "grad_norm": 20800.984375, + "learning_rate": 9.99453849626247e-05, + "loss": 0.5518, + "step": 13200 + }, + { + "epoch": 0.06836204539239814, + "grad_norm": 18623.361328125, + "learning_rate": 9.994366469027583e-05, + "loss": 0.5549, + "step": 13250 + }, + { + "epoch": 0.06862001537501096, + "grad_norm": 19761.654296875, + "learning_rate": 9.994191775896584e-05, + "loss": 0.5467, + "step": 13300 + }, + { + "epoch": 0.06887798535762378, + "grad_norm": 20618.501953125, + "learning_rate": 9.994014416962723e-05, + "loss": 0.5554, + "step": 13350 + }, + { + "epoch": 0.06913595534023662, + "grad_norm": 19279.791015625, + "learning_rate": 9.993834392320668e-05, + "loss": 0.5567, + "step": 13400 + }, + { + "epoch": 0.06939392532284944, + "grad_norm": 18802.34375, + "learning_rate": 9.993651702066516e-05, + "loss": 0.5608, + "step": 13450 + }, + { + "epoch": 0.06965189530546226, + "grad_norm": 20132.15625, + "learning_rate": 9.993466346297779e-05, + "loss": 0.547, + "step": 13500 + }, + { + "epoch": 0.06990986528807508, + "grad_norm": 19165.26171875, + "learning_rate": 9.993278325113403e-05, + "loss": 0.5485, + "step": 13550 + }, + { + "epoch": 0.0701678352706879, + "grad_norm": 18493.01171875, + "learning_rate": 9.993087638613743e-05, + "loss": 0.5455, + "step": 13600 + }, + { + "epoch": 0.07042580525330072, + "grad_norm": 18225.78125, + "learning_rate": 9.992894286900589e-05, + "loss": 0.5499, + "step": 13650 + }, + { + "epoch": 0.07068377523591354, + "grad_norm": 20189.802734375, + "learning_rate": 9.992698270077146e-05, + "loss": 0.5468, + "step": 13700 + }, + { + "epoch": 0.07094174521852638, + "grad_norm": 20861.2734375, + "learning_rate": 9.992499588248043e-05, + "loss": 0.5588, + "step": 13750 + }, + { + "epoch": 0.0711997152011392, + "grad_norm": 19876.689453125, + "learning_rate": 9.992298241519335e-05, + "loss": 0.5486, + "step": 13800 + }, + { + "epoch": 0.07145768518375202, + "grad_norm": 18371.142578125, + "learning_rate": 9.992094229998497e-05, + "loss": 0.5475, + "step": 13850 + }, + { + "epoch": 0.07171565516636484, + "grad_norm": 18274.396484375, + "learning_rate": 9.991887553794423e-05, + "loss": 0.549, + "step": 13900 + }, + { + "epoch": 0.07197362514897766, + "grad_norm": 18204.947265625, + "learning_rate": 9.991678213017437e-05, + "loss": 0.5419, + "step": 13950 + }, + { + "epoch": 0.07223159513159048, + "grad_norm": 18634.162109375, + "learning_rate": 9.991466207779278e-05, + "loss": 0.5528, + "step": 14000 + }, + { + "epoch": 0.0724895651142033, + "grad_norm": 21840.685546875, + "learning_rate": 9.991251538193112e-05, + "loss": 0.5492, + "step": 14050 + }, + { + "epoch": 0.07274753509681614, + "grad_norm": 18888.935546875, + "learning_rate": 9.991034204373524e-05, + "loss": 0.5504, + "step": 14100 + }, + { + "epoch": 0.07300550507942896, + "grad_norm": 19353.263671875, + "learning_rate": 9.990814206436524e-05, + "loss": 0.5425, + "step": 14150 + }, + { + "epoch": 0.07326347506204178, + "grad_norm": 18891.79296875, + "learning_rate": 9.990591544499543e-05, + "loss": 0.551, + "step": 14200 + }, + { + "epoch": 0.0735214450446546, + "grad_norm": 17878.33203125, + "learning_rate": 9.99036621868143e-05, + "loss": 0.5403, + "step": 14250 + }, + { + "epoch": 0.07377941502726743, + "grad_norm": 18997.544921875, + "learning_rate": 9.990138229102465e-05, + "loss": 0.5458, + "step": 14300 + }, + { + "epoch": 0.07403738500988025, + "grad_norm": 22162.03125, + "learning_rate": 9.989907575884341e-05, + "loss": 0.5482, + "step": 14350 + }, + { + "epoch": 0.07429535499249307, + "grad_norm": 17026.828125, + "learning_rate": 9.989674259150177e-05, + "loss": 0.5487, + "step": 14400 + }, + { + "epoch": 0.0745533249751059, + "grad_norm": 18335.169921875, + "learning_rate": 9.989438279024513e-05, + "loss": 0.5459, + "step": 14450 + }, + { + "epoch": 0.07481129495771872, + "grad_norm": 19508.666015625, + "learning_rate": 9.989199635633309e-05, + "loss": 0.5456, + "step": 14500 + }, + { + "epoch": 0.07506926494033155, + "grad_norm": 20281.28515625, + "learning_rate": 9.98895832910395e-05, + "loss": 0.5455, + "step": 14550 + }, + { + "epoch": 0.07532723492294437, + "grad_norm": 20196.259765625, + "learning_rate": 9.98871435956524e-05, + "loss": 0.5474, + "step": 14600 + }, + { + "epoch": 0.07558520490555719, + "grad_norm": 18934.544921875, + "learning_rate": 9.988467727147409e-05, + "loss": 0.546, + "step": 14650 + }, + { + "epoch": 0.07584317488817001, + "grad_norm": 20257.126953125, + "learning_rate": 9.988218431982098e-05, + "loss": 0.5443, + "step": 14700 + }, + { + "epoch": 0.07610114487078283, + "grad_norm": 20330.86328125, + "learning_rate": 9.98796647420238e-05, + "loss": 0.5423, + "step": 14750 + }, + { + "epoch": 0.07635911485339567, + "grad_norm": 19077.765625, + "learning_rate": 9.987711853942745e-05, + "loss": 0.5446, + "step": 14800 + }, + { + "epoch": 0.07661708483600849, + "grad_norm": 20855.169921875, + "learning_rate": 9.987454571339103e-05, + "loss": 0.5427, + "step": 14850 + }, + { + "epoch": 0.07687505481862131, + "grad_norm": 20556.005859375, + "learning_rate": 9.987194626528788e-05, + "loss": 0.5417, + "step": 14900 + }, + { + "epoch": 0.07713302480123413, + "grad_norm": 19028.7421875, + "learning_rate": 9.986932019650553e-05, + "loss": 0.5412, + "step": 14950 + }, + { + "epoch": 0.07739099478384695, + "grad_norm": 18669.166015625, + "learning_rate": 9.986666750844572e-05, + "loss": 0.5404, + "step": 15000 + }, + { + "epoch": 0.07739099478384695, + "eval_loss": 0.5350670218467712, + "eval_runtime": 3217.7876, + "eval_samples_per_second": 963.743, + "eval_steps_per_second": 1.882, + "step": 15000 + }, + { + "epoch": 0.07764896476645977, + "grad_norm": 19965.779296875, + "learning_rate": 9.98639882025244e-05, + "loss": 0.5439, + "step": 15050 + }, + { + "epoch": 0.07790693474907259, + "grad_norm": 18329.9921875, + "learning_rate": 9.986128228017173e-05, + "loss": 0.5425, + "step": 15100 + }, + { + "epoch": 0.07816490473168543, + "grad_norm": 20102.005859375, + "learning_rate": 9.985854974283211e-05, + "loss": 0.5444, + "step": 15150 + }, + { + "epoch": 0.07842287471429825, + "grad_norm": 19234.671875, + "learning_rate": 9.985579059196406e-05, + "loss": 0.5443, + "step": 15200 + }, + { + "epoch": 0.07868084469691107, + "grad_norm": 18324.298828125, + "learning_rate": 9.985300482904041e-05, + "loss": 0.5419, + "step": 15250 + }, + { + "epoch": 0.07893881467952389, + "grad_norm": 18766.2734375, + "learning_rate": 9.985019245554814e-05, + "loss": 0.5412, + "step": 15300 + }, + { + "epoch": 0.07919678466213671, + "grad_norm": 18805.765625, + "learning_rate": 9.984735347298841e-05, + "loss": 0.5443, + "step": 15350 + }, + { + "epoch": 0.07945475464474953, + "grad_norm": 17677.30078125, + "learning_rate": 9.984448788287665e-05, + "loss": 0.5421, + "step": 15400 + }, + { + "epoch": 0.07971272462736236, + "grad_norm": 19851.3515625, + "learning_rate": 9.984159568674243e-05, + "loss": 0.5426, + "step": 15450 + }, + { + "epoch": 0.07997069460997518, + "grad_norm": 18453.05859375, + "learning_rate": 9.983867688612956e-05, + "loss": 0.5445, + "step": 15500 + }, + { + "epoch": 0.08022866459258801, + "grad_norm": 17366.869140625, + "learning_rate": 9.983573148259603e-05, + "loss": 0.5451, + "step": 15550 + }, + { + "epoch": 0.08048663457520083, + "grad_norm": 18628.716796875, + "learning_rate": 9.983275947771407e-05, + "loss": 0.5373, + "step": 15600 + }, + { + "epoch": 0.08074460455781365, + "grad_norm": 19403.87890625, + "learning_rate": 9.982976087307003e-05, + "loss": 0.5489, + "step": 15650 + }, + { + "epoch": 0.08100257454042648, + "grad_norm": 18485.71875, + "learning_rate": 9.982673567026455e-05, + "loss": 0.538, + "step": 15700 + }, + { + "epoch": 0.0812605445230393, + "grad_norm": 19837.1796875, + "learning_rate": 9.982368387091241e-05, + "loss": 0.5356, + "step": 15750 + }, + { + "epoch": 0.08151851450565212, + "grad_norm": 19505.34375, + "learning_rate": 9.982060547664258e-05, + "loss": 0.5356, + "step": 15800 + }, + { + "epoch": 0.08177648448826494, + "grad_norm": 18645.48828125, + "learning_rate": 9.981750048909828e-05, + "loss": 0.5381, + "step": 15850 + }, + { + "epoch": 0.08203445447087777, + "grad_norm": 20191.73828125, + "learning_rate": 9.981436890993689e-05, + "loss": 0.535, + "step": 15900 + }, + { + "epoch": 0.0822924244534906, + "grad_norm": 18908.15625, + "learning_rate": 9.981121074082995e-05, + "loss": 0.5405, + "step": 15950 + }, + { + "epoch": 0.08255039443610342, + "grad_norm": 19517.73828125, + "learning_rate": 9.980802598346326e-05, + "loss": 0.5407, + "step": 16000 + }, + { + "epoch": 0.08280836441871624, + "grad_norm": 18368.16015625, + "learning_rate": 9.980481463953679e-05, + "loss": 0.5391, + "step": 16050 + }, + { + "epoch": 0.08306633440132906, + "grad_norm": 19727.35546875, + "learning_rate": 9.980157671076466e-05, + "loss": 0.537, + "step": 16100 + }, + { + "epoch": 0.08332430438394188, + "grad_norm": 20757.890625, + "learning_rate": 9.979831219887525e-05, + "loss": 0.5408, + "step": 16150 + }, + { + "epoch": 0.0835822743665547, + "grad_norm": 19334.708984375, + "learning_rate": 9.979502110561108e-05, + "loss": 0.5371, + "step": 16200 + }, + { + "epoch": 0.08384024434916754, + "grad_norm": 19338.498046875, + "learning_rate": 9.979170343272886e-05, + "loss": 0.531, + "step": 16250 + }, + { + "epoch": 0.08409821433178036, + "grad_norm": 18722.365234375, + "learning_rate": 9.978835918199949e-05, + "loss": 0.5398, + "step": 16300 + }, + { + "epoch": 0.08435618431439318, + "grad_norm": 18026.109375, + "learning_rate": 9.97849883552081e-05, + "loss": 0.5423, + "step": 16350 + }, + { + "epoch": 0.084614154297006, + "grad_norm": 19646.78125, + "learning_rate": 9.978159095415396e-05, + "loss": 0.5387, + "step": 16400 + }, + { + "epoch": 0.08487212427961882, + "grad_norm": 20091.552734375, + "learning_rate": 9.977816698065052e-05, + "loss": 0.5376, + "step": 16450 + }, + { + "epoch": 0.08513009426223164, + "grad_norm": 20539.73046875, + "learning_rate": 9.977471643652546e-05, + "loss": 0.5333, + "step": 16500 + }, + { + "epoch": 0.08538806424484446, + "grad_norm": 18306.24609375, + "learning_rate": 9.977123932362059e-05, + "loss": 0.5405, + "step": 16550 + }, + { + "epoch": 0.0856460342274573, + "grad_norm": 20133.513671875, + "learning_rate": 9.976773564379193e-05, + "loss": 0.541, + "step": 16600 + }, + { + "epoch": 0.08590400421007012, + "grad_norm": 19533.50390625, + "learning_rate": 9.976420539890969e-05, + "loss": 0.5333, + "step": 16650 + }, + { + "epoch": 0.08616197419268294, + "grad_norm": 19509.087890625, + "learning_rate": 9.976064859085822e-05, + "loss": 0.5347, + "step": 16700 + }, + { + "epoch": 0.08641994417529576, + "grad_norm": 19590.818359375, + "learning_rate": 9.97570652215361e-05, + "loss": 0.5377, + "step": 16750 + }, + { + "epoch": 0.08667791415790858, + "grad_norm": 19510.705078125, + "learning_rate": 9.975345529285605e-05, + "loss": 0.5367, + "step": 16800 + }, + { + "epoch": 0.0869358841405214, + "grad_norm": 20015.8046875, + "learning_rate": 9.974981880674499e-05, + "loss": 0.5386, + "step": 16850 + }, + { + "epoch": 0.08719385412313423, + "grad_norm": 18704.03125, + "learning_rate": 9.974615576514399e-05, + "loss": 0.5361, + "step": 16900 + }, + { + "epoch": 0.08745182410574706, + "grad_norm": 18257.869140625, + "learning_rate": 9.974246617000832e-05, + "loss": 0.5304, + "step": 16950 + }, + { + "epoch": 0.08770979408835988, + "grad_norm": 18150.517578125, + "learning_rate": 9.973875002330743e-05, + "loss": 0.5289, + "step": 17000 + }, + { + "epoch": 0.0879677640709727, + "grad_norm": 18326.041015625, + "learning_rate": 9.97350073270249e-05, + "loss": 0.5347, + "step": 17050 + }, + { + "epoch": 0.08822573405358553, + "grad_norm": 18199.224609375, + "learning_rate": 9.973123808315852e-05, + "loss": 0.5269, + "step": 17100 + }, + { + "epoch": 0.08848370403619835, + "grad_norm": 20351.447265625, + "learning_rate": 9.972744229372025e-05, + "loss": 0.5334, + "step": 17150 + }, + { + "epoch": 0.08874167401881117, + "grad_norm": 19200.703125, + "learning_rate": 9.97236199607362e-05, + "loss": 0.5316, + "step": 17200 + }, + { + "epoch": 0.08899964400142399, + "grad_norm": 18855.7890625, + "learning_rate": 9.971977108624664e-05, + "loss": 0.5342, + "step": 17250 + }, + { + "epoch": 0.08925761398403682, + "grad_norm": 18889.56640625, + "learning_rate": 9.971589567230606e-05, + "loss": 0.5361, + "step": 17300 + }, + { + "epoch": 0.08951558396664965, + "grad_norm": 18003.9921875, + "learning_rate": 9.971199372098304e-05, + "loss": 0.5353, + "step": 17350 + }, + { + "epoch": 0.08977355394926247, + "grad_norm": 19555.30078125, + "learning_rate": 9.970806523436041e-05, + "loss": 0.5306, + "step": 17400 + }, + { + "epoch": 0.09003152393187529, + "grad_norm": 19433.37890625, + "learning_rate": 9.97041102145351e-05, + "loss": 0.5341, + "step": 17450 + }, + { + "epoch": 0.09028949391448811, + "grad_norm": 19238.341796875, + "learning_rate": 9.97001286636182e-05, + "loss": 0.5372, + "step": 17500 + }, + { + "epoch": 0.09054746389710093, + "grad_norm": 18698.78125, + "learning_rate": 9.969612058373502e-05, + "loss": 0.5356, + "step": 17550 + }, + { + "epoch": 0.09080543387971375, + "grad_norm": 17953.580078125, + "learning_rate": 9.969208597702497e-05, + "loss": 0.529, + "step": 17600 + }, + { + "epoch": 0.09106340386232659, + "grad_norm": 17678.716796875, + "learning_rate": 9.968802484564168e-05, + "loss": 0.5329, + "step": 17650 + }, + { + "epoch": 0.09132137384493941, + "grad_norm": 20412.287109375, + "learning_rate": 9.968393719175286e-05, + "loss": 0.534, + "step": 17700 + }, + { + "epoch": 0.09157934382755223, + "grad_norm": 20080.16015625, + "learning_rate": 9.967982301754044e-05, + "loss": 0.5307, + "step": 17750 + }, + { + "epoch": 0.09183731381016505, + "grad_norm": 18570.314453125, + "learning_rate": 9.96756823252005e-05, + "loss": 0.526, + "step": 17800 + }, + { + "epoch": 0.09209528379277787, + "grad_norm": 18329.107421875, + "learning_rate": 9.967151511694324e-05, + "loss": 0.5273, + "step": 17850 + }, + { + "epoch": 0.09235325377539069, + "grad_norm": 19036.18359375, + "learning_rate": 9.966732139499304e-05, + "loss": 0.5275, + "step": 17900 + }, + { + "epoch": 0.09261122375800351, + "grad_norm": 18708.826171875, + "learning_rate": 9.966310116158844e-05, + "loss": 0.5313, + "step": 17950 + }, + { + "epoch": 0.09286919374061635, + "grad_norm": 18660.791015625, + "learning_rate": 9.96588544189821e-05, + "loss": 0.5303, + "step": 18000 + }, + { + "epoch": 0.09312716372322917, + "grad_norm": 19709.181640625, + "learning_rate": 9.965458116944086e-05, + "loss": 0.5347, + "step": 18050 + }, + { + "epoch": 0.09338513370584199, + "grad_norm": 19683.798828125, + "learning_rate": 9.96502814152457e-05, + "loss": 0.5359, + "step": 18100 + }, + { + "epoch": 0.09364310368845481, + "grad_norm": 19533.09765625, + "learning_rate": 9.964595515869175e-05, + "loss": 0.5263, + "step": 18150 + }, + { + "epoch": 0.09390107367106763, + "grad_norm": 20254.892578125, + "learning_rate": 9.964160240208826e-05, + "loss": 0.5307, + "step": 18200 + }, + { + "epoch": 0.09415904365368045, + "grad_norm": 21316.876953125, + "learning_rate": 9.963722314775868e-05, + "loss": 0.5316, + "step": 18250 + }, + { + "epoch": 0.09441701363629328, + "grad_norm": 20027.03515625, + "learning_rate": 9.963281739804054e-05, + "loss": 0.5274, + "step": 18300 + }, + { + "epoch": 0.0946749836189061, + "grad_norm": 18551.994140625, + "learning_rate": 9.962838515528554e-05, + "loss": 0.5339, + "step": 18350 + }, + { + "epoch": 0.09493295360151893, + "grad_norm": 17779.97265625, + "learning_rate": 9.962392642185956e-05, + "loss": 0.5301, + "step": 18400 + }, + { + "epoch": 0.09519092358413175, + "grad_norm": 20620.232421875, + "learning_rate": 9.961944120014256e-05, + "loss": 0.522, + "step": 18450 + }, + { + "epoch": 0.09544889356674457, + "grad_norm": 18669.73046875, + "learning_rate": 9.961492949252868e-05, + "loss": 0.5261, + "step": 18500 + }, + { + "epoch": 0.0957068635493574, + "grad_norm": 19528.4765625, + "learning_rate": 9.961039130142617e-05, + "loss": 0.5276, + "step": 18550 + }, + { + "epoch": 0.09596483353197022, + "grad_norm": 19643.099609375, + "learning_rate": 9.960582662925744e-05, + "loss": 0.5332, + "step": 18600 + }, + { + "epoch": 0.09622280351458304, + "grad_norm": 19024.4375, + "learning_rate": 9.960123547845901e-05, + "loss": 0.529, + "step": 18650 + }, + { + "epoch": 0.09648077349719586, + "grad_norm": 20228.248046875, + "learning_rate": 9.959661785148155e-05, + "loss": 0.5322, + "step": 18700 + }, + { + "epoch": 0.0967387434798087, + "grad_norm": 20120.126953125, + "learning_rate": 9.959197375078986e-05, + "loss": 0.5256, + "step": 18750 + }, + { + "epoch": 0.09699671346242152, + "grad_norm": 19894.423828125, + "learning_rate": 9.95873031788629e-05, + "loss": 0.5257, + "step": 18800 + }, + { + "epoch": 0.09725468344503434, + "grad_norm": 18450.8671875, + "learning_rate": 9.958260613819367e-05, + "loss": 0.5268, + "step": 18850 + }, + { + "epoch": 0.09751265342764716, + "grad_norm": 22775.53125, + "learning_rate": 9.95778826312894e-05, + "loss": 0.5293, + "step": 18900 + }, + { + "epoch": 0.09777062341025998, + "grad_norm": 17769.38671875, + "learning_rate": 9.95731326606714e-05, + "loss": 0.5281, + "step": 18950 + }, + { + "epoch": 0.0980285933928728, + "grad_norm": 20731.322265625, + "learning_rate": 9.956835622887514e-05, + "loss": 0.5327, + "step": 19000 + }, + { + "epoch": 0.09828656337548562, + "grad_norm": 20059.11328125, + "learning_rate": 9.956355333845014e-05, + "loss": 0.5279, + "step": 19050 + }, + { + "epoch": 0.09854453335809846, + "grad_norm": 17477.626953125, + "learning_rate": 9.955872399196012e-05, + "loss": 0.5257, + "step": 19100 + }, + { + "epoch": 0.09880250334071128, + "grad_norm": 20293.232421875, + "learning_rate": 9.955386819198287e-05, + "loss": 0.5258, + "step": 19150 + }, + { + "epoch": 0.0990604733233241, + "grad_norm": 19330.4140625, + "learning_rate": 9.954898594111035e-05, + "loss": 0.5231, + "step": 19200 + }, + { + "epoch": 0.09931844330593692, + "grad_norm": 19410.818359375, + "learning_rate": 9.954407724194858e-05, + "loss": 0.5286, + "step": 19250 + }, + { + "epoch": 0.09957641328854974, + "grad_norm": 18320.552734375, + "learning_rate": 9.953914209711775e-05, + "loss": 0.5287, + "step": 19300 + }, + { + "epoch": 0.09983438327116256, + "grad_norm": 17585.583984375, + "learning_rate": 9.953418050925213e-05, + "loss": 0.5265, + "step": 19350 + }, + { + "epoch": 0.10009235325377538, + "grad_norm": 20318.298828125, + "learning_rate": 9.952919248100012e-05, + "loss": 0.5292, + "step": 19400 + }, + { + "epoch": 0.10035032323638822, + "grad_norm": 20239.33984375, + "learning_rate": 9.952417801502426e-05, + "loss": 0.522, + "step": 19450 + }, + { + "epoch": 0.10060829321900104, + "grad_norm": 18922.158203125, + "learning_rate": 9.951913711400115e-05, + "loss": 0.5275, + "step": 19500 + }, + { + "epoch": 0.10086626320161386, + "grad_norm": 18332.673828125, + "learning_rate": 9.951406978062153e-05, + "loss": 0.5282, + "step": 19550 + }, + { + "epoch": 0.10112423318422668, + "grad_norm": 19321.662109375, + "learning_rate": 9.950897601759024e-05, + "loss": 0.5236, + "step": 19600 + }, + { + "epoch": 0.1013822031668395, + "grad_norm": 19050.42578125, + "learning_rate": 9.950385582762624e-05, + "loss": 0.5269, + "step": 19650 + }, + { + "epoch": 0.10164017314945233, + "grad_norm": 18592.8125, + "learning_rate": 9.949870921346259e-05, + "loss": 0.5294, + "step": 19700 + }, + { + "epoch": 0.10189814313206515, + "grad_norm": 17702.080078125, + "learning_rate": 9.949353617784644e-05, + "loss": 0.5321, + "step": 19750 + }, + { + "epoch": 0.10215611311467798, + "grad_norm": 18935.71875, + "learning_rate": 9.948833672353907e-05, + "loss": 0.5279, + "step": 19800 + }, + { + "epoch": 0.1024140830972908, + "grad_norm": 19814.96484375, + "learning_rate": 9.948311085331585e-05, + "loss": 0.5174, + "step": 19850 + }, + { + "epoch": 0.10267205307990362, + "grad_norm": 18945.4375, + "learning_rate": 9.947785856996623e-05, + "loss": 0.525, + "step": 19900 + }, + { + "epoch": 0.10293002306251645, + "grad_norm": 19162.28125, + "learning_rate": 9.947257987629379e-05, + "loss": 0.5268, + "step": 19950 + }, + { + "epoch": 0.10318799304512927, + "grad_norm": 18814.861328125, + "learning_rate": 9.94672747751162e-05, + "loss": 0.5191, + "step": 20000 + }, + { + "epoch": 0.10318799304512927, + "eval_loss": 0.5160176157951355, + "eval_runtime": 3272.5369, + "eval_samples_per_second": 947.62, + "eval_steps_per_second": 1.851, + "step": 20000 + }, + { + "epoch": 0.10344596302774209, + "grad_norm": 19089.77734375, + "learning_rate": 9.94619432692652e-05, + "loss": 0.5254, + "step": 20050 + }, + { + "epoch": 0.10370393301035491, + "grad_norm": 19005.53125, + "learning_rate": 9.945658536158667e-05, + "loss": 0.525, + "step": 20100 + }, + { + "epoch": 0.10396190299296774, + "grad_norm": 20896.8125, + "learning_rate": 9.945120105494054e-05, + "loss": 0.5173, + "step": 20150 + }, + { + "epoch": 0.10421987297558057, + "grad_norm": 19254.22265625, + "learning_rate": 9.944579035220085e-05, + "loss": 0.5195, + "step": 20200 + }, + { + "epoch": 0.10447784295819339, + "grad_norm": 19317.572265625, + "learning_rate": 9.944035325625573e-05, + "loss": 0.5239, + "step": 20250 + }, + { + "epoch": 0.10473581294080621, + "grad_norm": 18661.330078125, + "learning_rate": 9.94348897700074e-05, + "loss": 0.5243, + "step": 20300 + }, + { + "epoch": 0.10499378292341903, + "grad_norm": 18914.298828125, + "learning_rate": 9.942939989637216e-05, + "loss": 0.5247, + "step": 20350 + }, + { + "epoch": 0.10525175290603185, + "grad_norm": 17788.77734375, + "learning_rate": 9.942388363828041e-05, + "loss": 0.5205, + "step": 20400 + }, + { + "epoch": 0.10550972288864467, + "grad_norm": 17314.578125, + "learning_rate": 9.941834099867659e-05, + "loss": 0.5182, + "step": 20450 + }, + { + "epoch": 0.10576769287125751, + "grad_norm": 18627.068359375, + "learning_rate": 9.941277198051931e-05, + "loss": 0.5208, + "step": 20500 + }, + { + "epoch": 0.10602566285387033, + "grad_norm": 18274.4609375, + "learning_rate": 9.940717658678113e-05, + "loss": 0.5244, + "step": 20550 + }, + { + "epoch": 0.10628363283648315, + "grad_norm": 18668.767578125, + "learning_rate": 9.940155482044884e-05, + "loss": 0.5237, + "step": 20600 + }, + { + "epoch": 0.10654160281909597, + "grad_norm": 17703.703125, + "learning_rate": 9.939590668452316e-05, + "loss": 0.5148, + "step": 20650 + }, + { + "epoch": 0.10679957280170879, + "grad_norm": 18372.7578125, + "learning_rate": 9.939023218201901e-05, + "loss": 0.522, + "step": 20700 + }, + { + "epoch": 0.10705754278432161, + "grad_norm": 18439.521484375, + "learning_rate": 9.93845313159653e-05, + "loss": 0.5177, + "step": 20750 + }, + { + "epoch": 0.10731551276693443, + "grad_norm": 18812.10546875, + "learning_rate": 9.937880408940504e-05, + "loss": 0.5161, + "step": 20800 + }, + { + "epoch": 0.10757348274954727, + "grad_norm": 19163.4296875, + "learning_rate": 9.937305050539534e-05, + "loss": 0.5175, + "step": 20850 + }, + { + "epoch": 0.10783145273216009, + "grad_norm": 19459.3984375, + "learning_rate": 9.936727056700732e-05, + "loss": 0.5257, + "step": 20900 + }, + { + "epoch": 0.10808942271477291, + "grad_norm": 20272.22265625, + "learning_rate": 9.93614642773262e-05, + "loss": 0.5244, + "step": 20950 + }, + { + "epoch": 0.10834739269738573, + "grad_norm": 19995.736328125, + "learning_rate": 9.93556316394513e-05, + "loss": 0.5179, + "step": 21000 + }, + { + "epoch": 0.10860536267999855, + "grad_norm": 20567.369140625, + "learning_rate": 9.934977265649594e-05, + "loss": 0.528, + "step": 21050 + }, + { + "epoch": 0.10886333266261138, + "grad_norm": 19328.57421875, + "learning_rate": 9.934388733158753e-05, + "loss": 0.5249, + "step": 21100 + }, + { + "epoch": 0.1091213026452242, + "grad_norm": 17305.19921875, + "learning_rate": 9.933797566786757e-05, + "loss": 0.5163, + "step": 21150 + }, + { + "epoch": 0.10937927262783702, + "grad_norm": 19983.99609375, + "learning_rate": 9.933203766849155e-05, + "loss": 0.5227, + "step": 21200 + }, + { + "epoch": 0.10963724261044985, + "grad_norm": 18918.16015625, + "learning_rate": 9.93260733366291e-05, + "loss": 0.521, + "step": 21250 + }, + { + "epoch": 0.10989521259306267, + "grad_norm": 19260.40625, + "learning_rate": 9.932008267546384e-05, + "loss": 0.5195, + "step": 21300 + }, + { + "epoch": 0.1101531825756755, + "grad_norm": 16713.015625, + "learning_rate": 9.931406568819348e-05, + "loss": 0.5187, + "step": 21350 + }, + { + "epoch": 0.11041115255828832, + "grad_norm": 19787.67578125, + "learning_rate": 9.930802237802976e-05, + "loss": 0.5152, + "step": 21400 + }, + { + "epoch": 0.11066912254090114, + "grad_norm": 20632.775390625, + "learning_rate": 9.93019527481985e-05, + "loss": 0.5158, + "step": 21450 + }, + { + "epoch": 0.11092709252351396, + "grad_norm": 18545.748046875, + "learning_rate": 9.929585680193951e-05, + "loss": 0.5161, + "step": 21500 + }, + { + "epoch": 0.11118506250612678, + "grad_norm": 18961.138671875, + "learning_rate": 9.928973454250674e-05, + "loss": 0.5192, + "step": 21550 + }, + { + "epoch": 0.11144303248873962, + "grad_norm": 18970.013671875, + "learning_rate": 9.928358597316812e-05, + "loss": 0.5211, + "step": 21600 + }, + { + "epoch": 0.11170100247135244, + "grad_norm": 20800.046875, + "learning_rate": 9.927741109720561e-05, + "loss": 0.5143, + "step": 21650 + }, + { + "epoch": 0.11195897245396526, + "grad_norm": 18738.564453125, + "learning_rate": 9.927120991791528e-05, + "loss": 0.5232, + "step": 21700 + }, + { + "epoch": 0.11221694243657808, + "grad_norm": 18495.798828125, + "learning_rate": 9.926498243860715e-05, + "loss": 0.5176, + "step": 21750 + }, + { + "epoch": 0.1124749124191909, + "grad_norm": 18129.375, + "learning_rate": 9.925872866260537e-05, + "loss": 0.5132, + "step": 21800 + }, + { + "epoch": 0.11273288240180372, + "grad_norm": 19332.751953125, + "learning_rate": 9.925244859324807e-05, + "loss": 0.5135, + "step": 21850 + }, + { + "epoch": 0.11299085238441654, + "grad_norm": 19395.544921875, + "learning_rate": 9.924614223388742e-05, + "loss": 0.5191, + "step": 21900 + }, + { + "epoch": 0.11324882236702938, + "grad_norm": 20292.890625, + "learning_rate": 9.923980958788964e-05, + "loss": 0.5212, + "step": 21950 + }, + { + "epoch": 0.1135067923496422, + "grad_norm": 20309.033203125, + "learning_rate": 9.923345065863498e-05, + "loss": 0.5134, + "step": 22000 + }, + { + "epoch": 0.11376476233225502, + "grad_norm": 17513.578125, + "learning_rate": 9.922706544951772e-05, + "loss": 0.5216, + "step": 22050 + }, + { + "epoch": 0.11402273231486784, + "grad_norm": 18886.10546875, + "learning_rate": 9.922065396394614e-05, + "loss": 0.5219, + "step": 22100 + }, + { + "epoch": 0.11428070229748066, + "grad_norm": 19656.1484375, + "learning_rate": 9.921421620534257e-05, + "loss": 0.5163, + "step": 22150 + }, + { + "epoch": 0.11453867228009348, + "grad_norm": 18463.068359375, + "learning_rate": 9.920775217714338e-05, + "loss": 0.5198, + "step": 22200 + }, + { + "epoch": 0.1147966422627063, + "grad_norm": 20666.400390625, + "learning_rate": 9.920126188279892e-05, + "loss": 0.5164, + "step": 22250 + }, + { + "epoch": 0.11505461224531914, + "grad_norm": 20401.681640625, + "learning_rate": 9.919474532577359e-05, + "loss": 0.5163, + "step": 22300 + }, + { + "epoch": 0.11531258222793196, + "grad_norm": 21289.541015625, + "learning_rate": 9.918820250954581e-05, + "loss": 0.5114, + "step": 22350 + }, + { + "epoch": 0.11557055221054478, + "grad_norm": 17559.50390625, + "learning_rate": 9.918163343760801e-05, + "loss": 0.5156, + "step": 22400 + }, + { + "epoch": 0.1158285221931576, + "grad_norm": 17041.087890625, + "learning_rate": 9.917503811346662e-05, + "loss": 0.5146, + "step": 22450 + }, + { + "epoch": 0.11608649217577043, + "grad_norm": 20508.087890625, + "learning_rate": 9.916841654064212e-05, + "loss": 0.5202, + "step": 22500 + }, + { + "epoch": 0.11634446215838325, + "grad_norm": 21307.646484375, + "learning_rate": 9.916176872266894e-05, + "loss": 0.5108, + "step": 22550 + }, + { + "epoch": 0.11660243214099607, + "grad_norm": 21765.580078125, + "learning_rate": 9.91550946630956e-05, + "loss": 0.5158, + "step": 22600 + }, + { + "epoch": 0.1168604021236089, + "grad_norm": 18173.646484375, + "learning_rate": 9.914839436548454e-05, + "loss": 0.5081, + "step": 22650 + }, + { + "epoch": 0.11711837210622172, + "grad_norm": 19044.880859375, + "learning_rate": 9.914166783341227e-05, + "loss": 0.5144, + "step": 22700 + }, + { + "epoch": 0.11737634208883455, + "grad_norm": 19291.37109375, + "learning_rate": 9.91349150704693e-05, + "loss": 0.5147, + "step": 22750 + }, + { + "epoch": 0.11763431207144737, + "grad_norm": 16757.376953125, + "learning_rate": 9.91281360802601e-05, + "loss": 0.5163, + "step": 22800 + }, + { + "epoch": 0.11789228205406019, + "grad_norm": 18870.287109375, + "learning_rate": 9.912133086640318e-05, + "loss": 0.512, + "step": 22850 + }, + { + "epoch": 0.11815025203667301, + "grad_norm": 20520.115234375, + "learning_rate": 9.911449943253102e-05, + "loss": 0.5175, + "step": 22900 + }, + { + "epoch": 0.11840822201928583, + "grad_norm": 20585.21484375, + "learning_rate": 9.910764178229011e-05, + "loss": 0.5114, + "step": 22950 + }, + { + "epoch": 0.11866619200189867, + "grad_norm": 18660.384765625, + "learning_rate": 9.910075791934092e-05, + "loss": 0.5115, + "step": 23000 + }, + { + "epoch": 0.11892416198451149, + "grad_norm": 19391.318359375, + "learning_rate": 9.909384784735794e-05, + "loss": 0.5198, + "step": 23050 + }, + { + "epoch": 0.11918213196712431, + "grad_norm": 18007.306640625, + "learning_rate": 9.908691157002962e-05, + "loss": 0.5125, + "step": 23100 + }, + { + "epoch": 0.11944010194973713, + "grad_norm": 20804.501953125, + "learning_rate": 9.907994909105842e-05, + "loss": 0.516, + "step": 23150 + }, + { + "epoch": 0.11969807193234995, + "grad_norm": 18307.63671875, + "learning_rate": 9.907296041416076e-05, + "loss": 0.5108, + "step": 23200 + }, + { + "epoch": 0.11995604191496277, + "grad_norm": 19694.552734375, + "learning_rate": 9.906594554306709e-05, + "loss": 0.5092, + "step": 23250 + }, + { + "epoch": 0.12021401189757559, + "grad_norm": 20234.0703125, + "learning_rate": 9.90589044815218e-05, + "loss": 0.515, + "step": 23300 + }, + { + "epoch": 0.12047198188018843, + "grad_norm": 18483.4296875, + "learning_rate": 9.905183723328327e-05, + "loss": 0.5127, + "step": 23350 + }, + { + "epoch": 0.12072995186280125, + "grad_norm": 17447.51953125, + "learning_rate": 9.904474380212384e-05, + "loss": 0.5107, + "step": 23400 + }, + { + "epoch": 0.12098792184541407, + "grad_norm": 18881.7109375, + "learning_rate": 9.903762419182986e-05, + "loss": 0.5177, + "step": 23450 + }, + { + "epoch": 0.12124589182802689, + "grad_norm": 17861.990234375, + "learning_rate": 9.903047840620168e-05, + "loss": 0.5128, + "step": 23500 + }, + { + "epoch": 0.12150386181063971, + "grad_norm": 19111.53515625, + "learning_rate": 9.902330644905351e-05, + "loss": 0.5134, + "step": 23550 + }, + { + "epoch": 0.12176183179325253, + "grad_norm": 18461.107421875, + "learning_rate": 9.901610832421366e-05, + "loss": 0.51, + "step": 23600 + }, + { + "epoch": 0.12201980177586536, + "grad_norm": 18103.701171875, + "learning_rate": 9.900888403552431e-05, + "loss": 0.5131, + "step": 23650 + }, + { + "epoch": 0.12227777175847819, + "grad_norm": 18334.755859375, + "learning_rate": 9.900163358684168e-05, + "loss": 0.511, + "step": 23700 + }, + { + "epoch": 0.12253574174109101, + "grad_norm": 17476.322265625, + "learning_rate": 9.89943569820359e-05, + "loss": 0.5151, + "step": 23750 + }, + { + "epoch": 0.12279371172370383, + "grad_norm": 18698.09765625, + "learning_rate": 9.898705422499107e-05, + "loss": 0.5146, + "step": 23800 + }, + { + "epoch": 0.12305168170631665, + "grad_norm": 18321.80859375, + "learning_rate": 9.897972531960528e-05, + "loss": 0.5109, + "step": 23850 + }, + { + "epoch": 0.12330965168892948, + "grad_norm": 18234.361328125, + "learning_rate": 9.897237026979056e-05, + "loss": 0.5115, + "step": 23900 + }, + { + "epoch": 0.1235676216715423, + "grad_norm": 19737.849609375, + "learning_rate": 9.896498907947287e-05, + "loss": 0.5155, + "step": 23950 + }, + { + "epoch": 0.12382559165415512, + "grad_norm": 19136.279296875, + "learning_rate": 9.895758175259218e-05, + "loss": 0.5162, + "step": 24000 + }, + { + "epoch": 0.12408356163676794, + "grad_norm": 18575.431640625, + "learning_rate": 9.895014829310235e-05, + "loss": 0.5141, + "step": 24050 + }, + { + "epoch": 0.12434153161938077, + "grad_norm": 17589.353515625, + "learning_rate": 9.894268870497121e-05, + "loss": 0.501, + "step": 24100 + }, + { + "epoch": 0.1245995016019936, + "grad_norm": 19781.830078125, + "learning_rate": 9.893520299218057e-05, + "loss": 0.5128, + "step": 24150 + }, + { + "epoch": 0.12485747158460642, + "grad_norm": 17501.150390625, + "learning_rate": 9.892769115872617e-05, + "loss": 0.5113, + "step": 24200 + }, + { + "epoch": 0.12511544156721924, + "grad_norm": 21107.34375, + "learning_rate": 9.892015320861762e-05, + "loss": 0.5041, + "step": 24250 + }, + { + "epoch": 0.12537341154983206, + "grad_norm": 17529.345703125, + "learning_rate": 9.89125891458786e-05, + "loss": 0.5093, + "step": 24300 + }, + { + "epoch": 0.12563138153244488, + "grad_norm": 18061.890625, + "learning_rate": 9.890499897454663e-05, + "loss": 0.5111, + "step": 24350 + }, + { + "epoch": 0.1258893515150577, + "grad_norm": 21213.177734375, + "learning_rate": 9.889738269867318e-05, + "loss": 0.5106, + "step": 24400 + }, + { + "epoch": 0.12614732149767052, + "grad_norm": 17838.625, + "learning_rate": 9.88897403223237e-05, + "loss": 0.5144, + "step": 24450 + }, + { + "epoch": 0.12640529148028334, + "grad_norm": 19047.787109375, + "learning_rate": 9.888207184957752e-05, + "loss": 0.5133, + "step": 24500 + }, + { + "epoch": 0.12666326146289617, + "grad_norm": 17355.26171875, + "learning_rate": 9.887437728452794e-05, + "loss": 0.5054, + "step": 24550 + }, + { + "epoch": 0.12692123144550901, + "grad_norm": 20496.369140625, + "learning_rate": 9.886665663128216e-05, + "loss": 0.51, + "step": 24600 + }, + { + "epoch": 0.12717920142812184, + "grad_norm": 19887.734375, + "learning_rate": 9.885890989396133e-05, + "loss": 0.5049, + "step": 24650 + }, + { + "epoch": 0.12743717141073466, + "grad_norm": 20027.69140625, + "learning_rate": 9.885113707670049e-05, + "loss": 0.5118, + "step": 24700 + }, + { + "epoch": 0.12769514139334748, + "grad_norm": 18888.92578125, + "learning_rate": 9.884333818364861e-05, + "loss": 0.5168, + "step": 24750 + }, + { + "epoch": 0.1279531113759603, + "grad_norm": 20906.673828125, + "learning_rate": 9.883551321896862e-05, + "loss": 0.5109, + "step": 24800 + }, + { + "epoch": 0.12821108135857312, + "grad_norm": 20228.833984375, + "learning_rate": 9.882766218683731e-05, + "loss": 0.5167, + "step": 24850 + }, + { + "epoch": 0.12846905134118594, + "grad_norm": 19832.4609375, + "learning_rate": 9.881978509144543e-05, + "loss": 0.5113, + "step": 24900 + }, + { + "epoch": 0.12872702132379876, + "grad_norm": 18049.193359375, + "learning_rate": 9.881188193699758e-05, + "loss": 0.5121, + "step": 24950 + }, + { + "epoch": 0.12898499130641158, + "grad_norm": 18765.033203125, + "learning_rate": 9.880395272771236e-05, + "loss": 0.5123, + "step": 25000 + }, + { + "epoch": 0.12898499130641158, + "eval_loss": 0.5013377666473389, + "eval_runtime": 3332.4061, + "eval_samples_per_second": 930.595, + "eval_steps_per_second": 1.818, + "step": 25000 + }, + { + "epoch": 0.1292429612890244, + "grad_norm": 18435.787109375, + "learning_rate": 9.879599746782221e-05, + "loss": 0.5096, + "step": 25050 + }, + { + "epoch": 0.12950093127163723, + "grad_norm": 18993.890625, + "learning_rate": 9.878801616157348e-05, + "loss": 0.5091, + "step": 25100 + }, + { + "epoch": 0.12975890125425005, + "grad_norm": 19766.783203125, + "learning_rate": 9.878000881322646e-05, + "loss": 0.5059, + "step": 25150 + }, + { + "epoch": 0.13001687123686287, + "grad_norm": 19316.537109375, + "learning_rate": 9.87719754270553e-05, + "loss": 0.5112, + "step": 25200 + }, + { + "epoch": 0.1302748412194757, + "grad_norm": 19288.64453125, + "learning_rate": 9.876391600734807e-05, + "loss": 0.5031, + "step": 25250 + }, + { + "epoch": 0.1305328112020885, + "grad_norm": 18962.7734375, + "learning_rate": 9.875583055840673e-05, + "loss": 0.5113, + "step": 25300 + }, + { + "epoch": 0.13079078118470136, + "grad_norm": 19399.21875, + "learning_rate": 9.874771908454714e-05, + "loss": 0.5177, + "step": 25350 + }, + { + "epoch": 0.13104875116731418, + "grad_norm": 20511.134765625, + "learning_rate": 9.873958159009904e-05, + "loss": 0.5049, + "step": 25400 + }, + { + "epoch": 0.131306721149927, + "grad_norm": 17669.00390625, + "learning_rate": 9.87314180794061e-05, + "loss": 0.5076, + "step": 25450 + }, + { + "epoch": 0.13156469113253982, + "grad_norm": 20254.75390625, + "learning_rate": 9.872322855682579e-05, + "loss": 0.5102, + "step": 25500 + }, + { + "epoch": 0.13182266111515265, + "grad_norm": 21859.880859375, + "learning_rate": 9.871501302672956e-05, + "loss": 0.5098, + "step": 25550 + }, + { + "epoch": 0.13208063109776547, + "grad_norm": 18794.90625, + "learning_rate": 9.870677149350268e-05, + "loss": 0.5078, + "step": 25600 + }, + { + "epoch": 0.1323386010803783, + "grad_norm": 19909.65625, + "learning_rate": 9.869850396154434e-05, + "loss": 0.5129, + "step": 25650 + }, + { + "epoch": 0.1325965710629911, + "grad_norm": 17887.99609375, + "learning_rate": 9.869021043526756e-05, + "loss": 0.508, + "step": 25700 + }, + { + "epoch": 0.13285454104560393, + "grad_norm": 17189.033203125, + "learning_rate": 9.868189091909929e-05, + "loss": 0.5114, + "step": 25750 + }, + { + "epoch": 0.13311251102821675, + "grad_norm": 21320.78125, + "learning_rate": 9.867354541748033e-05, + "loss": 0.5081, + "step": 25800 + }, + { + "epoch": 0.13337048101082957, + "grad_norm": 19035.33984375, + "learning_rate": 9.866517393486532e-05, + "loss": 0.5065, + "step": 25850 + }, + { + "epoch": 0.1336284509934424, + "grad_norm": 19038.876953125, + "learning_rate": 9.86567764757228e-05, + "loss": 0.5055, + "step": 25900 + }, + { + "epoch": 0.13388642097605521, + "grad_norm": 20425.6875, + "learning_rate": 9.86483530445352e-05, + "loss": 0.5091, + "step": 25950 + }, + { + "epoch": 0.13414439095866804, + "grad_norm": 19947.34765625, + "learning_rate": 9.863990364579876e-05, + "loss": 0.5062, + "step": 26000 + }, + { + "epoch": 0.13440236094128089, + "grad_norm": 18758.7890625, + "learning_rate": 9.863142828402361e-05, + "loss": 0.5099, + "step": 26050 + }, + { + "epoch": 0.1346603309238937, + "grad_norm": 18494.076171875, + "learning_rate": 9.862292696373372e-05, + "loss": 0.5043, + "step": 26100 + }, + { + "epoch": 0.13491830090650653, + "grad_norm": 19646.841796875, + "learning_rate": 9.861439968946696e-05, + "loss": 0.508, + "step": 26150 + }, + { + "epoch": 0.13517627088911935, + "grad_norm": 19356.009765625, + "learning_rate": 9.8605846465775e-05, + "loss": 0.5015, + "step": 26200 + }, + { + "epoch": 0.13543424087173217, + "grad_norm": 19243.1875, + "learning_rate": 9.859726729722341e-05, + "loss": 0.5086, + "step": 26250 + }, + { + "epoch": 0.135692210854345, + "grad_norm": 20116.43359375, + "learning_rate": 9.858866218839156e-05, + "loss": 0.5074, + "step": 26300 + }, + { + "epoch": 0.1359501808369578, + "grad_norm": 18592.1015625, + "learning_rate": 9.858003114387269e-05, + "loss": 0.5054, + "step": 26350 + }, + { + "epoch": 0.13620815081957063, + "grad_norm": 19552.505859375, + "learning_rate": 9.85713741682739e-05, + "loss": 0.5042, + "step": 26400 + }, + { + "epoch": 0.13646612080218345, + "grad_norm": 18818.142578125, + "learning_rate": 9.856269126621611e-05, + "loss": 0.5106, + "step": 26450 + }, + { + "epoch": 0.13672409078479628, + "grad_norm": 21973.685546875, + "learning_rate": 9.855398244233407e-05, + "loss": 0.5116, + "step": 26500 + }, + { + "epoch": 0.1369820607674091, + "grad_norm": 19296.7890625, + "learning_rate": 9.854524770127641e-05, + "loss": 0.5103, + "step": 26550 + }, + { + "epoch": 0.13724003075002192, + "grad_norm": 18975.22265625, + "learning_rate": 9.853648704770554e-05, + "loss": 0.5093, + "step": 26600 + }, + { + "epoch": 0.13749800073263474, + "grad_norm": 20003.19140625, + "learning_rate": 9.852770048629776e-05, + "loss": 0.5094, + "step": 26650 + }, + { + "epoch": 0.13775597071524756, + "grad_norm": 19885.341796875, + "learning_rate": 9.851888802174312e-05, + "loss": 0.502, + "step": 26700 + }, + { + "epoch": 0.1380139406978604, + "grad_norm": 18030.115234375, + "learning_rate": 9.851004965874557e-05, + "loss": 0.5045, + "step": 26750 + }, + { + "epoch": 0.13827191068047323, + "grad_norm": 19143.369140625, + "learning_rate": 9.850118540202286e-05, + "loss": 0.5068, + "step": 26800 + }, + { + "epoch": 0.13852988066308605, + "grad_norm": 18902.5390625, + "learning_rate": 9.849229525630656e-05, + "loss": 0.4984, + "step": 26850 + }, + { + "epoch": 0.13878785064569887, + "grad_norm": 18523.115234375, + "learning_rate": 9.848337922634206e-05, + "loss": 0.5099, + "step": 26900 + }, + { + "epoch": 0.1390458206283117, + "grad_norm": 19873.283203125, + "learning_rate": 9.847443731688852e-05, + "loss": 0.5039, + "step": 26950 + }, + { + "epoch": 0.13930379061092452, + "grad_norm": 20202.23046875, + "learning_rate": 9.846546953271902e-05, + "loss": 0.507, + "step": 27000 + }, + { + "epoch": 0.13956176059353734, + "grad_norm": 17484.572265625, + "learning_rate": 9.845647587862034e-05, + "loss": 0.5113, + "step": 27050 + }, + { + "epoch": 0.13981973057615016, + "grad_norm": 17931.634765625, + "learning_rate": 9.844745635939316e-05, + "loss": 0.5051, + "step": 27100 + }, + { + "epoch": 0.14007770055876298, + "grad_norm": 20536.693359375, + "learning_rate": 9.843841097985191e-05, + "loss": 0.5044, + "step": 27150 + }, + { + "epoch": 0.1403356705413758, + "grad_norm": 18379.619140625, + "learning_rate": 9.842933974482482e-05, + "loss": 0.5071, + "step": 27200 + }, + { + "epoch": 0.14059364052398862, + "grad_norm": 19097.240234375, + "learning_rate": 9.842024265915397e-05, + "loss": 0.5046, + "step": 27250 + }, + { + "epoch": 0.14085161050660144, + "grad_norm": 22569.80859375, + "learning_rate": 9.841111972769517e-05, + "loss": 0.5022, + "step": 27300 + }, + { + "epoch": 0.14110958048921426, + "grad_norm": 17499.166015625, + "learning_rate": 9.84019709553181e-05, + "loss": 0.5014, + "step": 27350 + }, + { + "epoch": 0.14136755047182709, + "grad_norm": 20447.25, + "learning_rate": 9.839279634690619e-05, + "loss": 0.5065, + "step": 27400 + }, + { + "epoch": 0.14162552045443993, + "grad_norm": 20977.70703125, + "learning_rate": 9.838359590735665e-05, + "loss": 0.5042, + "step": 27450 + }, + { + "epoch": 0.14188349043705276, + "grad_norm": 18168.962890625, + "learning_rate": 9.83743696415805e-05, + "loss": 0.5043, + "step": 27500 + }, + { + "epoch": 0.14214146041966558, + "grad_norm": 18671.841796875, + "learning_rate": 9.836511755450256e-05, + "loss": 0.5054, + "step": 27550 + }, + { + "epoch": 0.1423994304022784, + "grad_norm": 17737.90625, + "learning_rate": 9.835583965106141e-05, + "loss": 0.507, + "step": 27600 + }, + { + "epoch": 0.14265740038489122, + "grad_norm": 23218.873046875, + "learning_rate": 9.834653593620939e-05, + "loss": 0.5055, + "step": 27650 + }, + { + "epoch": 0.14291537036750404, + "grad_norm": 20013.341796875, + "learning_rate": 9.833720641491269e-05, + "loss": 0.5008, + "step": 27700 + }, + { + "epoch": 0.14317334035011686, + "grad_norm": 21755.08203125, + "learning_rate": 9.832785109215119e-05, + "loss": 0.5029, + "step": 27750 + }, + { + "epoch": 0.14343131033272968, + "grad_norm": 18450.541015625, + "learning_rate": 9.831846997291859e-05, + "loss": 0.5086, + "step": 27800 + }, + { + "epoch": 0.1436892803153425, + "grad_norm": 17578.990234375, + "learning_rate": 9.830906306222235e-05, + "loss": 0.498, + "step": 27850 + }, + { + "epoch": 0.14394725029795533, + "grad_norm": 18771.2578125, + "learning_rate": 9.82996303650837e-05, + "loss": 0.5006, + "step": 27900 + }, + { + "epoch": 0.14420522028056815, + "grad_norm": 19841.912109375, + "learning_rate": 9.829017188653763e-05, + "loss": 0.5003, + "step": 27950 + }, + { + "epoch": 0.14446319026318097, + "grad_norm": 19089.384765625, + "learning_rate": 9.82806876316329e-05, + "loss": 0.5028, + "step": 28000 + }, + { + "epoch": 0.1447211602457938, + "grad_norm": 17971.998046875, + "learning_rate": 9.827117760543198e-05, + "loss": 0.5103, + "step": 28050 + }, + { + "epoch": 0.1449791302284066, + "grad_norm": 19590.46875, + "learning_rate": 9.826164181301121e-05, + "loss": 0.5075, + "step": 28100 + }, + { + "epoch": 0.14523710021101943, + "grad_norm": 19316.150390625, + "learning_rate": 9.825208025946056e-05, + "loss": 0.4971, + "step": 28150 + }, + { + "epoch": 0.14549507019363228, + "grad_norm": 19814.3125, + "learning_rate": 9.82424929498838e-05, + "loss": 0.501, + "step": 28200 + }, + { + "epoch": 0.1457530401762451, + "grad_norm": 18669.203125, + "learning_rate": 9.823287988939847e-05, + "loss": 0.5027, + "step": 28250 + }, + { + "epoch": 0.14601101015885792, + "grad_norm": 20375.48828125, + "learning_rate": 9.822324108313585e-05, + "loss": 0.4966, + "step": 28300 + }, + { + "epoch": 0.14626898014147074, + "grad_norm": 19665.4296875, + "learning_rate": 9.82135765362409e-05, + "loss": 0.4966, + "step": 28350 + }, + { + "epoch": 0.14652695012408357, + "grad_norm": 19579.771484375, + "learning_rate": 9.820388625387242e-05, + "loss": 0.5028, + "step": 28400 + }, + { + "epoch": 0.1467849201066964, + "grad_norm": 20270.564453125, + "learning_rate": 9.819417024120285e-05, + "loss": 0.4972, + "step": 28450 + }, + { + "epoch": 0.1470428900893092, + "grad_norm": 20025.6328125, + "learning_rate": 9.818442850341845e-05, + "loss": 0.5082, + "step": 28500 + }, + { + "epoch": 0.14730086007192203, + "grad_norm": 19062.525390625, + "learning_rate": 9.817466104571915e-05, + "loss": 0.4983, + "step": 28550 + }, + { + "epoch": 0.14755883005453485, + "grad_norm": 18558.0390625, + "learning_rate": 9.816486787331862e-05, + "loss": 0.5004, + "step": 28600 + }, + { + "epoch": 0.14781680003714767, + "grad_norm": 20880.6875, + "learning_rate": 9.815504899144428e-05, + "loss": 0.5036, + "step": 28650 + }, + { + "epoch": 0.1480747700197605, + "grad_norm": 19120.3359375, + "learning_rate": 9.814520440533726e-05, + "loss": 0.5004, + "step": 28700 + }, + { + "epoch": 0.14833274000237331, + "grad_norm": 17185.451171875, + "learning_rate": 9.813533412025242e-05, + "loss": 0.5047, + "step": 28750 + }, + { + "epoch": 0.14859070998498614, + "grad_norm": 21795.697265625, + "learning_rate": 9.81254381414583e-05, + "loss": 0.5033, + "step": 28800 + }, + { + "epoch": 0.14884867996759896, + "grad_norm": 21923.44140625, + "learning_rate": 9.811551647423718e-05, + "loss": 0.4957, + "step": 28850 + }, + { + "epoch": 0.1491066499502118, + "grad_norm": 18988.30078125, + "learning_rate": 9.810556912388509e-05, + "loss": 0.4979, + "step": 28900 + }, + { + "epoch": 0.14936461993282463, + "grad_norm": 18479.74609375, + "learning_rate": 9.809559609571169e-05, + "loss": 0.5003, + "step": 28950 + }, + { + "epoch": 0.14962258991543745, + "grad_norm": 20426.57421875, + "learning_rate": 9.808559739504043e-05, + "loss": 0.5019, + "step": 29000 + }, + { + "epoch": 0.14988055989805027, + "grad_norm": 20044.365234375, + "learning_rate": 9.80755730272084e-05, + "loss": 0.5012, + "step": 29050 + }, + { + "epoch": 0.1501385298806631, + "grad_norm": 18321.439453125, + "learning_rate": 9.806552299756641e-05, + "loss": 0.4918, + "step": 29100 + }, + { + "epoch": 0.1503964998632759, + "grad_norm": 20315.681640625, + "learning_rate": 9.805544731147899e-05, + "loss": 0.5015, + "step": 29150 + }, + { + "epoch": 0.15065446984588873, + "grad_norm": 20399.990234375, + "learning_rate": 9.804534597432432e-05, + "loss": 0.4967, + "step": 29200 + }, + { + "epoch": 0.15091243982850155, + "grad_norm": 19539.91796875, + "learning_rate": 9.803521899149432e-05, + "loss": 0.5002, + "step": 29250 + }, + { + "epoch": 0.15117040981111438, + "grad_norm": 20317.970703125, + "learning_rate": 9.802506636839457e-05, + "loss": 0.4988, + "step": 29300 + }, + { + "epoch": 0.1514283797937272, + "grad_norm": 18728.32421875, + "learning_rate": 9.801488811044434e-05, + "loss": 0.5007, + "step": 29350 + }, + { + "epoch": 0.15168634977634002, + "grad_norm": 21256.51171875, + "learning_rate": 9.80046842230766e-05, + "loss": 0.5066, + "step": 29400 + }, + { + "epoch": 0.15194431975895284, + "grad_norm": 18871.8828125, + "learning_rate": 9.799445471173799e-05, + "loss": 0.502, + "step": 29450 + }, + { + "epoch": 0.15220228974156566, + "grad_norm": 18434.251953125, + "learning_rate": 9.798419958188878e-05, + "loss": 0.5018, + "step": 29500 + }, + { + "epoch": 0.15246025972417848, + "grad_norm": 18562.412109375, + "learning_rate": 9.7973918839003e-05, + "loss": 0.4978, + "step": 29550 + }, + { + "epoch": 0.15271822970679133, + "grad_norm": 20020.7890625, + "learning_rate": 9.796361248856832e-05, + "loss": 0.4989, + "step": 29600 + }, + { + "epoch": 0.15297619968940415, + "grad_norm": 20026.6015625, + "learning_rate": 9.795328053608606e-05, + "loss": 0.5002, + "step": 29650 + }, + { + "epoch": 0.15323416967201697, + "grad_norm": 20098.703125, + "learning_rate": 9.794292298707119e-05, + "loss": 0.4938, + "step": 29700 + }, + { + "epoch": 0.1534921396546298, + "grad_norm": 18960.154296875, + "learning_rate": 9.793253984705239e-05, + "loss": 0.4956, + "step": 29750 + }, + { + "epoch": 0.15375010963724262, + "grad_norm": 20478.669921875, + "learning_rate": 9.7922131121572e-05, + "loss": 0.4998, + "step": 29800 + }, + { + "epoch": 0.15400807961985544, + "grad_norm": 20406.701171875, + "learning_rate": 9.791169681618596e-05, + "loss": 0.5083, + "step": 29850 + }, + { + "epoch": 0.15426604960246826, + "grad_norm": 17598.75390625, + "learning_rate": 9.790123693646391e-05, + "loss": 0.4968, + "step": 29900 + }, + { + "epoch": 0.15452401958508108, + "grad_norm": 19622.521484375, + "learning_rate": 9.789075148798915e-05, + "loss": 0.4881, + "step": 29950 + }, + { + "epoch": 0.1547819895676939, + "grad_norm": 20092.87109375, + "learning_rate": 9.78802404763586e-05, + "loss": 0.4994, + "step": 30000 + }, + { + "epoch": 0.1547819895676939, + "eval_loss": 0.4904574453830719, + "eval_runtime": 3267.2095, + "eval_samples_per_second": 949.165, + "eval_steps_per_second": 1.854, + "step": 30000 + }, + { + "epoch": 0.15503995955030672, + "grad_norm": 19136.84765625, + "learning_rate": 9.786970390718282e-05, + "loss": 0.4963, + "step": 30050 + }, + { + "epoch": 0.15529792953291954, + "grad_norm": 20464.998046875, + "learning_rate": 9.785914178608603e-05, + "loss": 0.4994, + "step": 30100 + }, + { + "epoch": 0.15555589951553236, + "grad_norm": 23388.55859375, + "learning_rate": 9.784855411870611e-05, + "loss": 0.5036, + "step": 30150 + }, + { + "epoch": 0.15581386949814519, + "grad_norm": 20002.095703125, + "learning_rate": 9.783794091069451e-05, + "loss": 0.5021, + "step": 30200 + }, + { + "epoch": 0.156071839480758, + "grad_norm": 19565.419921875, + "learning_rate": 9.782730216771641e-05, + "loss": 0.4929, + "step": 30250 + }, + { + "epoch": 0.15632980946337086, + "grad_norm": 20284.173828125, + "learning_rate": 9.781663789545052e-05, + "loss": 0.4889, + "step": 30300 + }, + { + "epoch": 0.15658777944598368, + "grad_norm": 18613.439453125, + "learning_rate": 9.780594809958922e-05, + "loss": 0.496, + "step": 30350 + }, + { + "epoch": 0.1568457494285965, + "grad_norm": 19419.1640625, + "learning_rate": 9.779523278583855e-05, + "loss": 0.4977, + "step": 30400 + }, + { + "epoch": 0.15710371941120932, + "grad_norm": 21695.361328125, + "learning_rate": 9.778449195991813e-05, + "loss": 0.4998, + "step": 30450 + }, + { + "epoch": 0.15736168939382214, + "grad_norm": 21914.3828125, + "learning_rate": 9.777372562756117e-05, + "loss": 0.4936, + "step": 30500 + }, + { + "epoch": 0.15761965937643496, + "grad_norm": 22384.525390625, + "learning_rate": 9.776293379451458e-05, + "loss": 0.5034, + "step": 30550 + }, + { + "epoch": 0.15787762935904778, + "grad_norm": 21174.220703125, + "learning_rate": 9.775211646653879e-05, + "loss": 0.4928, + "step": 30600 + }, + { + "epoch": 0.1581355993416606, + "grad_norm": 19809.953125, + "learning_rate": 9.77412736494079e-05, + "loss": 0.5014, + "step": 30650 + }, + { + "epoch": 0.15839356932427343, + "grad_norm": 19657.048828125, + "learning_rate": 9.773040534890958e-05, + "loss": 0.5022, + "step": 30700 + }, + { + "epoch": 0.15865153930688625, + "grad_norm": 20559.490234375, + "learning_rate": 9.771951157084514e-05, + "loss": 0.4923, + "step": 30750 + }, + { + "epoch": 0.15890950928949907, + "grad_norm": 19473.294921875, + "learning_rate": 9.770859232102946e-05, + "loss": 0.4991, + "step": 30800 + }, + { + "epoch": 0.1591674792721119, + "grad_norm": 19243.509765625, + "learning_rate": 9.769764760529102e-05, + "loss": 0.4934, + "step": 30850 + }, + { + "epoch": 0.1594254492547247, + "grad_norm": 20882.853515625, + "learning_rate": 9.768667742947189e-05, + "loss": 0.4989, + "step": 30900 + }, + { + "epoch": 0.15968341923733753, + "grad_norm": 19654.17578125, + "learning_rate": 9.767568179942776e-05, + "loss": 0.501, + "step": 30950 + }, + { + "epoch": 0.15994138921995035, + "grad_norm": 20069.412109375, + "learning_rate": 9.766466072102786e-05, + "loss": 0.5001, + "step": 31000 + }, + { + "epoch": 0.1601993592025632, + "grad_norm": 19730.416015625, + "learning_rate": 9.765361420015506e-05, + "loss": 0.4947, + "step": 31050 + }, + { + "epoch": 0.16045732918517602, + "grad_norm": 19825.43359375, + "learning_rate": 9.764254224270573e-05, + "loss": 0.5012, + "step": 31100 + }, + { + "epoch": 0.16071529916778884, + "grad_norm": 19111.859375, + "learning_rate": 9.763144485458992e-05, + "loss": 0.4946, + "step": 31150 + }, + { + "epoch": 0.16097326915040167, + "grad_norm": 20071.7578125, + "learning_rate": 9.762032204173116e-05, + "loss": 0.4961, + "step": 31200 + }, + { + "epoch": 0.1612312391330145, + "grad_norm": 18780.638671875, + "learning_rate": 9.76091738100666e-05, + "loss": 0.4952, + "step": 31250 + }, + { + "epoch": 0.1614892091156273, + "grad_norm": 20192.69140625, + "learning_rate": 9.759800016554699e-05, + "loss": 0.4919, + "step": 31300 + }, + { + "epoch": 0.16174717909824013, + "grad_norm": 18430.57421875, + "learning_rate": 9.758680111413653e-05, + "loss": 0.4953, + "step": 31350 + }, + { + "epoch": 0.16200514908085295, + "grad_norm": 18921.740234375, + "learning_rate": 9.757557666181314e-05, + "loss": 0.5013, + "step": 31400 + }, + { + "epoch": 0.16226311906346577, + "grad_norm": 18918.857421875, + "learning_rate": 9.756432681456815e-05, + "loss": 0.4976, + "step": 31450 + }, + { + "epoch": 0.1625210890460786, + "grad_norm": 21373.814453125, + "learning_rate": 9.755305157840655e-05, + "loss": 0.4975, + "step": 31500 + }, + { + "epoch": 0.16277905902869141, + "grad_norm": 19509.482421875, + "learning_rate": 9.754175095934684e-05, + "loss": 0.4966, + "step": 31550 + }, + { + "epoch": 0.16303702901130424, + "grad_norm": 18362.125, + "learning_rate": 9.753042496342103e-05, + "loss": 0.505, + "step": 31600 + }, + { + "epoch": 0.16329499899391706, + "grad_norm": 20344.11328125, + "learning_rate": 9.751907359667476e-05, + "loss": 0.4988, + "step": 31650 + }, + { + "epoch": 0.16355296897652988, + "grad_norm": 21398.97265625, + "learning_rate": 9.750769686516715e-05, + "loss": 0.493, + "step": 31700 + }, + { + "epoch": 0.16381093895914273, + "grad_norm": 21106.955078125, + "learning_rate": 9.74962947749709e-05, + "loss": 0.4999, + "step": 31750 + }, + { + "epoch": 0.16406890894175555, + "grad_norm": 19787.216796875, + "learning_rate": 9.74848673321722e-05, + "loss": 0.4932, + "step": 31800 + }, + { + "epoch": 0.16432687892436837, + "grad_norm": 19198.83984375, + "learning_rate": 9.747341454287082e-05, + "loss": 0.4919, + "step": 31850 + }, + { + "epoch": 0.1645848489069812, + "grad_norm": 18460.92578125, + "learning_rate": 9.746193641318002e-05, + "loss": 0.495, + "step": 31900 + }, + { + "epoch": 0.164842818889594, + "grad_norm": 18591.427734375, + "learning_rate": 9.74504329492266e-05, + "loss": 0.4888, + "step": 31950 + }, + { + "epoch": 0.16510078887220683, + "grad_norm": 21651.3515625, + "learning_rate": 9.743890415715091e-05, + "loss": 0.4909, + "step": 32000 + }, + { + "epoch": 0.16535875885481965, + "grad_norm": 18884.486328125, + "learning_rate": 9.742735004310677e-05, + "loss": 0.4981, + "step": 32050 + }, + { + "epoch": 0.16561672883743248, + "grad_norm": 19223.658203125, + "learning_rate": 9.741577061326157e-05, + "loss": 0.4961, + "step": 32100 + }, + { + "epoch": 0.1658746988200453, + "grad_norm": 18266.560546875, + "learning_rate": 9.740416587379615e-05, + "loss": 0.4914, + "step": 32150 + }, + { + "epoch": 0.16613266880265812, + "grad_norm": 19871.509765625, + "learning_rate": 9.739253583090493e-05, + "loss": 0.499, + "step": 32200 + }, + { + "epoch": 0.16639063878527094, + "grad_norm": 19524.298828125, + "learning_rate": 9.738088049079577e-05, + "loss": 0.4944, + "step": 32250 + }, + { + "epoch": 0.16664860876788376, + "grad_norm": 20308.685546875, + "learning_rate": 9.73691998596901e-05, + "loss": 0.4941, + "step": 32300 + }, + { + "epoch": 0.16690657875049658, + "grad_norm": 19125.52734375, + "learning_rate": 9.735749394382278e-05, + "loss": 0.4968, + "step": 32350 + }, + { + "epoch": 0.1671645487331094, + "grad_norm": 18792.716796875, + "learning_rate": 9.734576274944223e-05, + "loss": 0.4959, + "step": 32400 + }, + { + "epoch": 0.16742251871572225, + "grad_norm": 18521.54296875, + "learning_rate": 9.73340062828103e-05, + "loss": 0.4913, + "step": 32450 + }, + { + "epoch": 0.16768048869833507, + "grad_norm": 19540.41796875, + "learning_rate": 9.732222455020241e-05, + "loss": 0.4999, + "step": 32500 + }, + { + "epoch": 0.1679384586809479, + "grad_norm": 18682.84375, + "learning_rate": 9.73104175579074e-05, + "loss": 0.4991, + "step": 32550 + }, + { + "epoch": 0.16819642866356072, + "grad_norm": 20134.8984375, + "learning_rate": 9.72985853122276e-05, + "loss": 0.4839, + "step": 32600 + }, + { + "epoch": 0.16845439864617354, + "grad_norm": 20375.1484375, + "learning_rate": 9.728672781947883e-05, + "loss": 0.4941, + "step": 32650 + }, + { + "epoch": 0.16871236862878636, + "grad_norm": 19720.98046875, + "learning_rate": 9.727484508599042e-05, + "loss": 0.4856, + "step": 32700 + }, + { + "epoch": 0.16897033861139918, + "grad_norm": 19408.7734375, + "learning_rate": 9.726293711810513e-05, + "loss": 0.4942, + "step": 32750 + }, + { + "epoch": 0.169228308594012, + "grad_norm": 20136.892578125, + "learning_rate": 9.725100392217919e-05, + "loss": 0.4942, + "step": 32800 + }, + { + "epoch": 0.16948627857662482, + "grad_norm": 20555.27734375, + "learning_rate": 9.723904550458232e-05, + "loss": 0.4907, + "step": 32850 + }, + { + "epoch": 0.16974424855923764, + "grad_norm": 18876.787109375, + "learning_rate": 9.722706187169769e-05, + "loss": 0.4951, + "step": 32900 + }, + { + "epoch": 0.17000221854185046, + "grad_norm": 19918.4296875, + "learning_rate": 9.721505302992194e-05, + "loss": 0.4871, + "step": 32950 + }, + { + "epoch": 0.17026018852446329, + "grad_norm": 18593.453125, + "learning_rate": 9.720301898566513e-05, + "loss": 0.4889, + "step": 33000 + }, + { + "epoch": 0.1705181585070761, + "grad_norm": 21007.5625, + "learning_rate": 9.719095974535084e-05, + "loss": 0.4936, + "step": 33050 + }, + { + "epoch": 0.17077612848968893, + "grad_norm": 21749.849609375, + "learning_rate": 9.717887531541601e-05, + "loss": 0.4915, + "step": 33100 + }, + { + "epoch": 0.17103409847230178, + "grad_norm": 19097.896484375, + "learning_rate": 9.716676570231114e-05, + "loss": 0.4857, + "step": 33150 + }, + { + "epoch": 0.1712920684549146, + "grad_norm": 18509.107421875, + "learning_rate": 9.715463091250003e-05, + "loss": 0.487, + "step": 33200 + }, + { + "epoch": 0.17155003843752742, + "grad_norm": 21414.916015625, + "learning_rate": 9.714247095246007e-05, + "loss": 0.4929, + "step": 33250 + }, + { + "epoch": 0.17180800842014024, + "grad_norm": 19836.978515625, + "learning_rate": 9.713028582868196e-05, + "loss": 0.4948, + "step": 33300 + }, + { + "epoch": 0.17206597840275306, + "grad_norm": 18013.787109375, + "learning_rate": 9.71180755476699e-05, + "loss": 0.4945, + "step": 33350 + }, + { + "epoch": 0.17232394838536588, + "grad_norm": 18498.1640625, + "learning_rate": 9.71058401159415e-05, + "loss": 0.4961, + "step": 33400 + }, + { + "epoch": 0.1725819183679787, + "grad_norm": 19871.404296875, + "learning_rate": 9.709357954002778e-05, + "loss": 0.4896, + "step": 33450 + }, + { + "epoch": 0.17283988835059153, + "grad_norm": 20794.05859375, + "learning_rate": 9.708129382647324e-05, + "loss": 0.4855, + "step": 33500 + }, + { + "epoch": 0.17309785833320435, + "grad_norm": 19775.6328125, + "learning_rate": 9.706898298183573e-05, + "loss": 0.4899, + "step": 33550 + }, + { + "epoch": 0.17335582831581717, + "grad_norm": 24329.740234375, + "learning_rate": 9.705664701268652e-05, + "loss": 0.4879, + "step": 33600 + }, + { + "epoch": 0.17361379829843, + "grad_norm": 19666.697265625, + "learning_rate": 9.704428592561037e-05, + "loss": 0.493, + "step": 33650 + }, + { + "epoch": 0.1738717682810428, + "grad_norm": 20382.115234375, + "learning_rate": 9.703189972720532e-05, + "loss": 0.4922, + "step": 33700 + }, + { + "epoch": 0.17412973826365563, + "grad_norm": 20240.46875, + "learning_rate": 9.701948842408293e-05, + "loss": 0.4908, + "step": 33750 + }, + { + "epoch": 0.17438770824626845, + "grad_norm": 18531.224609375, + "learning_rate": 9.700705202286811e-05, + "loss": 0.489, + "step": 33800 + }, + { + "epoch": 0.17464567822888127, + "grad_norm": 19121.0625, + "learning_rate": 9.699459053019912e-05, + "loss": 0.4884, + "step": 33850 + }, + { + "epoch": 0.17490364821149412, + "grad_norm": 19959.931640625, + "learning_rate": 9.698210395272773e-05, + "loss": 0.4912, + "step": 33900 + }, + { + "epoch": 0.17516161819410694, + "grad_norm": 18255.732421875, + "learning_rate": 9.696959229711901e-05, + "loss": 0.4888, + "step": 33950 + }, + { + "epoch": 0.17541958817671977, + "grad_norm": 21808.8671875, + "learning_rate": 9.695705557005142e-05, + "loss": 0.4945, + "step": 34000 + }, + { + "epoch": 0.1756775581593326, + "grad_norm": 18687.521484375, + "learning_rate": 9.694449377821685e-05, + "loss": 0.4891, + "step": 34050 + }, + { + "epoch": 0.1759355281419454, + "grad_norm": 18309.859375, + "learning_rate": 9.693190692832053e-05, + "loss": 0.4888, + "step": 34100 + }, + { + "epoch": 0.17619349812455823, + "grad_norm": 19453.705078125, + "learning_rate": 9.691929502708106e-05, + "loss": 0.4852, + "step": 34150 + }, + { + "epoch": 0.17645146810717105, + "grad_norm": 20964.595703125, + "learning_rate": 9.690665808123046e-05, + "loss": 0.4931, + "step": 34200 + }, + { + "epoch": 0.17670943808978387, + "grad_norm": 20170.5703125, + "learning_rate": 9.689399609751405e-05, + "loss": 0.4908, + "step": 34250 + }, + { + "epoch": 0.1769674080723967, + "grad_norm": 18276.19140625, + "learning_rate": 9.688130908269058e-05, + "loss": 0.4906, + "step": 34300 + }, + { + "epoch": 0.1772253780550095, + "grad_norm": 21062.56640625, + "learning_rate": 9.686859704353212e-05, + "loss": 0.4911, + "step": 34350 + }, + { + "epoch": 0.17748334803762233, + "grad_norm": 21678.6796875, + "learning_rate": 9.685585998682414e-05, + "loss": 0.4894, + "step": 34400 + }, + { + "epoch": 0.17774131802023516, + "grad_norm": 17795.384765625, + "learning_rate": 9.684309791936539e-05, + "loss": 0.4893, + "step": 34450 + }, + { + "epoch": 0.17799928800284798, + "grad_norm": 21536.837890625, + "learning_rate": 9.683031084796803e-05, + "loss": 0.4889, + "step": 34500 + }, + { + "epoch": 0.1782572579854608, + "grad_norm": 20554.423828125, + "learning_rate": 9.681749877945756e-05, + "loss": 0.4843, + "step": 34550 + }, + { + "epoch": 0.17851522796807365, + "grad_norm": 22045.376953125, + "learning_rate": 9.680466172067282e-05, + "loss": 0.4895, + "step": 34600 + }, + { + "epoch": 0.17877319795068647, + "grad_norm": 21406.853515625, + "learning_rate": 9.679179967846597e-05, + "loss": 0.4914, + "step": 34650 + }, + { + "epoch": 0.1790311679332993, + "grad_norm": 20971.037109375, + "learning_rate": 9.677891265970252e-05, + "loss": 0.485, + "step": 34700 + }, + { + "epoch": 0.1792891379159121, + "grad_norm": 20256.73828125, + "learning_rate": 9.676600067126129e-05, + "loss": 0.4918, + "step": 34750 + }, + { + "epoch": 0.17954710789852493, + "grad_norm": 19123.048828125, + "learning_rate": 9.67530637200345e-05, + "loss": 0.49, + "step": 34800 + }, + { + "epoch": 0.17980507788113775, + "grad_norm": 20799.748046875, + "learning_rate": 9.674010181292761e-05, + "loss": 0.4889, + "step": 34850 + }, + { + "epoch": 0.18006304786375057, + "grad_norm": 19569.609375, + "learning_rate": 9.672711495685945e-05, + "loss": 0.4882, + "step": 34900 + }, + { + "epoch": 0.1803210178463634, + "grad_norm": 18339.76171875, + "learning_rate": 9.671410315876213e-05, + "loss": 0.4884, + "step": 34950 + }, + { + "epoch": 0.18057898782897622, + "grad_norm": 20066.099609375, + "learning_rate": 9.670106642558111e-05, + "loss": 0.4866, + "step": 35000 + }, + { + "epoch": 0.18057898782897622, + "eval_loss": 0.48020538687705994, + "eval_runtime": 3265.3619, + "eval_samples_per_second": 949.702, + "eval_steps_per_second": 1.855, + "step": 35000 + }, + { + "epoch": 0.18083695781158904, + "grad_norm": 18703.037109375, + "learning_rate": 9.668800476427515e-05, + "loss": 0.4953, + "step": 35050 + }, + { + "epoch": 0.18109492779420186, + "grad_norm": 19886.177734375, + "learning_rate": 9.667491818181631e-05, + "loss": 0.4845, + "step": 35100 + }, + { + "epoch": 0.18135289777681468, + "grad_norm": 19349.08203125, + "learning_rate": 9.666180668518993e-05, + "loss": 0.493, + "step": 35150 + }, + { + "epoch": 0.1816108677594275, + "grad_norm": 19786.404296875, + "learning_rate": 9.664867028139473e-05, + "loss": 0.4815, + "step": 35200 + }, + { + "epoch": 0.18186883774204032, + "grad_norm": 21271.05859375, + "learning_rate": 9.66355089774426e-05, + "loss": 0.4907, + "step": 35250 + }, + { + "epoch": 0.18212680772465317, + "grad_norm": 19096.3125, + "learning_rate": 9.662232278035885e-05, + "loss": 0.4865, + "step": 35300 + }, + { + "epoch": 0.182384777707266, + "grad_norm": 20136.935546875, + "learning_rate": 9.660911169718196e-05, + "loss": 0.4824, + "step": 35350 + }, + { + "epoch": 0.18264274768987881, + "grad_norm": 19532.361328125, + "learning_rate": 9.65958757349638e-05, + "loss": 0.4857, + "step": 35400 + }, + { + "epoch": 0.18290071767249164, + "grad_norm": 18227.626953125, + "learning_rate": 9.658261490076944e-05, + "loss": 0.4871, + "step": 35450 + }, + { + "epoch": 0.18315868765510446, + "grad_norm": 21021.564453125, + "learning_rate": 9.656932920167727e-05, + "loss": 0.485, + "step": 35500 + }, + { + "epoch": 0.18341665763771728, + "grad_norm": 19943.9765625, + "learning_rate": 9.655601864477893e-05, + "loss": 0.4908, + "step": 35550 + }, + { + "epoch": 0.1836746276203301, + "grad_norm": 19356.8203125, + "learning_rate": 9.654268323717934e-05, + "loss": 0.4849, + "step": 35600 + }, + { + "epoch": 0.18393259760294292, + "grad_norm": 19431.9453125, + "learning_rate": 9.652932298599671e-05, + "loss": 0.4927, + "step": 35650 + }, + { + "epoch": 0.18419056758555574, + "grad_norm": 18860.0625, + "learning_rate": 9.651593789836242e-05, + "loss": 0.4879, + "step": 35700 + }, + { + "epoch": 0.18444853756816856, + "grad_norm": 18524.46875, + "learning_rate": 9.650252798142123e-05, + "loss": 0.4877, + "step": 35750 + }, + { + "epoch": 0.18470650755078138, + "grad_norm": 18897.322265625, + "learning_rate": 9.648909324233107e-05, + "loss": 0.4906, + "step": 35800 + }, + { + "epoch": 0.1849644775333942, + "grad_norm": 21080.552734375, + "learning_rate": 9.647563368826313e-05, + "loss": 0.4895, + "step": 35850 + }, + { + "epoch": 0.18522244751600703, + "grad_norm": 20014.828125, + "learning_rate": 9.64621493264019e-05, + "loss": 0.4816, + "step": 35900 + }, + { + "epoch": 0.18548041749861985, + "grad_norm": 19470.3984375, + "learning_rate": 9.644864016394504e-05, + "loss": 0.4812, + "step": 35950 + }, + { + "epoch": 0.1857383874812327, + "grad_norm": 21915.400390625, + "learning_rate": 9.643510620810348e-05, + "loss": 0.4859, + "step": 36000 + }, + { + "epoch": 0.18599635746384552, + "grad_norm": 19367.009765625, + "learning_rate": 9.642154746610139e-05, + "loss": 0.4905, + "step": 36050 + }, + { + "epoch": 0.18625432744645834, + "grad_norm": 18379.70703125, + "learning_rate": 9.640796394517616e-05, + "loss": 0.4878, + "step": 36100 + }, + { + "epoch": 0.18651229742907116, + "grad_norm": 18933.455078125, + "learning_rate": 9.639435565257842e-05, + "loss": 0.4877, + "step": 36150 + }, + { + "epoch": 0.18677026741168398, + "grad_norm": 19026.484375, + "learning_rate": 9.638072259557201e-05, + "loss": 0.4873, + "step": 36200 + }, + { + "epoch": 0.1870282373942968, + "grad_norm": 21111.09375, + "learning_rate": 9.636706478143398e-05, + "loss": 0.4815, + "step": 36250 + }, + { + "epoch": 0.18728620737690962, + "grad_norm": 19362.541015625, + "learning_rate": 9.635338221745462e-05, + "loss": 0.4854, + "step": 36300 + }, + { + "epoch": 0.18754417735952245, + "grad_norm": 19861.58984375, + "learning_rate": 9.63396749109374e-05, + "loss": 0.4832, + "step": 36350 + }, + { + "epoch": 0.18780214734213527, + "grad_norm": 18793.623046875, + "learning_rate": 9.632594286919905e-05, + "loss": 0.4811, + "step": 36400 + }, + { + "epoch": 0.1880601173247481, + "grad_norm": 20452.26953125, + "learning_rate": 9.631218609956943e-05, + "loss": 0.4872, + "step": 36450 + }, + { + "epoch": 0.1883180873073609, + "grad_norm": 19237.203125, + "learning_rate": 9.629840460939165e-05, + "loss": 0.4941, + "step": 36500 + }, + { + "epoch": 0.18857605728997373, + "grad_norm": 19828.84765625, + "learning_rate": 9.628459840602202e-05, + "loss": 0.4869, + "step": 36550 + }, + { + "epoch": 0.18883402727258655, + "grad_norm": 18171.08203125, + "learning_rate": 9.627076749683e-05, + "loss": 0.4915, + "step": 36600 + }, + { + "epoch": 0.18909199725519937, + "grad_norm": 21346.9375, + "learning_rate": 9.625691188919827e-05, + "loss": 0.4913, + "step": 36650 + }, + { + "epoch": 0.1893499672378122, + "grad_norm": 20066.7890625, + "learning_rate": 9.62430315905227e-05, + "loss": 0.4809, + "step": 36700 + }, + { + "epoch": 0.18960793722042504, + "grad_norm": 20736.546875, + "learning_rate": 9.622912660821231e-05, + "loss": 0.4849, + "step": 36750 + }, + { + "epoch": 0.18986590720303786, + "grad_norm": 20891.958984375, + "learning_rate": 9.62151969496893e-05, + "loss": 0.4831, + "step": 36800 + }, + { + "epoch": 0.19012387718565069, + "grad_norm": 21394.1953125, + "learning_rate": 9.620124262238908e-05, + "loss": 0.4855, + "step": 36850 + }, + { + "epoch": 0.1903818471682635, + "grad_norm": 19725.89453125, + "learning_rate": 9.618726363376016e-05, + "loss": 0.48, + "step": 36900 + }, + { + "epoch": 0.19063981715087633, + "grad_norm": 21622.78125, + "learning_rate": 9.617325999126429e-05, + "loss": 0.4832, + "step": 36950 + }, + { + "epoch": 0.19089778713348915, + "grad_norm": 22529.548828125, + "learning_rate": 9.615923170237633e-05, + "loss": 0.4852, + "step": 37000 + }, + { + "epoch": 0.19115575711610197, + "grad_norm": 21136.404296875, + "learning_rate": 9.614517877458428e-05, + "loss": 0.4816, + "step": 37050 + }, + { + "epoch": 0.1914137270987148, + "grad_norm": 19039.330078125, + "learning_rate": 9.61311012153894e-05, + "loss": 0.4835, + "step": 37100 + }, + { + "epoch": 0.1916716970813276, + "grad_norm": 19755.974609375, + "learning_rate": 9.611699903230594e-05, + "loss": 0.4846, + "step": 37150 + }, + { + "epoch": 0.19192966706394043, + "grad_norm": 19061.28515625, + "learning_rate": 9.610287223286139e-05, + "loss": 0.4816, + "step": 37200 + }, + { + "epoch": 0.19218763704655326, + "grad_norm": 21649.275390625, + "learning_rate": 9.608872082459639e-05, + "loss": 0.4837, + "step": 37250 + }, + { + "epoch": 0.19244560702916608, + "grad_norm": 19856.759765625, + "learning_rate": 9.607454481506466e-05, + "loss": 0.4848, + "step": 37300 + }, + { + "epoch": 0.1927035770117789, + "grad_norm": 19442.810546875, + "learning_rate": 9.60603442118331e-05, + "loss": 0.4828, + "step": 37350 + }, + { + "epoch": 0.19296154699439172, + "grad_norm": 20076.44140625, + "learning_rate": 9.604611902248168e-05, + "loss": 0.4896, + "step": 37400 + }, + { + "epoch": 0.19321951697700457, + "grad_norm": 18413.908203125, + "learning_rate": 9.603186925460359e-05, + "loss": 0.4806, + "step": 37450 + }, + { + "epoch": 0.1934774869596174, + "grad_norm": 19618.3984375, + "learning_rate": 9.601759491580503e-05, + "loss": 0.4864, + "step": 37500 + }, + { + "epoch": 0.1937354569422302, + "grad_norm": 20347.177734375, + "learning_rate": 9.600329601370539e-05, + "loss": 0.489, + "step": 37550 + }, + { + "epoch": 0.19399342692484303, + "grad_norm": 19288.380859375, + "learning_rate": 9.598897255593713e-05, + "loss": 0.4829, + "step": 37600 + }, + { + "epoch": 0.19425139690745585, + "grad_norm": 20326.1484375, + "learning_rate": 9.597462455014585e-05, + "loss": 0.4856, + "step": 37650 + }, + { + "epoch": 0.19450936689006867, + "grad_norm": 19598.14453125, + "learning_rate": 9.596025200399024e-05, + "loss": 0.4831, + "step": 37700 + }, + { + "epoch": 0.1947673368726815, + "grad_norm": 20041.28125, + "learning_rate": 9.594585492514205e-05, + "loss": 0.4822, + "step": 37750 + }, + { + "epoch": 0.19502530685529432, + "grad_norm": 20853.201171875, + "learning_rate": 9.593143332128623e-05, + "loss": 0.4874, + "step": 37800 + }, + { + "epoch": 0.19528327683790714, + "grad_norm": 21364.455078125, + "learning_rate": 9.591698720012068e-05, + "loss": 0.482, + "step": 37850 + }, + { + "epoch": 0.19554124682051996, + "grad_norm": 18795.447265625, + "learning_rate": 9.590251656935652e-05, + "loss": 0.489, + "step": 37900 + }, + { + "epoch": 0.19579921680313278, + "grad_norm": 23039.455078125, + "learning_rate": 9.588802143671784e-05, + "loss": 0.4879, + "step": 37950 + }, + { + "epoch": 0.1960571867857456, + "grad_norm": 19842.263671875, + "learning_rate": 9.58735018099419e-05, + "loss": 0.4869, + "step": 38000 + }, + { + "epoch": 0.19631515676835842, + "grad_norm": 21241.00390625, + "learning_rate": 9.585895769677897e-05, + "loss": 0.4746, + "step": 38050 + }, + { + "epoch": 0.19657312675097124, + "grad_norm": 19803.2265625, + "learning_rate": 9.584438910499245e-05, + "loss": 0.4824, + "step": 38100 + }, + { + "epoch": 0.1968310967335841, + "grad_norm": 18873.744140625, + "learning_rate": 9.582979604235873e-05, + "loss": 0.4817, + "step": 38150 + }, + { + "epoch": 0.19708906671619691, + "grad_norm": 19128.8828125, + "learning_rate": 9.581517851666734e-05, + "loss": 0.482, + "step": 38200 + }, + { + "epoch": 0.19734703669880974, + "grad_norm": 20514.16796875, + "learning_rate": 9.580053653572081e-05, + "loss": 0.4781, + "step": 38250 + }, + { + "epoch": 0.19760500668142256, + "grad_norm": 19135.58984375, + "learning_rate": 9.578587010733475e-05, + "loss": 0.4815, + "step": 38300 + }, + { + "epoch": 0.19786297666403538, + "grad_norm": 22849.197265625, + "learning_rate": 9.577117923933782e-05, + "loss": 0.4794, + "step": 38350 + }, + { + "epoch": 0.1981209466466482, + "grad_norm": 21278.736328125, + "learning_rate": 9.575646393957173e-05, + "loss": 0.4832, + "step": 38400 + }, + { + "epoch": 0.19837891662926102, + "grad_norm": 19292.162109375, + "learning_rate": 9.57417242158912e-05, + "loss": 0.4876, + "step": 38450 + }, + { + "epoch": 0.19863688661187384, + "grad_norm": 17778.423828125, + "learning_rate": 9.572696007616402e-05, + "loss": 0.4842, + "step": 38500 + }, + { + "epoch": 0.19889485659448666, + "grad_norm": 18855.140625, + "learning_rate": 9.5712171528271e-05, + "loss": 0.4846, + "step": 38550 + }, + { + "epoch": 0.19915282657709948, + "grad_norm": 21640.8203125, + "learning_rate": 9.5697358580106e-05, + "loss": 0.4829, + "step": 38600 + }, + { + "epoch": 0.1994107965597123, + "grad_norm": 19358.3828125, + "learning_rate": 9.568252123957586e-05, + "loss": 0.4806, + "step": 38650 + }, + { + "epoch": 0.19966876654232513, + "grad_norm": 20781.98828125, + "learning_rate": 9.566765951460046e-05, + "loss": 0.4849, + "step": 38700 + }, + { + "epoch": 0.19992673652493795, + "grad_norm": 20604.7265625, + "learning_rate": 9.565277341311271e-05, + "loss": 0.4856, + "step": 38750 + }, + { + "epoch": 0.20018470650755077, + "grad_norm": 20930.048828125, + "learning_rate": 9.563786294305854e-05, + "loss": 0.4812, + "step": 38800 + }, + { + "epoch": 0.20044267649016362, + "grad_norm": 22721.259765625, + "learning_rate": 9.562292811239686e-05, + "loss": 0.4857, + "step": 38850 + }, + { + "epoch": 0.20070064647277644, + "grad_norm": 19667.57421875, + "learning_rate": 9.560796892909957e-05, + "loss": 0.483, + "step": 38900 + }, + { + "epoch": 0.20095861645538926, + "grad_norm": 18259.19140625, + "learning_rate": 9.559298540115164e-05, + "loss": 0.4851, + "step": 38950 + }, + { + "epoch": 0.20121658643800208, + "grad_norm": 20980.18359375, + "learning_rate": 9.557797753655096e-05, + "loss": 0.4815, + "step": 39000 + }, + { + "epoch": 0.2014745564206149, + "grad_norm": 19840.025390625, + "learning_rate": 9.556294534330841e-05, + "loss": 0.4878, + "step": 39050 + }, + { + "epoch": 0.20173252640322772, + "grad_norm": 20406.69921875, + "learning_rate": 9.554788882944792e-05, + "loss": 0.481, + "step": 39100 + }, + { + "epoch": 0.20199049638584055, + "grad_norm": 19177.447265625, + "learning_rate": 9.553280800300637e-05, + "loss": 0.4857, + "step": 39150 + }, + { + "epoch": 0.20224846636845337, + "grad_norm": 21242.21875, + "learning_rate": 9.551770287203359e-05, + "loss": 0.4889, + "step": 39200 + }, + { + "epoch": 0.2025064363510662, + "grad_norm": 19343.58203125, + "learning_rate": 9.550257344459241e-05, + "loss": 0.482, + "step": 39250 + }, + { + "epoch": 0.202764406333679, + "grad_norm": 21327.587890625, + "learning_rate": 9.548741972875863e-05, + "loss": 0.4802, + "step": 39300 + }, + { + "epoch": 0.20302237631629183, + "grad_norm": 21366.98828125, + "learning_rate": 9.547224173262102e-05, + "loss": 0.4779, + "step": 39350 + }, + { + "epoch": 0.20328034629890465, + "grad_norm": 20876.39453125, + "learning_rate": 9.545703946428128e-05, + "loss": 0.4843, + "step": 39400 + }, + { + "epoch": 0.20353831628151747, + "grad_norm": 21280.873046875, + "learning_rate": 9.544181293185413e-05, + "loss": 0.4805, + "step": 39450 + }, + { + "epoch": 0.2037962862641303, + "grad_norm": 19546.134765625, + "learning_rate": 9.542656214346713e-05, + "loss": 0.4753, + "step": 39500 + }, + { + "epoch": 0.20405425624674312, + "grad_norm": 19179.05859375, + "learning_rate": 9.541128710726091e-05, + "loss": 0.4812, + "step": 39550 + }, + { + "epoch": 0.20431222622935596, + "grad_norm": 23525.50390625, + "learning_rate": 9.539598783138897e-05, + "loss": 0.4843, + "step": 39600 + }, + { + "epoch": 0.20457019621196879, + "grad_norm": 19369.103515625, + "learning_rate": 9.538066432401775e-05, + "loss": 0.4788, + "step": 39650 + }, + { + "epoch": 0.2048281661945816, + "grad_norm": 20777.119140625, + "learning_rate": 9.536531659332667e-05, + "loss": 0.4779, + "step": 39700 + }, + { + "epoch": 0.20508613617719443, + "grad_norm": 18987.701171875, + "learning_rate": 9.534994464750806e-05, + "loss": 0.4807, + "step": 39750 + }, + { + "epoch": 0.20534410615980725, + "grad_norm": 19523.873046875, + "learning_rate": 9.533454849476712e-05, + "loss": 0.4798, + "step": 39800 + }, + { + "epoch": 0.20560207614242007, + "grad_norm": 21302.05859375, + "learning_rate": 9.531912814332206e-05, + "loss": 0.4811, + "step": 39850 + }, + { + "epoch": 0.2058600461250329, + "grad_norm": 21545.626953125, + "learning_rate": 9.530368360140394e-05, + "loss": 0.4814, + "step": 39900 + }, + { + "epoch": 0.2061180161076457, + "grad_norm": 22709.7265625, + "learning_rate": 9.528821487725678e-05, + "loss": 0.4827, + "step": 39950 + }, + { + "epoch": 0.20637598609025853, + "grad_norm": 20853.228515625, + "learning_rate": 9.527272197913746e-05, + "loss": 0.4838, + "step": 40000 + }, + { + "epoch": 0.20637598609025853, + "eval_loss": 0.47092095017433167, + "eval_runtime": 3339.7722, + "eval_samples_per_second": 928.542, + "eval_steps_per_second": 1.814, + "step": 40000 + }, + { + "epoch": 0.20663395607287136, + "grad_norm": 18389.748046875, + "learning_rate": 9.525720491531581e-05, + "loss": 0.4809, + "step": 40050 + }, + { + "epoch": 0.20689192605548418, + "grad_norm": 20328.59765625, + "learning_rate": 9.524166369407453e-05, + "loss": 0.4827, + "step": 40100 + }, + { + "epoch": 0.207149896038097, + "grad_norm": 21094.966796875, + "learning_rate": 9.522609832370924e-05, + "loss": 0.484, + "step": 40150 + }, + { + "epoch": 0.20740786602070982, + "grad_norm": 22630.64453125, + "learning_rate": 9.52105088125284e-05, + "loss": 0.4829, + "step": 40200 + }, + { + "epoch": 0.20766583600332264, + "grad_norm": 19477.7265625, + "learning_rate": 9.51948951688534e-05, + "loss": 0.4793, + "step": 40250 + }, + { + "epoch": 0.2079238059859355, + "grad_norm": 20242.53125, + "learning_rate": 9.517925740101851e-05, + "loss": 0.4797, + "step": 40300 + }, + { + "epoch": 0.2081817759685483, + "grad_norm": 19952.421875, + "learning_rate": 9.516359551737087e-05, + "loss": 0.4785, + "step": 40350 + }, + { + "epoch": 0.20843974595116113, + "grad_norm": 19216.220703125, + "learning_rate": 9.514790952627049e-05, + "loss": 0.4753, + "step": 40400 + }, + { + "epoch": 0.20869771593377395, + "grad_norm": 20297.515625, + "learning_rate": 9.513219943609024e-05, + "loss": 0.4792, + "step": 40450 + }, + { + "epoch": 0.20895568591638677, + "grad_norm": 19528.7890625, + "learning_rate": 9.511646525521585e-05, + "loss": 0.4801, + "step": 40500 + }, + { + "epoch": 0.2092136558989996, + "grad_norm": 18037.7890625, + "learning_rate": 9.510070699204597e-05, + "loss": 0.483, + "step": 40550 + }, + { + "epoch": 0.20947162588161242, + "grad_norm": 20636.4296875, + "learning_rate": 9.508492465499199e-05, + "loss": 0.4761, + "step": 40600 + }, + { + "epoch": 0.20972959586422524, + "grad_norm": 20096.857421875, + "learning_rate": 9.506911825247827e-05, + "loss": 0.4804, + "step": 40650 + }, + { + "epoch": 0.20998756584683806, + "grad_norm": 20855.619140625, + "learning_rate": 9.505328779294192e-05, + "loss": 0.4823, + "step": 40700 + }, + { + "epoch": 0.21024553582945088, + "grad_norm": 19640.521484375, + "learning_rate": 9.503743328483296e-05, + "loss": 0.4818, + "step": 40750 + }, + { + "epoch": 0.2105035058120637, + "grad_norm": 20990.525390625, + "learning_rate": 9.50215547366142e-05, + "loss": 0.4804, + "step": 40800 + }, + { + "epoch": 0.21076147579467652, + "grad_norm": 18773.564453125, + "learning_rate": 9.500565215676132e-05, + "loss": 0.4798, + "step": 40850 + }, + { + "epoch": 0.21101944577728934, + "grad_norm": 18688.7265625, + "learning_rate": 9.498972555376282e-05, + "loss": 0.4773, + "step": 40900 + }, + { + "epoch": 0.21127741575990217, + "grad_norm": 22649.3671875, + "learning_rate": 9.497377493611998e-05, + "loss": 0.478, + "step": 40950 + }, + { + "epoch": 0.21153538574251501, + "grad_norm": 19575.95703125, + "learning_rate": 9.495780031234694e-05, + "loss": 0.4809, + "step": 41000 + }, + { + "epoch": 0.21179335572512784, + "grad_norm": 18587.681640625, + "learning_rate": 9.494180169097067e-05, + "loss": 0.4805, + "step": 41050 + }, + { + "epoch": 0.21205132570774066, + "grad_norm": 19466.5703125, + "learning_rate": 9.492577908053089e-05, + "loss": 0.4772, + "step": 41100 + }, + { + "epoch": 0.21230929569035348, + "grad_norm": 21085.15234375, + "learning_rate": 9.490973248958018e-05, + "loss": 0.4787, + "step": 41150 + }, + { + "epoch": 0.2125672656729663, + "grad_norm": 21866.95703125, + "learning_rate": 9.489366192668388e-05, + "loss": 0.4803, + "step": 41200 + }, + { + "epoch": 0.21282523565557912, + "grad_norm": 20759.609375, + "learning_rate": 9.487756740042015e-05, + "loss": 0.4782, + "step": 41250 + }, + { + "epoch": 0.21308320563819194, + "grad_norm": 20565.51171875, + "learning_rate": 9.486144891937997e-05, + "loss": 0.4765, + "step": 41300 + }, + { + "epoch": 0.21334117562080476, + "grad_norm": 21536.017578125, + "learning_rate": 9.484530649216705e-05, + "loss": 0.4753, + "step": 41350 + }, + { + "epoch": 0.21359914560341758, + "grad_norm": 19452.001953125, + "learning_rate": 9.482914012739788e-05, + "loss": 0.4807, + "step": 41400 + }, + { + "epoch": 0.2138571155860304, + "grad_norm": 21220.927734375, + "learning_rate": 9.481294983370179e-05, + "loss": 0.4803, + "step": 41450 + }, + { + "epoch": 0.21411508556864323, + "grad_norm": 18278.884765625, + "learning_rate": 9.479673561972082e-05, + "loss": 0.4807, + "step": 41500 + }, + { + "epoch": 0.21437305555125605, + "grad_norm": 21568.13671875, + "learning_rate": 9.478049749410983e-05, + "loss": 0.4751, + "step": 41550 + }, + { + "epoch": 0.21463102553386887, + "grad_norm": 21004.734375, + "learning_rate": 9.47642354655364e-05, + "loss": 0.4828, + "step": 41600 + }, + { + "epoch": 0.2148889955164817, + "grad_norm": 20709.193359375, + "learning_rate": 9.474794954268089e-05, + "loss": 0.477, + "step": 41650 + }, + { + "epoch": 0.21514696549909454, + "grad_norm": 21408.3671875, + "learning_rate": 9.47316397342364e-05, + "loss": 0.4783, + "step": 41700 + }, + { + "epoch": 0.21540493548170736, + "grad_norm": 18606.6328125, + "learning_rate": 9.47153060489088e-05, + "loss": 0.4771, + "step": 41750 + }, + { + "epoch": 0.21566290546432018, + "grad_norm": 19498.20703125, + "learning_rate": 9.469894849541667e-05, + "loss": 0.4782, + "step": 41800 + }, + { + "epoch": 0.215920875446933, + "grad_norm": 20441.9765625, + "learning_rate": 9.46825670824914e-05, + "loss": 0.4769, + "step": 41850 + }, + { + "epoch": 0.21617884542954582, + "grad_norm": 20925.109375, + "learning_rate": 9.466616181887704e-05, + "loss": 0.4858, + "step": 41900 + }, + { + "epoch": 0.21643681541215865, + "grad_norm": 21410.38671875, + "learning_rate": 9.464973271333042e-05, + "loss": 0.4791, + "step": 41950 + }, + { + "epoch": 0.21669478539477147, + "grad_norm": 19169.583984375, + "learning_rate": 9.463327977462106e-05, + "loss": 0.4783, + "step": 42000 + }, + { + "epoch": 0.2169527553773843, + "grad_norm": 19487.3359375, + "learning_rate": 9.461680301153124e-05, + "loss": 0.4792, + "step": 42050 + }, + { + "epoch": 0.2172107253599971, + "grad_norm": 21303.861328125, + "learning_rate": 9.460030243285592e-05, + "loss": 0.4811, + "step": 42100 + }, + { + "epoch": 0.21746869534260993, + "grad_norm": 21529.490234375, + "learning_rate": 9.458377804740279e-05, + "loss": 0.4761, + "step": 42150 + }, + { + "epoch": 0.21772666532522275, + "grad_norm": 21356.505859375, + "learning_rate": 9.456722986399227e-05, + "loss": 0.477, + "step": 42200 + }, + { + "epoch": 0.21798463530783557, + "grad_norm": 19551.33203125, + "learning_rate": 9.455065789145742e-05, + "loss": 0.4777, + "step": 42250 + }, + { + "epoch": 0.2182426052904484, + "grad_norm": 21424.58984375, + "learning_rate": 9.453406213864408e-05, + "loss": 0.4759, + "step": 42300 + }, + { + "epoch": 0.21850057527306121, + "grad_norm": 18835.1953125, + "learning_rate": 9.451744261441072e-05, + "loss": 0.4749, + "step": 42350 + }, + { + "epoch": 0.21875854525567404, + "grad_norm": 20333.490234375, + "learning_rate": 9.450079932762852e-05, + "loss": 0.4786, + "step": 42400 + }, + { + "epoch": 0.21901651523828689, + "grad_norm": 18957.232421875, + "learning_rate": 9.448413228718134e-05, + "loss": 0.4778, + "step": 42450 + }, + { + "epoch": 0.2192744852208997, + "grad_norm": 20251.939453125, + "learning_rate": 9.446744150196574e-05, + "loss": 0.4759, + "step": 42500 + }, + { + "epoch": 0.21953245520351253, + "grad_norm": 20740.82421875, + "learning_rate": 9.445072698089091e-05, + "loss": 0.4782, + "step": 42550 + }, + { + "epoch": 0.21979042518612535, + "grad_norm": 19501.91015625, + "learning_rate": 9.443398873287877e-05, + "loss": 0.479, + "step": 42600 + }, + { + "epoch": 0.22004839516873817, + "grad_norm": 20895.58984375, + "learning_rate": 9.441722676686386e-05, + "loss": 0.4754, + "step": 42650 + }, + { + "epoch": 0.220306365151351, + "grad_norm": 19932.66796875, + "learning_rate": 9.440044109179338e-05, + "loss": 0.4778, + "step": 42700 + }, + { + "epoch": 0.2205643351339638, + "grad_norm": 20158.693359375, + "learning_rate": 9.438363171662722e-05, + "loss": 0.4755, + "step": 42750 + }, + { + "epoch": 0.22082230511657663, + "grad_norm": 19128.953125, + "learning_rate": 9.436679865033789e-05, + "loss": 0.4744, + "step": 42800 + }, + { + "epoch": 0.22108027509918945, + "grad_norm": 19743.517578125, + "learning_rate": 9.434994190191054e-05, + "loss": 0.4781, + "step": 42850 + }, + { + "epoch": 0.22133824508180228, + "grad_norm": 17826.703125, + "learning_rate": 9.4333061480343e-05, + "loss": 0.4762, + "step": 42900 + }, + { + "epoch": 0.2215962150644151, + "grad_norm": 20606.48046875, + "learning_rate": 9.43161573946457e-05, + "loss": 0.4741, + "step": 42950 + }, + { + "epoch": 0.22185418504702792, + "grad_norm": 20116.66796875, + "learning_rate": 9.429922965384172e-05, + "loss": 0.4766, + "step": 43000 + }, + { + "epoch": 0.22211215502964074, + "grad_norm": 20560.970703125, + "learning_rate": 9.428227826696674e-05, + "loss": 0.481, + "step": 43050 + }, + { + "epoch": 0.22237012501225356, + "grad_norm": 20832.01953125, + "learning_rate": 9.42653032430691e-05, + "loss": 0.4806, + "step": 43100 + }, + { + "epoch": 0.2226280949948664, + "grad_norm": 18686.953125, + "learning_rate": 9.424830459120974e-05, + "loss": 0.4796, + "step": 43150 + }, + { + "epoch": 0.22288606497747923, + "grad_norm": 21061.240234375, + "learning_rate": 9.423128232046223e-05, + "loss": 0.474, + "step": 43200 + }, + { + "epoch": 0.22314403496009205, + "grad_norm": 21862.25, + "learning_rate": 9.421423643991267e-05, + "loss": 0.4721, + "step": 43250 + }, + { + "epoch": 0.22340200494270487, + "grad_norm": 18299.23828125, + "learning_rate": 9.419716695865988e-05, + "loss": 0.4744, + "step": 43300 + }, + { + "epoch": 0.2236599749253177, + "grad_norm": 20387.876953125, + "learning_rate": 9.418007388581517e-05, + "loss": 0.4748, + "step": 43350 + }, + { + "epoch": 0.22391794490793052, + "grad_norm": 21721.740234375, + "learning_rate": 9.416295723050254e-05, + "loss": 0.4782, + "step": 43400 + }, + { + "epoch": 0.22417591489054334, + "grad_norm": 20274.72265625, + "learning_rate": 9.414581700185851e-05, + "loss": 0.4734, + "step": 43450 + }, + { + "epoch": 0.22443388487315616, + "grad_norm": 22443.296875, + "learning_rate": 9.41286532090322e-05, + "loss": 0.4734, + "step": 43500 + }, + { + "epoch": 0.22469185485576898, + "grad_norm": 19874.8203125, + "learning_rate": 9.411146586118529e-05, + "loss": 0.4755, + "step": 43550 + }, + { + "epoch": 0.2249498248383818, + "grad_norm": 20362.3125, + "learning_rate": 9.409425496749209e-05, + "loss": 0.4776, + "step": 43600 + }, + { + "epoch": 0.22520779482099462, + "grad_norm": 22146.5078125, + "learning_rate": 9.40770205371394e-05, + "loss": 0.4784, + "step": 43650 + }, + { + "epoch": 0.22546576480360744, + "grad_norm": 19917.83203125, + "learning_rate": 9.405976257932667e-05, + "loss": 0.4744, + "step": 43700 + }, + { + "epoch": 0.22572373478622026, + "grad_norm": 19296.904296875, + "learning_rate": 9.404248110326583e-05, + "loss": 0.4766, + "step": 43750 + }, + { + "epoch": 0.22598170476883309, + "grad_norm": 20648.35546875, + "learning_rate": 9.402517611818142e-05, + "loss": 0.4801, + "step": 43800 + }, + { + "epoch": 0.22623967475144593, + "grad_norm": 21750.517578125, + "learning_rate": 9.40078476333105e-05, + "loss": 0.4752, + "step": 43850 + }, + { + "epoch": 0.22649764473405876, + "grad_norm": 21233.337890625, + "learning_rate": 9.399049565790266e-05, + "loss": 0.4758, + "step": 43900 + }, + { + "epoch": 0.22675561471667158, + "grad_norm": 21952.6796875, + "learning_rate": 9.397312020122006e-05, + "loss": 0.4755, + "step": 43950 + }, + { + "epoch": 0.2270135846992844, + "grad_norm": 18598.826171875, + "learning_rate": 9.39557212725374e-05, + "loss": 0.4725, + "step": 44000 + }, + { + "epoch": 0.22727155468189722, + "grad_norm": 20325.51171875, + "learning_rate": 9.393829888114188e-05, + "loss": 0.4789, + "step": 44050 + }, + { + "epoch": 0.22752952466451004, + "grad_norm": 17499.228515625, + "learning_rate": 9.392085303633323e-05, + "loss": 0.4738, + "step": 44100 + }, + { + "epoch": 0.22778749464712286, + "grad_norm": 21283.970703125, + "learning_rate": 9.39033837474237e-05, + "loss": 0.4743, + "step": 44150 + }, + { + "epoch": 0.22804546462973568, + "grad_norm": 19672.765625, + "learning_rate": 9.388589102373807e-05, + "loss": 0.4751, + "step": 44200 + }, + { + "epoch": 0.2283034346123485, + "grad_norm": 19722.314453125, + "learning_rate": 9.386837487461361e-05, + "loss": 0.4767, + "step": 44250 + }, + { + "epoch": 0.22856140459496133, + "grad_norm": 19948.154296875, + "learning_rate": 9.38508353094001e-05, + "loss": 0.4765, + "step": 44300 + }, + { + "epoch": 0.22881937457757415, + "grad_norm": 19880.611328125, + "learning_rate": 9.383327233745984e-05, + "loss": 0.4754, + "step": 44350 + }, + { + "epoch": 0.22907734456018697, + "grad_norm": 20052.91796875, + "learning_rate": 9.381568596816757e-05, + "loss": 0.4801, + "step": 44400 + }, + { + "epoch": 0.2293353145427998, + "grad_norm": 23129.869140625, + "learning_rate": 9.379807621091057e-05, + "loss": 0.4713, + "step": 44450 + }, + { + "epoch": 0.2295932845254126, + "grad_norm": 19922.0703125, + "learning_rate": 9.37804430750886e-05, + "loss": 0.4736, + "step": 44500 + }, + { + "epoch": 0.22985125450802546, + "grad_norm": 19704.24609375, + "learning_rate": 9.376278657011388e-05, + "loss": 0.4682, + "step": 44550 + }, + { + "epoch": 0.23010922449063828, + "grad_norm": 19080.125, + "learning_rate": 9.374510670541109e-05, + "loss": 0.4751, + "step": 44600 + }, + { + "epoch": 0.2303671944732511, + "grad_norm": 20858.388671875, + "learning_rate": 9.372740349041742e-05, + "loss": 0.4734, + "step": 44650 + }, + { + "epoch": 0.23062516445586392, + "grad_norm": 22074.056640625, + "learning_rate": 9.37096769345825e-05, + "loss": 0.4699, + "step": 44700 + }, + { + "epoch": 0.23088313443847674, + "grad_norm": 21852.623046875, + "learning_rate": 9.369192704736842e-05, + "loss": 0.47, + "step": 44750 + }, + { + "epoch": 0.23114110442108957, + "grad_norm": 20904.033203125, + "learning_rate": 9.367415383824974e-05, + "loss": 0.4736, + "step": 44800 + }, + { + "epoch": 0.2313990744037024, + "grad_norm": 18965.021484375, + "learning_rate": 9.365635731671343e-05, + "loss": 0.4687, + "step": 44850 + }, + { + "epoch": 0.2316570443863152, + "grad_norm": 16994.271484375, + "learning_rate": 9.363853749225894e-05, + "loss": 0.4747, + "step": 44900 + }, + { + "epoch": 0.23191501436892803, + "grad_norm": 19191.794921875, + "learning_rate": 9.362069437439814e-05, + "loss": 0.4689, + "step": 44950 + }, + { + "epoch": 0.23217298435154085, + "grad_norm": 19691.982421875, + "learning_rate": 9.360282797265537e-05, + "loss": 0.4683, + "step": 45000 + }, + { + "epoch": 0.23217298435154085, + "eval_loss": 0.4633353352546692, + "eval_runtime": 3256.5731, + "eval_samples_per_second": 952.265, + "eval_steps_per_second": 1.86, + "step": 45000 + }, + { + "epoch": 0.23243095433415367, + "grad_norm": 21778.20703125, + "learning_rate": 9.358493829656732e-05, + "loss": 0.4726, + "step": 45050 + }, + { + "epoch": 0.2326889243167665, + "grad_norm": 20281.802734375, + "learning_rate": 9.35670253556832e-05, + "loss": 0.4752, + "step": 45100 + }, + { + "epoch": 0.23294689429937931, + "grad_norm": 20620.580078125, + "learning_rate": 9.354908915956456e-05, + "loss": 0.474, + "step": 45150 + }, + { + "epoch": 0.23320486428199214, + "grad_norm": 21115.86328125, + "learning_rate": 9.353112971778542e-05, + "loss": 0.4763, + "step": 45200 + }, + { + "epoch": 0.23346283426460496, + "grad_norm": 19746.30859375, + "learning_rate": 9.351314703993215e-05, + "loss": 0.4792, + "step": 45250 + }, + { + "epoch": 0.2337208042472178, + "grad_norm": 21270.26171875, + "learning_rate": 9.349514113560358e-05, + "loss": 0.4726, + "step": 45300 + }, + { + "epoch": 0.23397877422983063, + "grad_norm": 20273.658203125, + "learning_rate": 9.347711201441092e-05, + "loss": 0.4683, + "step": 45350 + }, + { + "epoch": 0.23423674421244345, + "grad_norm": 19746.9609375, + "learning_rate": 9.345905968597773e-05, + "loss": 0.4778, + "step": 45400 + }, + { + "epoch": 0.23449471419505627, + "grad_norm": 22999.52734375, + "learning_rate": 9.344098415994003e-05, + "loss": 0.4799, + "step": 45450 + }, + { + "epoch": 0.2347526841776691, + "grad_norm": 19922.41015625, + "learning_rate": 9.342288544594617e-05, + "loss": 0.4773, + "step": 45500 + }, + { + "epoch": 0.2350106541602819, + "grad_norm": 19793.73828125, + "learning_rate": 9.340476355365688e-05, + "loss": 0.4743, + "step": 45550 + }, + { + "epoch": 0.23526862414289473, + "grad_norm": 19525.74609375, + "learning_rate": 9.33866184927453e-05, + "loss": 0.4729, + "step": 45600 + }, + { + "epoch": 0.23552659412550755, + "grad_norm": 26093.65625, + "learning_rate": 9.336845027289691e-05, + "loss": 0.4767, + "step": 45650 + }, + { + "epoch": 0.23578456410812038, + "grad_norm": 20045.16796875, + "learning_rate": 9.335025890380953e-05, + "loss": 0.4768, + "step": 45700 + }, + { + "epoch": 0.2360425340907332, + "grad_norm": 21272.36328125, + "learning_rate": 9.333204439519338e-05, + "loss": 0.4738, + "step": 45750 + }, + { + "epoch": 0.23630050407334602, + "grad_norm": 19174.44921875, + "learning_rate": 9.3313806756771e-05, + "loss": 0.4752, + "step": 45800 + }, + { + "epoch": 0.23655847405595884, + "grad_norm": 18446.640625, + "learning_rate": 9.32955459982773e-05, + "loss": 0.4747, + "step": 45850 + }, + { + "epoch": 0.23681644403857166, + "grad_norm": 23397.7109375, + "learning_rate": 9.327726212945953e-05, + "loss": 0.4723, + "step": 45900 + }, + { + "epoch": 0.23707441402118448, + "grad_norm": 20350.755859375, + "learning_rate": 9.325895516007725e-05, + "loss": 0.4671, + "step": 45950 + }, + { + "epoch": 0.23733238400379733, + "grad_norm": 21147.5546875, + "learning_rate": 9.324062509990235e-05, + "loss": 0.4689, + "step": 46000 + }, + { + "epoch": 0.23759035398641015, + "grad_norm": 19813.130859375, + "learning_rate": 9.322227195871909e-05, + "loss": 0.4723, + "step": 46050 + }, + { + "epoch": 0.23784832396902297, + "grad_norm": 22310.037109375, + "learning_rate": 9.320389574632399e-05, + "loss": 0.4727, + "step": 46100 + }, + { + "epoch": 0.2381062939516358, + "grad_norm": 19646.509765625, + "learning_rate": 9.318549647252596e-05, + "loss": 0.4723, + "step": 46150 + }, + { + "epoch": 0.23836426393424862, + "grad_norm": 20145.29296875, + "learning_rate": 9.316707414714614e-05, + "loss": 0.4652, + "step": 46200 + }, + { + "epoch": 0.23862223391686144, + "grad_norm": 19513.466796875, + "learning_rate": 9.314862878001803e-05, + "loss": 0.4774, + "step": 46250 + }, + { + "epoch": 0.23888020389947426, + "grad_norm": 20701.25390625, + "learning_rate": 9.313016038098739e-05, + "loss": 0.4721, + "step": 46300 + }, + { + "epoch": 0.23913817388208708, + "grad_norm": 18766.328125, + "learning_rate": 9.31116689599123e-05, + "loss": 0.4691, + "step": 46350 + }, + { + "epoch": 0.2393961438646999, + "grad_norm": 20925.5, + "learning_rate": 9.309315452666314e-05, + "loss": 0.4743, + "step": 46400 + }, + { + "epoch": 0.23965411384731272, + "grad_norm": 19413.0703125, + "learning_rate": 9.307461709112253e-05, + "loss": 0.469, + "step": 46450 + }, + { + "epoch": 0.23991208382992554, + "grad_norm": 18517.669921875, + "learning_rate": 9.305605666318543e-05, + "loss": 0.4769, + "step": 46500 + }, + { + "epoch": 0.24017005381253836, + "grad_norm": 20222.50390625, + "learning_rate": 9.3037473252759e-05, + "loss": 0.4701, + "step": 46550 + }, + { + "epoch": 0.24042802379515119, + "grad_norm": 21650.63671875, + "learning_rate": 9.301886686976272e-05, + "loss": 0.4693, + "step": 46600 + }, + { + "epoch": 0.240685993777764, + "grad_norm": 18923.498046875, + "learning_rate": 9.300023752412832e-05, + "loss": 0.4749, + "step": 46650 + }, + { + "epoch": 0.24094396376037686, + "grad_norm": 21353.748046875, + "learning_rate": 9.298158522579978e-05, + "loss": 0.4735, + "step": 46700 + }, + { + "epoch": 0.24120193374298968, + "grad_norm": 19405.5234375, + "learning_rate": 9.296290998473334e-05, + "loss": 0.4708, + "step": 46750 + }, + { + "epoch": 0.2414599037256025, + "grad_norm": 21692.3203125, + "learning_rate": 9.294421181089747e-05, + "loss": 0.4644, + "step": 46800 + }, + { + "epoch": 0.24171787370821532, + "grad_norm": 18488.671875, + "learning_rate": 9.292549071427291e-05, + "loss": 0.4668, + "step": 46850 + }, + { + "epoch": 0.24197584369082814, + "grad_norm": 21951.712890625, + "learning_rate": 9.29067467048526e-05, + "loss": 0.4749, + "step": 46900 + }, + { + "epoch": 0.24223381367344096, + "grad_norm": 20673.82421875, + "learning_rate": 9.288797979264176e-05, + "loss": 0.4687, + "step": 46950 + }, + { + "epoch": 0.24249178365605378, + "grad_norm": 18687.69140625, + "learning_rate": 9.286918998765776e-05, + "loss": 0.4731, + "step": 47000 + }, + { + "epoch": 0.2427497536386666, + "grad_norm": 18882.009765625, + "learning_rate": 9.285037729993027e-05, + "loss": 0.4699, + "step": 47050 + }, + { + "epoch": 0.24300772362127943, + "grad_norm": 22378.685546875, + "learning_rate": 9.283154173950112e-05, + "loss": 0.4678, + "step": 47100 + }, + { + "epoch": 0.24326569360389225, + "grad_norm": 19457.736328125, + "learning_rate": 9.281268331642439e-05, + "loss": 0.4665, + "step": 47150 + }, + { + "epoch": 0.24352366358650507, + "grad_norm": 19794.4296875, + "learning_rate": 9.279380204076631e-05, + "loss": 0.4683, + "step": 47200 + }, + { + "epoch": 0.2437816335691179, + "grad_norm": 18910.41796875, + "learning_rate": 9.277489792260536e-05, + "loss": 0.4683, + "step": 47250 + }, + { + "epoch": 0.2440396035517307, + "grad_norm": 21774.009765625, + "learning_rate": 9.275597097203216e-05, + "loss": 0.4729, + "step": 47300 + }, + { + "epoch": 0.24429757353434353, + "grad_norm": 21403.1796875, + "learning_rate": 9.273702119914962e-05, + "loss": 0.4681, + "step": 47350 + }, + { + "epoch": 0.24455554351695638, + "grad_norm": 20333.400390625, + "learning_rate": 9.271804861407269e-05, + "loss": 0.4713, + "step": 47400 + }, + { + "epoch": 0.2448135134995692, + "grad_norm": 22196.32421875, + "learning_rate": 9.269905322692862e-05, + "loss": 0.468, + "step": 47450 + }, + { + "epoch": 0.24507148348218202, + "grad_norm": 18356.623046875, + "learning_rate": 9.268003504785673e-05, + "loss": 0.4663, + "step": 47500 + }, + { + "epoch": 0.24532945346479484, + "grad_norm": 20337.546875, + "learning_rate": 9.266099408700859e-05, + "loss": 0.4657, + "step": 47550 + }, + { + "epoch": 0.24558742344740767, + "grad_norm": 20426.03515625, + "learning_rate": 9.264193035454789e-05, + "loss": 0.4677, + "step": 47600 + }, + { + "epoch": 0.2458453934300205, + "grad_norm": 20962.81640625, + "learning_rate": 9.262284386065047e-05, + "loss": 0.4759, + "step": 47650 + }, + { + "epoch": 0.2461033634126333, + "grad_norm": 20498.919921875, + "learning_rate": 9.260373461550435e-05, + "loss": 0.4647, + "step": 47700 + }, + { + "epoch": 0.24636133339524613, + "grad_norm": 21223.171875, + "learning_rate": 9.258460262930967e-05, + "loss": 0.4698, + "step": 47750 + }, + { + "epoch": 0.24661930337785895, + "grad_norm": 21146.671875, + "learning_rate": 9.256544791227871e-05, + "loss": 0.4727, + "step": 47800 + }, + { + "epoch": 0.24687727336047177, + "grad_norm": 19261.603515625, + "learning_rate": 9.254627047463588e-05, + "loss": 0.4734, + "step": 47850 + }, + { + "epoch": 0.2471352433430846, + "grad_norm": 21131.298828125, + "learning_rate": 9.252707032661774e-05, + "loss": 0.4686, + "step": 47900 + }, + { + "epoch": 0.24739321332569741, + "grad_norm": 22491.212890625, + "learning_rate": 9.250784747847294e-05, + "loss": 0.4701, + "step": 47950 + }, + { + "epoch": 0.24765118330831024, + "grad_norm": 20198.486328125, + "learning_rate": 9.248860194046228e-05, + "loss": 0.4657, + "step": 48000 + }, + { + "epoch": 0.24790915329092306, + "grad_norm": 21754.078125, + "learning_rate": 9.246933372285863e-05, + "loss": 0.4674, + "step": 48050 + }, + { + "epoch": 0.24816712327353588, + "grad_norm": 20948.244140625, + "learning_rate": 9.245004283594703e-05, + "loss": 0.4604, + "step": 48100 + }, + { + "epoch": 0.24842509325614873, + "grad_norm": 20916.3671875, + "learning_rate": 9.243072929002454e-05, + "loss": 0.4656, + "step": 48150 + }, + { + "epoch": 0.24868306323876155, + "grad_norm": 19935.021484375, + "learning_rate": 9.24113930954004e-05, + "loss": 0.4735, + "step": 48200 + }, + { + "epoch": 0.24894103322137437, + "grad_norm": 20075.96875, + "learning_rate": 9.239203426239585e-05, + "loss": 0.4679, + "step": 48250 + }, + { + "epoch": 0.2491990032039872, + "grad_norm": 20107.943359375, + "learning_rate": 9.23726528013443e-05, + "loss": 0.4773, + "step": 48300 + }, + { + "epoch": 0.2494569731866, + "grad_norm": 20341.1171875, + "learning_rate": 9.235324872259119e-05, + "loss": 0.4699, + "step": 48350 + }, + { + "epoch": 0.24971494316921283, + "grad_norm": 21787.4296875, + "learning_rate": 9.233382203649401e-05, + "loss": 0.4665, + "step": 48400 + }, + { + "epoch": 0.24997291315182565, + "grad_norm": 17707.583984375, + "learning_rate": 9.231437275342239e-05, + "loss": 0.4678, + "step": 48450 + }, + { + "epoch": 0.2502308831344385, + "grad_norm": 24467.810546875, + "learning_rate": 9.229490088375797e-05, + "loss": 0.466, + "step": 48500 + }, + { + "epoch": 0.2504888531170513, + "grad_norm": 20794.73828125, + "learning_rate": 9.227540643789446e-05, + "loss": 0.4711, + "step": 48550 + }, + { + "epoch": 0.2507468230996641, + "grad_norm": 20147.099609375, + "learning_rate": 9.225588942623758e-05, + "loss": 0.4689, + "step": 48600 + }, + { + "epoch": 0.25100479308227697, + "grad_norm": 20704.037109375, + "learning_rate": 9.223634985920517e-05, + "loss": 0.4687, + "step": 48650 + }, + { + "epoch": 0.25126276306488976, + "grad_norm": 19472.21875, + "learning_rate": 9.221678774722707e-05, + "loss": 0.4636, + "step": 48700 + }, + { + "epoch": 0.2515207330475026, + "grad_norm": 21352.755859375, + "learning_rate": 9.219720310074515e-05, + "loss": 0.4671, + "step": 48750 + }, + { + "epoch": 0.2517787030301154, + "grad_norm": 20956.146484375, + "learning_rate": 9.21775959302133e-05, + "loss": 0.4703, + "step": 48800 + }, + { + "epoch": 0.25203667301272825, + "grad_norm": 26295.541015625, + "learning_rate": 9.215796624609749e-05, + "loss": 0.4742, + "step": 48850 + }, + { + "epoch": 0.25229464299534105, + "grad_norm": 19862.15625, + "learning_rate": 9.213831405887564e-05, + "loss": 0.468, + "step": 48900 + }, + { + "epoch": 0.2525526129779539, + "grad_norm": 21760.404296875, + "learning_rate": 9.211863937903769e-05, + "loss": 0.4728, + "step": 48950 + }, + { + "epoch": 0.2528105829605667, + "grad_norm": 22488.1484375, + "learning_rate": 9.209894221708564e-05, + "loss": 0.4627, + "step": 49000 + }, + { + "epoch": 0.25306855294317954, + "grad_norm": 20244.5, + "learning_rate": 9.20792225835334e-05, + "loss": 0.4706, + "step": 49050 + }, + { + "epoch": 0.25332652292579233, + "grad_norm": 22642.44140625, + "learning_rate": 9.205948048890698e-05, + "loss": 0.4708, + "step": 49100 + }, + { + "epoch": 0.2535844929084052, + "grad_norm": 23121.501953125, + "learning_rate": 9.203971594374432e-05, + "loss": 0.4723, + "step": 49150 + }, + { + "epoch": 0.25384246289101803, + "grad_norm": 19514.916015625, + "learning_rate": 9.201992895859532e-05, + "loss": 0.4692, + "step": 49200 + }, + { + "epoch": 0.2541004328736308, + "grad_norm": 19467.662109375, + "learning_rate": 9.200011954402193e-05, + "loss": 0.4719, + "step": 49250 + }, + { + "epoch": 0.25435840285624367, + "grad_norm": 20737.7578125, + "learning_rate": 9.198028771059799e-05, + "loss": 0.4643, + "step": 49300 + }, + { + "epoch": 0.25461637283885646, + "grad_norm": 20229.341796875, + "learning_rate": 9.196043346890939e-05, + "loss": 0.462, + "step": 49350 + }, + { + "epoch": 0.2548743428214693, + "grad_norm": 23094.35546875, + "learning_rate": 9.194055682955392e-05, + "loss": 0.4701, + "step": 49400 + }, + { + "epoch": 0.2551323128040821, + "grad_norm": 21099.541015625, + "learning_rate": 9.192065780314132e-05, + "loss": 0.466, + "step": 49450 + }, + { + "epoch": 0.25539028278669496, + "grad_norm": 21500.302734375, + "learning_rate": 9.190073640029335e-05, + "loss": 0.4703, + "step": 49500 + }, + { + "epoch": 0.25564825276930775, + "grad_norm": 24272.228515625, + "learning_rate": 9.188079263164366e-05, + "loss": 0.4672, + "step": 49550 + }, + { + "epoch": 0.2559062227519206, + "grad_norm": 21129.013671875, + "learning_rate": 9.186082650783783e-05, + "loss": 0.4715, + "step": 49600 + }, + { + "epoch": 0.2561641927345334, + "grad_norm": 20696.32421875, + "learning_rate": 9.184083803953339e-05, + "loss": 0.4646, + "step": 49650 + }, + { + "epoch": 0.25642216271714624, + "grad_norm": 20142.7890625, + "learning_rate": 9.18208272373998e-05, + "loss": 0.4627, + "step": 49700 + }, + { + "epoch": 0.25668013269975903, + "grad_norm": 18810.43359375, + "learning_rate": 9.180079411211847e-05, + "loss": 0.4659, + "step": 49750 + }, + { + "epoch": 0.2569381026823719, + "grad_norm": 23121.84765625, + "learning_rate": 9.178073867438264e-05, + "loss": 0.4683, + "step": 49800 + }, + { + "epoch": 0.2571960726649847, + "grad_norm": 20432.021484375, + "learning_rate": 9.176066093489755e-05, + "loss": 0.4704, + "step": 49850 + }, + { + "epoch": 0.2574540426475975, + "grad_norm": 22056.09765625, + "learning_rate": 9.17405609043803e-05, + "loss": 0.4753, + "step": 49900 + }, + { + "epoch": 0.2577120126302104, + "grad_norm": 21094.931640625, + "learning_rate": 9.17204385935599e-05, + "loss": 0.4648, + "step": 49950 + }, + { + "epoch": 0.25796998261282317, + "grad_norm": 20127.525390625, + "learning_rate": 9.170029401317725e-05, + "loss": 0.4646, + "step": 50000 + }, + { + "epoch": 0.25796998261282317, + "eval_loss": 0.4567689299583435, + "eval_runtime": 3268.0543, + "eval_samples_per_second": 948.919, + "eval_steps_per_second": 1.853, + "step": 50000 + }, + { + "epoch": 0.258227952595436, + "grad_norm": 20947.306640625, + "learning_rate": 9.168012717398516e-05, + "loss": 0.4688, + "step": 50050 + }, + { + "epoch": 0.2584859225780488, + "grad_norm": 23591.646484375, + "learning_rate": 9.165993808674823e-05, + "loss": 0.4683, + "step": 50100 + }, + { + "epoch": 0.25874389256066166, + "grad_norm": 21227.677734375, + "learning_rate": 9.163972676224306e-05, + "loss": 0.4671, + "step": 50150 + }, + { + "epoch": 0.25900186254327445, + "grad_norm": 20084.953125, + "learning_rate": 9.161949321125807e-05, + "loss": 0.4598, + "step": 50200 + }, + { + "epoch": 0.2592598325258873, + "grad_norm": 21139.5, + "learning_rate": 9.159923744459349e-05, + "loss": 0.4707, + "step": 50250 + }, + { + "epoch": 0.2595178025085001, + "grad_norm": 20410.794921875, + "learning_rate": 9.15789594730615e-05, + "loss": 0.4675, + "step": 50300 + }, + { + "epoch": 0.25977577249111294, + "grad_norm": 20010.328125, + "learning_rate": 9.155865930748608e-05, + "loss": 0.4599, + "step": 50350 + }, + { + "epoch": 0.26003374247372574, + "grad_norm": 23502.890625, + "learning_rate": 9.153833695870304e-05, + "loss": 0.4664, + "step": 50400 + }, + { + "epoch": 0.2602917124563386, + "grad_norm": 20373.498046875, + "learning_rate": 9.151799243756008e-05, + "loss": 0.4655, + "step": 50450 + }, + { + "epoch": 0.2605496824389514, + "grad_norm": 21093.669921875, + "learning_rate": 9.149762575491671e-05, + "loss": 0.4623, + "step": 50500 + }, + { + "epoch": 0.26080765242156423, + "grad_norm": 22206.87890625, + "learning_rate": 9.147723692164427e-05, + "loss": 0.4687, + "step": 50550 + }, + { + "epoch": 0.261065622404177, + "grad_norm": 23264.875, + "learning_rate": 9.145682594862593e-05, + "loss": 0.4705, + "step": 50600 + }, + { + "epoch": 0.26132359238678987, + "grad_norm": 22029.849609375, + "learning_rate": 9.143639284675664e-05, + "loss": 0.4673, + "step": 50650 + }, + { + "epoch": 0.2615815623694027, + "grad_norm": 23016.955078125, + "learning_rate": 9.141593762694323e-05, + "loss": 0.4663, + "step": 50700 + }, + { + "epoch": 0.2618395323520155, + "grad_norm": 21590.80859375, + "learning_rate": 9.139546030010427e-05, + "loss": 0.4684, + "step": 50750 + }, + { + "epoch": 0.26209750233462836, + "grad_norm": 19839.986328125, + "learning_rate": 9.13749608771702e-05, + "loss": 0.4682, + "step": 50800 + }, + { + "epoch": 0.26235547231724116, + "grad_norm": 17922.802734375, + "learning_rate": 9.135443936908318e-05, + "loss": 0.4601, + "step": 50850 + }, + { + "epoch": 0.262613442299854, + "grad_norm": 21141.119140625, + "learning_rate": 9.133389578679723e-05, + "loss": 0.467, + "step": 50900 + }, + { + "epoch": 0.2628714122824668, + "grad_norm": 21858.158203125, + "learning_rate": 9.131333014127806e-05, + "loss": 0.4663, + "step": 50950 + }, + { + "epoch": 0.26312938226507965, + "grad_norm": 21516.46875, + "learning_rate": 9.129274244350326e-05, + "loss": 0.4656, + "step": 51000 + }, + { + "epoch": 0.26338735224769244, + "grad_norm": 21403.263671875, + "learning_rate": 9.127213270446213e-05, + "loss": 0.4717, + "step": 51050 + }, + { + "epoch": 0.2636453222303053, + "grad_norm": 20405.4296875, + "learning_rate": 9.125150093515575e-05, + "loss": 0.4656, + "step": 51100 + }, + { + "epoch": 0.2639032922129181, + "grad_norm": 21057.57421875, + "learning_rate": 9.123084714659698e-05, + "loss": 0.4655, + "step": 51150 + }, + { + "epoch": 0.26416126219553093, + "grad_norm": 19891.15234375, + "learning_rate": 9.121017134981036e-05, + "loss": 0.4706, + "step": 51200 + }, + { + "epoch": 0.2644192321781437, + "grad_norm": 20441.30078125, + "learning_rate": 9.118947355583228e-05, + "loss": 0.4707, + "step": 51250 + }, + { + "epoch": 0.2646772021607566, + "grad_norm": 22182.67578125, + "learning_rate": 9.11687537757108e-05, + "loss": 0.4633, + "step": 51300 + }, + { + "epoch": 0.2649351721433694, + "grad_norm": 18211.728515625, + "learning_rate": 9.114801202050574e-05, + "loss": 0.4677, + "step": 51350 + }, + { + "epoch": 0.2651931421259822, + "grad_norm": 20691.697265625, + "learning_rate": 9.112724830128865e-05, + "loss": 0.4634, + "step": 51400 + }, + { + "epoch": 0.26545111210859507, + "grad_norm": 19717.75390625, + "learning_rate": 9.110646262914279e-05, + "loss": 0.4647, + "step": 51450 + }, + { + "epoch": 0.26570908209120786, + "grad_norm": 19860.55078125, + "learning_rate": 9.108565501516318e-05, + "loss": 0.4665, + "step": 51500 + }, + { + "epoch": 0.2659670520738207, + "grad_norm": 20122.984375, + "learning_rate": 9.106482547045648e-05, + "loss": 0.4663, + "step": 51550 + }, + { + "epoch": 0.2662250220564335, + "grad_norm": 21214.724609375, + "learning_rate": 9.104397400614112e-05, + "loss": 0.4676, + "step": 51600 + }, + { + "epoch": 0.26648299203904635, + "grad_norm": 24545.041015625, + "learning_rate": 9.102310063334722e-05, + "loss": 0.4705, + "step": 51650 + }, + { + "epoch": 0.26674096202165914, + "grad_norm": 22479.380859375, + "learning_rate": 9.100220536321655e-05, + "loss": 0.4616, + "step": 51700 + }, + { + "epoch": 0.266998932004272, + "grad_norm": 20262.27734375, + "learning_rate": 9.098128820690264e-05, + "loss": 0.4569, + "step": 51750 + }, + { + "epoch": 0.2672569019868848, + "grad_norm": 20906.880859375, + "learning_rate": 9.096034917557062e-05, + "loss": 0.468, + "step": 51800 + }, + { + "epoch": 0.26751487196949764, + "grad_norm": 20986.455078125, + "learning_rate": 9.093938828039737e-05, + "loss": 0.4697, + "step": 51850 + }, + { + "epoch": 0.26777284195211043, + "grad_norm": 22425.681640625, + "learning_rate": 9.09184055325714e-05, + "loss": 0.4692, + "step": 51900 + }, + { + "epoch": 0.2680308119347233, + "grad_norm": 21817.744140625, + "learning_rate": 9.089740094329288e-05, + "loss": 0.4726, + "step": 51950 + }, + { + "epoch": 0.26828878191733607, + "grad_norm": 20527.017578125, + "learning_rate": 9.087637452377369e-05, + "loss": 0.459, + "step": 52000 + }, + { + "epoch": 0.2685467518999489, + "grad_norm": 24486.521484375, + "learning_rate": 9.08553262852373e-05, + "loss": 0.4624, + "step": 52050 + }, + { + "epoch": 0.26880472188256177, + "grad_norm": 20964.537109375, + "learning_rate": 9.083425623891885e-05, + "loss": 0.4657, + "step": 52100 + }, + { + "epoch": 0.26906269186517456, + "grad_norm": 20966.478515625, + "learning_rate": 9.081316439606513e-05, + "loss": 0.4723, + "step": 52150 + }, + { + "epoch": 0.2693206618477874, + "grad_norm": 20067.330078125, + "learning_rate": 9.079205076793457e-05, + "loss": 0.4644, + "step": 52200 + }, + { + "epoch": 0.2695786318304002, + "grad_norm": 21526.298828125, + "learning_rate": 9.077091536579719e-05, + "loss": 0.4602, + "step": 52250 + }, + { + "epoch": 0.26983660181301306, + "grad_norm": 20446.767578125, + "learning_rate": 9.074975820093468e-05, + "loss": 0.4671, + "step": 52300 + }, + { + "epoch": 0.27009457179562585, + "grad_norm": 19936.599609375, + "learning_rate": 9.072857928464029e-05, + "loss": 0.4626, + "step": 52350 + }, + { + "epoch": 0.2703525417782387, + "grad_norm": 21716.60546875, + "learning_rate": 9.070737862821896e-05, + "loss": 0.4642, + "step": 52400 + }, + { + "epoch": 0.2706105117608515, + "grad_norm": 17588.40625, + "learning_rate": 9.068615624298717e-05, + "loss": 0.4595, + "step": 52450 + }, + { + "epoch": 0.27086848174346434, + "grad_norm": 21721.138671875, + "learning_rate": 9.066491214027302e-05, + "loss": 0.4639, + "step": 52500 + }, + { + "epoch": 0.27112645172607713, + "grad_norm": 19480.875, + "learning_rate": 9.06436463314162e-05, + "loss": 0.4654, + "step": 52550 + }, + { + "epoch": 0.27138442170869, + "grad_norm": 22658.076171875, + "learning_rate": 9.062235882776797e-05, + "loss": 0.4653, + "step": 52600 + }, + { + "epoch": 0.2716423916913028, + "grad_norm": 22396.4140625, + "learning_rate": 9.060104964069121e-05, + "loss": 0.4634, + "step": 52650 + }, + { + "epoch": 0.2719003616739156, + "grad_norm": 22354.28125, + "learning_rate": 9.057971878156036e-05, + "loss": 0.4626, + "step": 52700 + }, + { + "epoch": 0.2721583316565285, + "grad_norm": 19845.22265625, + "learning_rate": 9.05583662617614e-05, + "loss": 0.4666, + "step": 52750 + }, + { + "epoch": 0.27241630163914127, + "grad_norm": 19933.978515625, + "learning_rate": 9.053699209269188e-05, + "loss": 0.4601, + "step": 52800 + }, + { + "epoch": 0.2726742716217541, + "grad_norm": 21288.86328125, + "learning_rate": 9.051559628576094e-05, + "loss": 0.4622, + "step": 52850 + }, + { + "epoch": 0.2729322416043669, + "grad_norm": 20604.05078125, + "learning_rate": 9.049417885238927e-05, + "loss": 0.4618, + "step": 52900 + }, + { + "epoch": 0.27319021158697976, + "grad_norm": 18641.544921875, + "learning_rate": 9.047273980400903e-05, + "loss": 0.46, + "step": 52950 + }, + { + "epoch": 0.27344818156959255, + "grad_norm": 22482.8125, + "learning_rate": 9.045127915206398e-05, + "loss": 0.4673, + "step": 53000 + }, + { + "epoch": 0.2737061515522054, + "grad_norm": 20967.9375, + "learning_rate": 9.042979690800943e-05, + "loss": 0.4607, + "step": 53050 + }, + { + "epoch": 0.2739641215348182, + "grad_norm": 22371.90234375, + "learning_rate": 9.040829308331216e-05, + "loss": 0.4624, + "step": 53100 + }, + { + "epoch": 0.27422209151743104, + "grad_norm": 19802.947265625, + "learning_rate": 9.03867676894505e-05, + "loss": 0.4542, + "step": 53150 + }, + { + "epoch": 0.27448006150004384, + "grad_norm": 21255.974609375, + "learning_rate": 9.03652207379143e-05, + "loss": 0.4636, + "step": 53200 + }, + { + "epoch": 0.2747380314826567, + "grad_norm": 21687.16796875, + "learning_rate": 9.034365224020489e-05, + "loss": 0.4626, + "step": 53250 + }, + { + "epoch": 0.2749960014652695, + "grad_norm": 21386.275390625, + "learning_rate": 9.032206220783512e-05, + "loss": 0.4659, + "step": 53300 + }, + { + "epoch": 0.27525397144788233, + "grad_norm": 19433.888671875, + "learning_rate": 9.030045065232935e-05, + "loss": 0.4585, + "step": 53350 + }, + { + "epoch": 0.2755119414304951, + "grad_norm": 20615.021484375, + "learning_rate": 9.027881758522339e-05, + "loss": 0.4619, + "step": 53400 + }, + { + "epoch": 0.27576991141310797, + "grad_norm": 20498.369140625, + "learning_rate": 9.025716301806454e-05, + "loss": 0.4658, + "step": 53450 + }, + { + "epoch": 0.2760278813957208, + "grad_norm": 20348.955078125, + "learning_rate": 9.023548696241162e-05, + "loss": 0.4637, + "step": 53500 + }, + { + "epoch": 0.2762858513783336, + "grad_norm": 18524.3203125, + "learning_rate": 9.021378942983487e-05, + "loss": 0.4636, + "step": 53550 + }, + { + "epoch": 0.27654382136094646, + "grad_norm": 20778.064453125, + "learning_rate": 9.019207043191602e-05, + "loss": 0.4604, + "step": 53600 + }, + { + "epoch": 0.27680179134355926, + "grad_norm": 19481.369140625, + "learning_rate": 9.017032998024823e-05, + "loss": 0.4629, + "step": 53650 + }, + { + "epoch": 0.2770597613261721, + "grad_norm": 20873.8515625, + "learning_rate": 9.014856808643617e-05, + "loss": 0.4647, + "step": 53700 + }, + { + "epoch": 0.2773177313087849, + "grad_norm": 21859.05078125, + "learning_rate": 9.012678476209591e-05, + "loss": 0.4621, + "step": 53750 + }, + { + "epoch": 0.27757570129139775, + "grad_norm": 20832.587890625, + "learning_rate": 9.010498001885492e-05, + "loss": 0.463, + "step": 53800 + }, + { + "epoch": 0.27783367127401054, + "grad_norm": 18435.703125, + "learning_rate": 9.00831538683522e-05, + "loss": 0.466, + "step": 53850 + }, + { + "epoch": 0.2780916412566234, + "grad_norm": 21496.61328125, + "learning_rate": 9.006130632223811e-05, + "loss": 0.4611, + "step": 53900 + }, + { + "epoch": 0.2783496112392362, + "grad_norm": 21796.873046875, + "learning_rate": 9.003943739217444e-05, + "loss": 0.4587, + "step": 53950 + }, + { + "epoch": 0.27860758122184903, + "grad_norm": 21053.099609375, + "learning_rate": 9.001754708983443e-05, + "loss": 0.4659, + "step": 54000 + }, + { + "epoch": 0.2788655512044618, + "grad_norm": 20332.98828125, + "learning_rate": 8.999563542690266e-05, + "loss": 0.4586, + "step": 54050 + }, + { + "epoch": 0.2791235211870747, + "grad_norm": 19829.93359375, + "learning_rate": 8.997370241507516e-05, + "loss": 0.4608, + "step": 54100 + }, + { + "epoch": 0.27938149116968747, + "grad_norm": 21215.3515625, + "learning_rate": 8.995174806605937e-05, + "loss": 0.4672, + "step": 54150 + }, + { + "epoch": 0.2796394611523003, + "grad_norm": 19068.890625, + "learning_rate": 8.992977239157408e-05, + "loss": 0.4637, + "step": 54200 + }, + { + "epoch": 0.27989743113491317, + "grad_norm": 20632.857421875, + "learning_rate": 8.99077754033495e-05, + "loss": 0.4615, + "step": 54250 + }, + { + "epoch": 0.28015540111752596, + "grad_norm": 20244.943359375, + "learning_rate": 8.988575711312714e-05, + "loss": 0.4665, + "step": 54300 + }, + { + "epoch": 0.2804133711001388, + "grad_norm": 21873.34375, + "learning_rate": 8.986371753266001e-05, + "loss": 0.4636, + "step": 54350 + }, + { + "epoch": 0.2806713410827516, + "grad_norm": 18075.001953125, + "learning_rate": 8.984165667371236e-05, + "loss": 0.4626, + "step": 54400 + }, + { + "epoch": 0.28092931106536445, + "grad_norm": 19815.0546875, + "learning_rate": 8.981957454805987e-05, + "loss": 0.4535, + "step": 54450 + }, + { + "epoch": 0.28118728104797724, + "grad_norm": 22713.48046875, + "learning_rate": 8.979747116748955e-05, + "loss": 0.4592, + "step": 54500 + }, + { + "epoch": 0.2814452510305901, + "grad_norm": 23360.1953125, + "learning_rate": 8.977534654379976e-05, + "loss": 0.4646, + "step": 54550 + }, + { + "epoch": 0.2817032210132029, + "grad_norm": 21626.36328125, + "learning_rate": 8.975320068880018e-05, + "loss": 0.4644, + "step": 54600 + }, + { + "epoch": 0.28196119099581574, + "grad_norm": 20061.873046875, + "learning_rate": 8.973103361431184e-05, + "loss": 0.4674, + "step": 54650 + }, + { + "epoch": 0.28221916097842853, + "grad_norm": 21295.0625, + "learning_rate": 8.970884533216713e-05, + "loss": 0.4674, + "step": 54700 + }, + { + "epoch": 0.2824771309610414, + "grad_norm": 19434.23828125, + "learning_rate": 8.968663585420967e-05, + "loss": 0.46, + "step": 54750 + }, + { + "epoch": 0.28273510094365417, + "grad_norm": 23654.849609375, + "learning_rate": 8.966440519229449e-05, + "loss": 0.4649, + "step": 54800 + }, + { + "epoch": 0.282993070926267, + "grad_norm": 22763.603515625, + "learning_rate": 8.964215335828787e-05, + "loss": 0.4578, + "step": 54850 + }, + { + "epoch": 0.28325104090887987, + "grad_norm": 23262.849609375, + "learning_rate": 8.961988036406741e-05, + "loss": 0.4674, + "step": 54900 + }, + { + "epoch": 0.28350901089149266, + "grad_norm": 20148.380859375, + "learning_rate": 8.959758622152201e-05, + "loss": 0.4642, + "step": 54950 + }, + { + "epoch": 0.2837669808741055, + "grad_norm": 22515.548828125, + "learning_rate": 8.957527094255186e-05, + "loss": 0.4697, + "step": 55000 + }, + { + "epoch": 0.2837669808741055, + "eval_loss": 0.4508056044578552, + "eval_runtime": 3347.9938, + "eval_samples_per_second": 926.262, + "eval_steps_per_second": 1.809, + "step": 55000 + }, + { + "epoch": 0.2840249508567183, + "grad_norm": 21158.09375, + "learning_rate": 8.95529345390684e-05, + "loss": 0.4617, + "step": 55050 + }, + { + "epoch": 0.28428292083933115, + "grad_norm": 20892.517578125, + "learning_rate": 8.953057702299437e-05, + "loss": 0.4612, + "step": 55100 + }, + { + "epoch": 0.28454089082194395, + "grad_norm": 21489.740234375, + "learning_rate": 8.950819840626381e-05, + "loss": 0.4578, + "step": 55150 + }, + { + "epoch": 0.2847988608045568, + "grad_norm": 20703.072265625, + "learning_rate": 8.948579870082197e-05, + "loss": 0.4632, + "step": 55200 + }, + { + "epoch": 0.2850568307871696, + "grad_norm": 21731.775390625, + "learning_rate": 8.946337791862537e-05, + "loss": 0.4621, + "step": 55250 + }, + { + "epoch": 0.28531480076978244, + "grad_norm": 24507.076171875, + "learning_rate": 8.94409360716418e-05, + "loss": 0.4542, + "step": 55300 + }, + { + "epoch": 0.28557277075239523, + "grad_norm": 20686.79296875, + "learning_rate": 8.94184731718503e-05, + "loss": 0.4575, + "step": 55350 + }, + { + "epoch": 0.2858307407350081, + "grad_norm": 20055.396484375, + "learning_rate": 8.93959892312411e-05, + "loss": 0.4595, + "step": 55400 + }, + { + "epoch": 0.2860887107176209, + "grad_norm": 21203.28515625, + "learning_rate": 8.93734842618157e-05, + "loss": 0.457, + "step": 55450 + }, + { + "epoch": 0.2863466807002337, + "grad_norm": 21738.6328125, + "learning_rate": 8.935095827558684e-05, + "loss": 0.4639, + "step": 55500 + }, + { + "epoch": 0.2866046506828465, + "grad_norm": 21593.056640625, + "learning_rate": 8.932841128457844e-05, + "loss": 0.4566, + "step": 55550 + }, + { + "epoch": 0.28686262066545937, + "grad_norm": 20362.564453125, + "learning_rate": 8.930584330082564e-05, + "loss": 0.4613, + "step": 55600 + }, + { + "epoch": 0.2871205906480722, + "grad_norm": 20415.390625, + "learning_rate": 8.928325433637482e-05, + "loss": 0.4591, + "step": 55650 + }, + { + "epoch": 0.287378560630685, + "grad_norm": 21615.1953125, + "learning_rate": 8.926064440328348e-05, + "loss": 0.4645, + "step": 55700 + }, + { + "epoch": 0.28763653061329786, + "grad_norm": 19537.873046875, + "learning_rate": 8.92380135136204e-05, + "loss": 0.4595, + "step": 55750 + }, + { + "epoch": 0.28789450059591065, + "grad_norm": 21288.21484375, + "learning_rate": 8.921536167946552e-05, + "loss": 0.4565, + "step": 55800 + }, + { + "epoch": 0.2881524705785235, + "grad_norm": 25019.783203125, + "learning_rate": 8.919268891290992e-05, + "loss": 0.4635, + "step": 55850 + }, + { + "epoch": 0.2884104405611363, + "grad_norm": 23099.5625, + "learning_rate": 8.916999522605592e-05, + "loss": 0.4561, + "step": 55900 + }, + { + "epoch": 0.28866841054374914, + "grad_norm": 22477.849609375, + "learning_rate": 8.914728063101694e-05, + "loss": 0.458, + "step": 55950 + }, + { + "epoch": 0.28892638052636194, + "grad_norm": 19823.103515625, + "learning_rate": 8.91245451399176e-05, + "loss": 0.457, + "step": 56000 + }, + { + "epoch": 0.2891843505089748, + "grad_norm": 20293.353515625, + "learning_rate": 8.910178876489368e-05, + "loss": 0.4614, + "step": 56050 + }, + { + "epoch": 0.2894423204915876, + "grad_norm": 19020.892578125, + "learning_rate": 8.907901151809205e-05, + "loss": 0.4597, + "step": 56100 + }, + { + "epoch": 0.28970029047420043, + "grad_norm": 20133.603515625, + "learning_rate": 8.905621341167082e-05, + "loss": 0.4577, + "step": 56150 + }, + { + "epoch": 0.2899582604568132, + "grad_norm": 21008.95703125, + "learning_rate": 8.903339445779915e-05, + "loss": 0.4596, + "step": 56200 + }, + { + "epoch": 0.29021623043942607, + "grad_norm": 21339.892578125, + "learning_rate": 8.901055466865735e-05, + "loss": 0.4631, + "step": 56250 + }, + { + "epoch": 0.29047420042203886, + "grad_norm": 20088.455078125, + "learning_rate": 8.898769405643686e-05, + "loss": 0.4571, + "step": 56300 + }, + { + "epoch": 0.2907321704046517, + "grad_norm": 21779.341796875, + "learning_rate": 8.896481263334023e-05, + "loss": 0.4541, + "step": 56350 + }, + { + "epoch": 0.29099014038726456, + "grad_norm": 24433.103515625, + "learning_rate": 8.894191041158113e-05, + "loss": 0.4627, + "step": 56400 + }, + { + "epoch": 0.29124811036987736, + "grad_norm": 22214.70703125, + "learning_rate": 8.891898740338432e-05, + "loss": 0.4585, + "step": 56450 + }, + { + "epoch": 0.2915060803524902, + "grad_norm": 20558.955078125, + "learning_rate": 8.889604362098567e-05, + "loss": 0.4547, + "step": 56500 + }, + { + "epoch": 0.291764050335103, + "grad_norm": 22438.3828125, + "learning_rate": 8.88730790766321e-05, + "loss": 0.4581, + "step": 56550 + }, + { + "epoch": 0.29202202031771585, + "grad_norm": 22429.658203125, + "learning_rate": 8.885009378258164e-05, + "loss": 0.4556, + "step": 56600 + }, + { + "epoch": 0.29227999030032864, + "grad_norm": 18076.814453125, + "learning_rate": 8.882708775110342e-05, + "loss": 0.4571, + "step": 56650 + }, + { + "epoch": 0.2925379602829415, + "grad_norm": 19816.873046875, + "learning_rate": 8.88040609944776e-05, + "loss": 0.4584, + "step": 56700 + }, + { + "epoch": 0.2927959302655543, + "grad_norm": 20448.5234375, + "learning_rate": 8.878101352499542e-05, + "loss": 0.4575, + "step": 56750 + }, + { + "epoch": 0.29305390024816713, + "grad_norm": 19950.4609375, + "learning_rate": 8.875794535495915e-05, + "loss": 0.4558, + "step": 56800 + }, + { + "epoch": 0.2933118702307799, + "grad_norm": 20185.0625, + "learning_rate": 8.873485649668218e-05, + "loss": 0.4523, + "step": 56850 + }, + { + "epoch": 0.2935698402133928, + "grad_norm": 22338.080078125, + "learning_rate": 8.871174696248888e-05, + "loss": 0.4648, + "step": 56900 + }, + { + "epoch": 0.29382781019600557, + "grad_norm": 22531.541015625, + "learning_rate": 8.868861676471463e-05, + "loss": 0.4628, + "step": 56950 + }, + { + "epoch": 0.2940857801786184, + "grad_norm": 19558.10546875, + "learning_rate": 8.866546591570592e-05, + "loss": 0.4565, + "step": 57000 + }, + { + "epoch": 0.29434375016123127, + "grad_norm": 20166.33203125, + "learning_rate": 8.864229442782023e-05, + "loss": 0.4527, + "step": 57050 + }, + { + "epoch": 0.29460172014384406, + "grad_norm": 20262.185546875, + "learning_rate": 8.861910231342603e-05, + "loss": 0.4575, + "step": 57100 + }, + { + "epoch": 0.2948596901264569, + "grad_norm": 19107.080078125, + "learning_rate": 8.859588958490283e-05, + "loss": 0.4564, + "step": 57150 + }, + { + "epoch": 0.2951176601090697, + "grad_norm": 19690.37109375, + "learning_rate": 8.857265625464113e-05, + "loss": 0.4576, + "step": 57200 + }, + { + "epoch": 0.29537563009168255, + "grad_norm": 21793.189453125, + "learning_rate": 8.854940233504245e-05, + "loss": 0.4616, + "step": 57250 + }, + { + "epoch": 0.29563360007429534, + "grad_norm": 21543.033203125, + "learning_rate": 8.852612783851926e-05, + "loss": 0.4559, + "step": 57300 + }, + { + "epoch": 0.2958915700569082, + "grad_norm": 21455.56640625, + "learning_rate": 8.850283277749504e-05, + "loss": 0.4583, + "step": 57350 + }, + { + "epoch": 0.296149540039521, + "grad_norm": 21236.935546875, + "learning_rate": 8.847951716440426e-05, + "loss": 0.46, + "step": 57400 + }, + { + "epoch": 0.29640751002213384, + "grad_norm": 22411.130859375, + "learning_rate": 8.845618101169232e-05, + "loss": 0.4563, + "step": 57450 + }, + { + "epoch": 0.29666548000474663, + "grad_norm": 19269.26171875, + "learning_rate": 8.843282433181561e-05, + "loss": 0.4634, + "step": 57500 + }, + { + "epoch": 0.2969234499873595, + "grad_norm": 22179.669921875, + "learning_rate": 8.840944713724149e-05, + "loss": 0.4582, + "step": 57550 + }, + { + "epoch": 0.29718141996997227, + "grad_norm": 19867.076171875, + "learning_rate": 8.838604944044825e-05, + "loss": 0.4591, + "step": 57600 + }, + { + "epoch": 0.2974393899525851, + "grad_norm": 19806.09375, + "learning_rate": 8.836263125392511e-05, + "loss": 0.4571, + "step": 57650 + }, + { + "epoch": 0.2976973599351979, + "grad_norm": 21762.22265625, + "learning_rate": 8.833919259017225e-05, + "loss": 0.4526, + "step": 57700 + }, + { + "epoch": 0.29795532991781076, + "grad_norm": 21031.263671875, + "learning_rate": 8.83157334617008e-05, + "loss": 0.4577, + "step": 57750 + }, + { + "epoch": 0.2982132999004236, + "grad_norm": 22886.556640625, + "learning_rate": 8.829225388103276e-05, + "loss": 0.4553, + "step": 57800 + }, + { + "epoch": 0.2984712698830364, + "grad_norm": 19710.173828125, + "learning_rate": 8.826875386070108e-05, + "loss": 0.4556, + "step": 57850 + }, + { + "epoch": 0.29872923986564925, + "grad_norm": 20607.244140625, + "learning_rate": 8.824523341324963e-05, + "loss": 0.458, + "step": 57900 + }, + { + "epoch": 0.29898720984826205, + "grad_norm": 20672.05859375, + "learning_rate": 8.822169255123317e-05, + "loss": 0.4531, + "step": 57950 + }, + { + "epoch": 0.2992451798308749, + "grad_norm": 21375.76953125, + "learning_rate": 8.819813128721732e-05, + "loss": 0.4602, + "step": 58000 + }, + { + "epoch": 0.2995031498134877, + "grad_norm": 20848.328125, + "learning_rate": 8.817454963377865e-05, + "loss": 0.4557, + "step": 58050 + }, + { + "epoch": 0.29976111979610054, + "grad_norm": 20778.619140625, + "learning_rate": 8.81509476035046e-05, + "loss": 0.4588, + "step": 58100 + }, + { + "epoch": 0.30001908977871333, + "grad_norm": 19791.296875, + "learning_rate": 8.812732520899347e-05, + "loss": 0.4609, + "step": 58150 + }, + { + "epoch": 0.3002770597613262, + "grad_norm": 21814.482421875, + "learning_rate": 8.810368246285445e-05, + "loss": 0.4597, + "step": 58200 + }, + { + "epoch": 0.300535029743939, + "grad_norm": 22417.65625, + "learning_rate": 8.808001937770755e-05, + "loss": 0.461, + "step": 58250 + }, + { + "epoch": 0.3007929997265518, + "grad_norm": 21347.53515625, + "learning_rate": 8.80563359661837e-05, + "loss": 0.4523, + "step": 58300 + }, + { + "epoch": 0.3010509697091646, + "grad_norm": 21612.689453125, + "learning_rate": 8.803263224092461e-05, + "loss": 0.4588, + "step": 58350 + }, + { + "epoch": 0.30130893969177747, + "grad_norm": 19139.7109375, + "learning_rate": 8.80089082145829e-05, + "loss": 0.4576, + "step": 58400 + }, + { + "epoch": 0.3015669096743903, + "grad_norm": 21629.78125, + "learning_rate": 8.798516389982197e-05, + "loss": 0.4514, + "step": 58450 + }, + { + "epoch": 0.3018248796570031, + "grad_norm": 20307.630859375, + "learning_rate": 8.79613993093161e-05, + "loss": 0.4606, + "step": 58500 + }, + { + "epoch": 0.30208284963961596, + "grad_norm": 17832.3359375, + "learning_rate": 8.793761445575037e-05, + "loss": 0.4654, + "step": 58550 + }, + { + "epoch": 0.30234081962222875, + "grad_norm": 19975.20703125, + "learning_rate": 8.791380935182065e-05, + "loss": 0.4519, + "step": 58600 + }, + { + "epoch": 0.3025987896048416, + "grad_norm": 23387.681640625, + "learning_rate": 8.788998401023365e-05, + "loss": 0.4576, + "step": 58650 + }, + { + "epoch": 0.3028567595874544, + "grad_norm": 18704.669921875, + "learning_rate": 8.78661384437069e-05, + "loss": 0.4634, + "step": 58700 + }, + { + "epoch": 0.30311472957006724, + "grad_norm": 21739.806640625, + "learning_rate": 8.784227266496868e-05, + "loss": 0.4471, + "step": 58750 + }, + { + "epoch": 0.30337269955268004, + "grad_norm": 22190.74609375, + "learning_rate": 8.781838668675806e-05, + "loss": 0.4508, + "step": 58800 + }, + { + "epoch": 0.3036306695352929, + "grad_norm": 19186.9609375, + "learning_rate": 8.779448052182495e-05, + "loss": 0.4575, + "step": 58850 + }, + { + "epoch": 0.3038886395179057, + "grad_norm": 21925.8984375, + "learning_rate": 8.777055418293e-05, + "loss": 0.4614, + "step": 58900 + }, + { + "epoch": 0.3041466095005185, + "grad_norm": 21280.16796875, + "learning_rate": 8.774660768284459e-05, + "loss": 0.4621, + "step": 58950 + }, + { + "epoch": 0.3044045794831313, + "grad_norm": 19872.3828125, + "learning_rate": 8.772264103435094e-05, + "loss": 0.4617, + "step": 59000 + }, + { + "epoch": 0.30466254946574417, + "grad_norm": 17518.58984375, + "learning_rate": 8.769865425024195e-05, + "loss": 0.4548, + "step": 59050 + }, + { + "epoch": 0.30492051944835696, + "grad_norm": 25605.537109375, + "learning_rate": 8.767464734332131e-05, + "loss": 0.4532, + "step": 59100 + }, + { + "epoch": 0.3051784894309698, + "grad_norm": 20151.53515625, + "learning_rate": 8.765062032640346e-05, + "loss": 0.4558, + "step": 59150 + }, + { + "epoch": 0.30543645941358266, + "grad_norm": 19346.048828125, + "learning_rate": 8.762657321231353e-05, + "loss": 0.4624, + "step": 59200 + }, + { + "epoch": 0.30569442939619546, + "grad_norm": 21447.115234375, + "learning_rate": 8.760250601388741e-05, + "loss": 0.4632, + "step": 59250 + }, + { + "epoch": 0.3059523993788083, + "grad_norm": 19053.896484375, + "learning_rate": 8.757841874397172e-05, + "loss": 0.454, + "step": 59300 + }, + { + "epoch": 0.3062103693614211, + "grad_norm": 20928.8515625, + "learning_rate": 8.755431141542376e-05, + "loss": 0.4509, + "step": 59350 + }, + { + "epoch": 0.30646833934403395, + "grad_norm": 20900.40234375, + "learning_rate": 8.753018404111157e-05, + "loss": 0.4523, + "step": 59400 + }, + { + "epoch": 0.30672630932664674, + "grad_norm": 19776.572265625, + "learning_rate": 8.750603663391385e-05, + "loss": 0.458, + "step": 59450 + }, + { + "epoch": 0.3069842793092596, + "grad_norm": 21503.505859375, + "learning_rate": 8.748186920672005e-05, + "loss": 0.4496, + "step": 59500 + }, + { + "epoch": 0.3072422492918724, + "grad_norm": 20588.5078125, + "learning_rate": 8.745768177243027e-05, + "loss": 0.4578, + "step": 59550 + }, + { + "epoch": 0.30750021927448523, + "grad_norm": 20516.150390625, + "learning_rate": 8.743347434395528e-05, + "loss": 0.46, + "step": 59600 + }, + { + "epoch": 0.307758189257098, + "grad_norm": 20487.498046875, + "learning_rate": 8.740924693421655e-05, + "loss": 0.4574, + "step": 59650 + }, + { + "epoch": 0.3080161592397109, + "grad_norm": 21070.3671875, + "learning_rate": 8.738499955614619e-05, + "loss": 0.4564, + "step": 59700 + }, + { + "epoch": 0.30827412922232367, + "grad_norm": 19067.427734375, + "learning_rate": 8.736073222268697e-05, + "loss": 0.4523, + "step": 59750 + }, + { + "epoch": 0.3085320992049365, + "grad_norm": 22084.68359375, + "learning_rate": 8.733644494679236e-05, + "loss": 0.4558, + "step": 59800 + }, + { + "epoch": 0.3087900691875493, + "grad_norm": 22324.9140625, + "learning_rate": 8.731213774142639e-05, + "loss": 0.4585, + "step": 59850 + }, + { + "epoch": 0.30904803917016216, + "grad_norm": 19219.47265625, + "learning_rate": 8.728781061956383e-05, + "loss": 0.4571, + "step": 59900 + }, + { + "epoch": 0.309306009152775, + "grad_norm": 20598.125, + "learning_rate": 8.726346359418998e-05, + "loss": 0.4581, + "step": 59950 + }, + { + "epoch": 0.3095639791353878, + "grad_norm": 22155.720703125, + "learning_rate": 8.723909667830082e-05, + "loss": 0.4578, + "step": 60000 + }, + { + "epoch": 0.3095639791353878, + "eval_loss": 0.44494956731796265, + "eval_runtime": 3261.5111, + "eval_samples_per_second": 950.823, + "eval_steps_per_second": 1.857, + "step": 60000 + }, + { + "epoch": 0.30982194911800065, + "grad_norm": 22012.822265625, + "learning_rate": 8.721470988490297e-05, + "loss": 0.4533, + "step": 60050 + }, + { + "epoch": 0.31007991910061344, + "grad_norm": 20934.453125, + "learning_rate": 8.719030322701358e-05, + "loss": 0.4538, + "step": 60100 + }, + { + "epoch": 0.3103378890832263, + "grad_norm": 20173.20703125, + "learning_rate": 8.716587671766049e-05, + "loss": 0.4559, + "step": 60150 + }, + { + "epoch": 0.3105958590658391, + "grad_norm": 19343.833984375, + "learning_rate": 8.714143036988208e-05, + "loss": 0.4579, + "step": 60200 + }, + { + "epoch": 0.31085382904845194, + "grad_norm": 20720.435546875, + "learning_rate": 8.711696419672734e-05, + "loss": 0.4529, + "step": 60250 + }, + { + "epoch": 0.31111179903106473, + "grad_norm": 22050.85546875, + "learning_rate": 8.709247821125583e-05, + "loss": 0.4505, + "step": 60300 + }, + { + "epoch": 0.3113697690136776, + "grad_norm": 22470.55078125, + "learning_rate": 8.706797242653773e-05, + "loss": 0.4616, + "step": 60350 + }, + { + "epoch": 0.31162773899629037, + "grad_norm": 21057.978515625, + "learning_rate": 8.70434468556537e-05, + "loss": 0.4568, + "step": 60400 + }, + { + "epoch": 0.3118857089789032, + "grad_norm": 21035.34375, + "learning_rate": 8.701890151169507e-05, + "loss": 0.4551, + "step": 60450 + }, + { + "epoch": 0.312143678961516, + "grad_norm": 20412.056640625, + "learning_rate": 8.699433640776363e-05, + "loss": 0.4521, + "step": 60500 + }, + { + "epoch": 0.31240164894412886, + "grad_norm": 19888.26953125, + "learning_rate": 8.696975155697175e-05, + "loss": 0.4565, + "step": 60550 + }, + { + "epoch": 0.3126596189267417, + "grad_norm": 22491.900390625, + "learning_rate": 8.694514697244238e-05, + "loss": 0.4578, + "step": 60600 + }, + { + "epoch": 0.3129175889093545, + "grad_norm": 20026.357421875, + "learning_rate": 8.692052266730897e-05, + "loss": 0.4554, + "step": 60650 + }, + { + "epoch": 0.31317555889196735, + "grad_norm": 22979.109375, + "learning_rate": 8.689587865471547e-05, + "loss": 0.461, + "step": 60700 + }, + { + "epoch": 0.31343352887458015, + "grad_norm": 21558.291015625, + "learning_rate": 8.68712149478164e-05, + "loss": 0.4546, + "step": 60750 + }, + { + "epoch": 0.313691498857193, + "grad_norm": 22115.384765625, + "learning_rate": 8.684653155977676e-05, + "loss": 0.4518, + "step": 60800 + }, + { + "epoch": 0.3139494688398058, + "grad_norm": 21422.41015625, + "learning_rate": 8.682182850377205e-05, + "loss": 0.4602, + "step": 60850 + }, + { + "epoch": 0.31420743882241864, + "grad_norm": 21101.02734375, + "learning_rate": 8.679710579298832e-05, + "loss": 0.4579, + "step": 60900 + }, + { + "epoch": 0.31446540880503143, + "grad_norm": 18844.361328125, + "learning_rate": 8.677236344062203e-05, + "loss": 0.4569, + "step": 60950 + }, + { + "epoch": 0.3147233787876443, + "grad_norm": 20492.796875, + "learning_rate": 8.67476014598802e-05, + "loss": 0.4542, + "step": 61000 + }, + { + "epoch": 0.3149813487702571, + "grad_norm": 28102.55078125, + "learning_rate": 8.67228198639803e-05, + "loss": 0.4516, + "step": 61050 + }, + { + "epoch": 0.3152393187528699, + "grad_norm": 20697.494140625, + "learning_rate": 8.669801866615024e-05, + "loss": 0.4551, + "step": 61100 + }, + { + "epoch": 0.3154972887354827, + "grad_norm": 20726.90625, + "learning_rate": 8.667319787962842e-05, + "loss": 0.4576, + "step": 61150 + }, + { + "epoch": 0.31575525871809557, + "grad_norm": 20007.04296875, + "learning_rate": 8.664835751766371e-05, + "loss": 0.4544, + "step": 61200 + }, + { + "epoch": 0.31601322870070836, + "grad_norm": 23061.224609375, + "learning_rate": 8.662349759351542e-05, + "loss": 0.458, + "step": 61250 + }, + { + "epoch": 0.3162711986833212, + "grad_norm": 19895.3125, + "learning_rate": 8.65986181204533e-05, + "loss": 0.4555, + "step": 61300 + }, + { + "epoch": 0.31652916866593406, + "grad_norm": 22702.5234375, + "learning_rate": 8.65737191117575e-05, + "loss": 0.4586, + "step": 61350 + }, + { + "epoch": 0.31678713864854685, + "grad_norm": 20045.404296875, + "learning_rate": 8.654880058071866e-05, + "loss": 0.4583, + "step": 61400 + }, + { + "epoch": 0.3170451086311597, + "grad_norm": 21180.455078125, + "learning_rate": 8.652386254063778e-05, + "loss": 0.4594, + "step": 61450 + }, + { + "epoch": 0.3173030786137725, + "grad_norm": 19104.767578125, + "learning_rate": 8.649890500482633e-05, + "loss": 0.4532, + "step": 61500 + }, + { + "epoch": 0.31756104859638534, + "grad_norm": 23137.869140625, + "learning_rate": 8.647392798660613e-05, + "loss": 0.4535, + "step": 61550 + }, + { + "epoch": 0.31781901857899814, + "grad_norm": 21784.001953125, + "learning_rate": 8.644893149930949e-05, + "loss": 0.4518, + "step": 61600 + }, + { + "epoch": 0.318076988561611, + "grad_norm": 20489.796875, + "learning_rate": 8.642391555627897e-05, + "loss": 0.4572, + "step": 61650 + }, + { + "epoch": 0.3183349585442238, + "grad_norm": 21743.728515625, + "learning_rate": 8.639888017086764e-05, + "loss": 0.4601, + "step": 61700 + }, + { + "epoch": 0.3185929285268366, + "grad_norm": 21714.6171875, + "learning_rate": 8.63738253564389e-05, + "loss": 0.4597, + "step": 61750 + }, + { + "epoch": 0.3188508985094494, + "grad_norm": 19896.208984375, + "learning_rate": 8.634875112636653e-05, + "loss": 0.4532, + "step": 61800 + }, + { + "epoch": 0.31910886849206227, + "grad_norm": 22215.173828125, + "learning_rate": 8.632365749403465e-05, + "loss": 0.4532, + "step": 61850 + }, + { + "epoch": 0.31936683847467506, + "grad_norm": 22466.958984375, + "learning_rate": 8.629854447283778e-05, + "loss": 0.4539, + "step": 61900 + }, + { + "epoch": 0.3196248084572879, + "grad_norm": 21345.197265625, + "learning_rate": 8.627341207618073e-05, + "loss": 0.4551, + "step": 61950 + }, + { + "epoch": 0.3198827784399007, + "grad_norm": 20988.8203125, + "learning_rate": 8.624826031747872e-05, + "loss": 0.4593, + "step": 62000 + }, + { + "epoch": 0.32014074842251355, + "grad_norm": 23295.70703125, + "learning_rate": 8.622308921015726e-05, + "loss": 0.4547, + "step": 62050 + }, + { + "epoch": 0.3203987184051264, + "grad_norm": 22620.431640625, + "learning_rate": 8.619789876765221e-05, + "loss": 0.4601, + "step": 62100 + }, + { + "epoch": 0.3206566883877392, + "grad_norm": 21914.44140625, + "learning_rate": 8.61726890034097e-05, + "loss": 0.4474, + "step": 62150 + }, + { + "epoch": 0.32091465837035205, + "grad_norm": 20521.265625, + "learning_rate": 8.614745993088626e-05, + "loss": 0.4565, + "step": 62200 + }, + { + "epoch": 0.32117262835296484, + "grad_norm": 22810.072265625, + "learning_rate": 8.612221156354868e-05, + "loss": 0.453, + "step": 62250 + }, + { + "epoch": 0.3214305983355777, + "grad_norm": 20862.349609375, + "learning_rate": 8.609694391487402e-05, + "loss": 0.4543, + "step": 62300 + }, + { + "epoch": 0.3216885683181905, + "grad_norm": 22115.298828125, + "learning_rate": 8.607165699834967e-05, + "loss": 0.453, + "step": 62350 + }, + { + "epoch": 0.32194653830080333, + "grad_norm": 22504.859375, + "learning_rate": 8.60463508274733e-05, + "loss": 0.4552, + "step": 62400 + }, + { + "epoch": 0.3222045082834161, + "grad_norm": 21758.9453125, + "learning_rate": 8.602102541575286e-05, + "loss": 0.4526, + "step": 62450 + }, + { + "epoch": 0.322462478266029, + "grad_norm": 20388.23828125, + "learning_rate": 8.599568077670654e-05, + "loss": 0.4522, + "step": 62500 + }, + { + "epoch": 0.32272044824864177, + "grad_norm": 22393.857421875, + "learning_rate": 8.597031692386286e-05, + "loss": 0.4457, + "step": 62550 + }, + { + "epoch": 0.3229784182312546, + "grad_norm": 22233.978515625, + "learning_rate": 8.594493387076052e-05, + "loss": 0.449, + "step": 62600 + }, + { + "epoch": 0.3232363882138674, + "grad_norm": 19831.12109375, + "learning_rate": 8.591953163094852e-05, + "loss": 0.4556, + "step": 62650 + }, + { + "epoch": 0.32349435819648026, + "grad_norm": 19109.783203125, + "learning_rate": 8.589411021798608e-05, + "loss": 0.4552, + "step": 62700 + }, + { + "epoch": 0.3237523281790931, + "grad_norm": 23053.642578125, + "learning_rate": 8.586866964544265e-05, + "loss": 0.4552, + "step": 62750 + }, + { + "epoch": 0.3240102981617059, + "grad_norm": 17938.240234375, + "learning_rate": 8.584320992689791e-05, + "loss": 0.4512, + "step": 62800 + }, + { + "epoch": 0.32426826814431875, + "grad_norm": 19569.431640625, + "learning_rate": 8.581773107594179e-05, + "loss": 0.4557, + "step": 62850 + }, + { + "epoch": 0.32452623812693154, + "grad_norm": 19247.82421875, + "learning_rate": 8.579223310617439e-05, + "loss": 0.4599, + "step": 62900 + }, + { + "epoch": 0.3247842081095444, + "grad_norm": 21565.8671875, + "learning_rate": 8.576671603120603e-05, + "loss": 0.4573, + "step": 62950 + }, + { + "epoch": 0.3250421780921572, + "grad_norm": 19029.005859375, + "learning_rate": 8.574117986465723e-05, + "loss": 0.455, + "step": 63000 + }, + { + "epoch": 0.32530014807477003, + "grad_norm": 21574.626953125, + "learning_rate": 8.57156246201587e-05, + "loss": 0.4512, + "step": 63050 + }, + { + "epoch": 0.32555811805738283, + "grad_norm": 21181.8203125, + "learning_rate": 8.569005031135136e-05, + "loss": 0.4513, + "step": 63100 + }, + { + "epoch": 0.3258160880399957, + "grad_norm": 22689.93359375, + "learning_rate": 8.566445695188624e-05, + "loss": 0.4515, + "step": 63150 + }, + { + "epoch": 0.32607405802260847, + "grad_norm": 22001.9921875, + "learning_rate": 8.563884455542461e-05, + "loss": 0.4459, + "step": 63200 + }, + { + "epoch": 0.3263320280052213, + "grad_norm": 20342.96875, + "learning_rate": 8.561321313563786e-05, + "loss": 0.4526, + "step": 63250 + }, + { + "epoch": 0.3265899979878341, + "grad_norm": 20673.75390625, + "learning_rate": 8.558756270620756e-05, + "loss": 0.4581, + "step": 63300 + }, + { + "epoch": 0.32684796797044696, + "grad_norm": 23113.490234375, + "learning_rate": 8.556189328082538e-05, + "loss": 0.4525, + "step": 63350 + }, + { + "epoch": 0.32710593795305976, + "grad_norm": 21878.384765625, + "learning_rate": 8.55362048731932e-05, + "loss": 0.4536, + "step": 63400 + }, + { + "epoch": 0.3273639079356726, + "grad_norm": 22787.79296875, + "learning_rate": 8.551049749702297e-05, + "loss": 0.4586, + "step": 63450 + }, + { + "epoch": 0.32762187791828545, + "grad_norm": 20422.0625, + "learning_rate": 8.548477116603679e-05, + "loss": 0.4496, + "step": 63500 + }, + { + "epoch": 0.32787984790089825, + "grad_norm": 21936.8828125, + "learning_rate": 8.54590258939669e-05, + "loss": 0.4509, + "step": 63550 + }, + { + "epoch": 0.3281378178835111, + "grad_norm": 21049.275390625, + "learning_rate": 8.54332616945556e-05, + "loss": 0.4514, + "step": 63600 + }, + { + "epoch": 0.3283957878661239, + "grad_norm": 22976.1015625, + "learning_rate": 8.540747858155533e-05, + "loss": 0.4611, + "step": 63650 + }, + { + "epoch": 0.32865375784873674, + "grad_norm": 21968.18359375, + "learning_rate": 8.538167656872861e-05, + "loss": 0.4557, + "step": 63700 + }, + { + "epoch": 0.32891172783134953, + "grad_norm": 22231.755859375, + "learning_rate": 8.53558556698481e-05, + "loss": 0.4556, + "step": 63750 + }, + { + "epoch": 0.3291696978139624, + "grad_norm": 21183.978515625, + "learning_rate": 8.533001589869643e-05, + "loss": 0.4479, + "step": 63800 + }, + { + "epoch": 0.3294276677965752, + "grad_norm": 23931.5234375, + "learning_rate": 8.530415726906642e-05, + "loss": 0.4533, + "step": 63850 + }, + { + "epoch": 0.329685637779188, + "grad_norm": 21073.62890625, + "learning_rate": 8.527827979476087e-05, + "loss": 0.4577, + "step": 63900 + }, + { + "epoch": 0.3299436077618008, + "grad_norm": 19957.09375, + "learning_rate": 8.525238348959268e-05, + "loss": 0.4486, + "step": 63950 + }, + { + "epoch": 0.33020157774441367, + "grad_norm": 18999.962890625, + "learning_rate": 8.522646836738482e-05, + "loss": 0.4525, + "step": 64000 + }, + { + "epoch": 0.33045954772702646, + "grad_norm": 24102.1640625, + "learning_rate": 8.520053444197026e-05, + "loss": 0.4545, + "step": 64050 + }, + { + "epoch": 0.3307175177096393, + "grad_norm": 20205.65234375, + "learning_rate": 8.517458172719203e-05, + "loss": 0.4539, + "step": 64100 + }, + { + "epoch": 0.33097548769225216, + "grad_norm": 24099.8203125, + "learning_rate": 8.514861023690321e-05, + "loss": 0.4465, + "step": 64150 + }, + { + "epoch": 0.33123345767486495, + "grad_norm": 19802.203125, + "learning_rate": 8.512261998496685e-05, + "loss": 0.4546, + "step": 64200 + }, + { + "epoch": 0.3314914276574778, + "grad_norm": 23137.609375, + "learning_rate": 8.509661098525603e-05, + "loss": 0.4539, + "step": 64250 + }, + { + "epoch": 0.3317493976400906, + "grad_norm": 23578.609375, + "learning_rate": 8.507058325165391e-05, + "loss": 0.4513, + "step": 64300 + }, + { + "epoch": 0.33200736762270344, + "grad_norm": 19172.0859375, + "learning_rate": 8.504453679805353e-05, + "loss": 0.456, + "step": 64350 + }, + { + "epoch": 0.33226533760531624, + "grad_norm": 19165.775390625, + "learning_rate": 8.5018471638358e-05, + "loss": 0.4578, + "step": 64400 + }, + { + "epoch": 0.3325233075879291, + "grad_norm": 18070.72265625, + "learning_rate": 8.49923877864804e-05, + "loss": 0.4608, + "step": 64450 + }, + { + "epoch": 0.3327812775705419, + "grad_norm": 20918.525390625, + "learning_rate": 8.49662852563438e-05, + "loss": 0.4526, + "step": 64500 + }, + { + "epoch": 0.3330392475531547, + "grad_norm": 21165.05078125, + "learning_rate": 8.494016406188121e-05, + "loss": 0.4503, + "step": 64550 + }, + { + "epoch": 0.3332972175357675, + "grad_norm": 19273.013671875, + "learning_rate": 8.491402421703562e-05, + "loss": 0.4572, + "step": 64600 + }, + { + "epoch": 0.33355518751838037, + "grad_norm": 21221.681640625, + "learning_rate": 8.488786573575998e-05, + "loss": 0.456, + "step": 64650 + }, + { + "epoch": 0.33381315750099316, + "grad_norm": 19485.8125, + "learning_rate": 8.486168863201716e-05, + "loss": 0.4423, + "step": 64700 + }, + { + "epoch": 0.334071127483606, + "grad_norm": 23241.580078125, + "learning_rate": 8.483549291978001e-05, + "loss": 0.4531, + "step": 64750 + }, + { + "epoch": 0.3343290974662188, + "grad_norm": 21281.111328125, + "learning_rate": 8.48092786130313e-05, + "loss": 0.452, + "step": 64800 + }, + { + "epoch": 0.33458706744883165, + "grad_norm": 21610.2578125, + "learning_rate": 8.47830457257637e-05, + "loss": 0.4488, + "step": 64850 + }, + { + "epoch": 0.3348450374314445, + "grad_norm": 19343.466796875, + "learning_rate": 8.475679427197982e-05, + "loss": 0.4514, + "step": 64900 + }, + { + "epoch": 0.3351030074140573, + "grad_norm": 19489.1875, + "learning_rate": 8.473052426569219e-05, + "loss": 0.447, + "step": 64950 + }, + { + "epoch": 0.33536097739667015, + "grad_norm": 24805.84765625, + "learning_rate": 8.470423572092323e-05, + "loss": 0.4594, + "step": 65000 + }, + { + "epoch": 0.33536097739667015, + "eval_loss": 0.440469890832901, + "eval_runtime": 3318.76, + "eval_samples_per_second": 934.421, + "eval_steps_per_second": 1.825, + "step": 65000 + }, + { + "epoch": 0.33561894737928294, + "grad_norm": 22912.732421875, + "learning_rate": 8.467792865170525e-05, + "loss": 0.4435, + "step": 65050 + }, + { + "epoch": 0.3358769173618958, + "grad_norm": 19958.994140625, + "learning_rate": 8.465160307208045e-05, + "loss": 0.4588, + "step": 65100 + }, + { + "epoch": 0.3361348873445086, + "grad_norm": 20914.193359375, + "learning_rate": 8.462525899610092e-05, + "loss": 0.4497, + "step": 65150 + }, + { + "epoch": 0.33639285732712143, + "grad_norm": 20505.814453125, + "learning_rate": 8.459889643782861e-05, + "loss": 0.4569, + "step": 65200 + }, + { + "epoch": 0.3366508273097342, + "grad_norm": 19486.068359375, + "learning_rate": 8.457251541133535e-05, + "loss": 0.4505, + "step": 65250 + }, + { + "epoch": 0.3369087972923471, + "grad_norm": 21967.84765625, + "learning_rate": 8.454611593070284e-05, + "loss": 0.4556, + "step": 65300 + }, + { + "epoch": 0.33716676727495987, + "grad_norm": 21949.767578125, + "learning_rate": 8.451969801002258e-05, + "loss": 0.4491, + "step": 65350 + }, + { + "epoch": 0.3374247372575727, + "grad_norm": 19765.14453125, + "learning_rate": 8.449326166339595e-05, + "loss": 0.4507, + "step": 65400 + }, + { + "epoch": 0.3376827072401855, + "grad_norm": 21396.982421875, + "learning_rate": 8.446680690493417e-05, + "loss": 0.4548, + "step": 65450 + }, + { + "epoch": 0.33794067722279836, + "grad_norm": 22511.8359375, + "learning_rate": 8.444033374875828e-05, + "loss": 0.454, + "step": 65500 + }, + { + "epoch": 0.33819864720541115, + "grad_norm": 21264.076171875, + "learning_rate": 8.441384220899912e-05, + "loss": 0.4486, + "step": 65550 + }, + { + "epoch": 0.338456617188024, + "grad_norm": 20736.046875, + "learning_rate": 8.438733229979741e-05, + "loss": 0.4505, + "step": 65600 + }, + { + "epoch": 0.33871458717063685, + "grad_norm": 20183.8359375, + "learning_rate": 8.436080403530356e-05, + "loss": 0.4485, + "step": 65650 + }, + { + "epoch": 0.33897255715324964, + "grad_norm": 21947.3671875, + "learning_rate": 8.433425742967787e-05, + "loss": 0.4499, + "step": 65700 + }, + { + "epoch": 0.3392305271358625, + "grad_norm": 22621.236328125, + "learning_rate": 8.430769249709042e-05, + "loss": 0.4503, + "step": 65750 + }, + { + "epoch": 0.3394884971184753, + "grad_norm": 21537.947265625, + "learning_rate": 8.428110925172103e-05, + "loss": 0.4634, + "step": 65800 + }, + { + "epoch": 0.33974646710108813, + "grad_norm": 20869.759765625, + "learning_rate": 8.425450770775936e-05, + "loss": 0.4504, + "step": 65850 + }, + { + "epoch": 0.34000443708370093, + "grad_norm": 20865.12109375, + "learning_rate": 8.422788787940477e-05, + "loss": 0.4509, + "step": 65900 + }, + { + "epoch": 0.3402624070663138, + "grad_norm": 23897.974609375, + "learning_rate": 8.42012497808664e-05, + "loss": 0.4512, + "step": 65950 + }, + { + "epoch": 0.34052037704892657, + "grad_norm": 23978.56640625, + "learning_rate": 8.417459342636318e-05, + "loss": 0.4513, + "step": 66000 + }, + { + "epoch": 0.3407783470315394, + "grad_norm": 22806.99609375, + "learning_rate": 8.414791883012374e-05, + "loss": 0.4468, + "step": 66050 + }, + { + "epoch": 0.3410363170141522, + "grad_norm": 20348.841796875, + "learning_rate": 8.412122600638646e-05, + "loss": 0.4484, + "step": 66100 + }, + { + "epoch": 0.34129428699676506, + "grad_norm": 21868.353515625, + "learning_rate": 8.409451496939945e-05, + "loss": 0.4601, + "step": 66150 + }, + { + "epoch": 0.34155225697937786, + "grad_norm": 20312.36328125, + "learning_rate": 8.406778573342055e-05, + "loss": 0.4485, + "step": 66200 + }, + { + "epoch": 0.3418102269619907, + "grad_norm": 25603.419921875, + "learning_rate": 8.404103831271733e-05, + "loss": 0.4487, + "step": 66250 + }, + { + "epoch": 0.34206819694460355, + "grad_norm": 21330.416015625, + "learning_rate": 8.4014272721567e-05, + "loss": 0.449, + "step": 66300 + }, + { + "epoch": 0.34232616692721635, + "grad_norm": 20045.4453125, + "learning_rate": 8.398748897425656e-05, + "loss": 0.447, + "step": 66350 + }, + { + "epoch": 0.3425841369098292, + "grad_norm": 21575.642578125, + "learning_rate": 8.396068708508262e-05, + "loss": 0.4495, + "step": 66400 + }, + { + "epoch": 0.342842106892442, + "grad_norm": 20396.5390625, + "learning_rate": 8.393386706835154e-05, + "loss": 0.4478, + "step": 66450 + }, + { + "epoch": 0.34310007687505484, + "grad_norm": 20366.8046875, + "learning_rate": 8.390702893837929e-05, + "loss": 0.4531, + "step": 66500 + }, + { + "epoch": 0.34335804685766763, + "grad_norm": 23514.521484375, + "learning_rate": 8.388017270949158e-05, + "loss": 0.4496, + "step": 66550 + }, + { + "epoch": 0.3436160168402805, + "grad_norm": 23656.869140625, + "learning_rate": 8.385329839602372e-05, + "loss": 0.448, + "step": 66600 + }, + { + "epoch": 0.3438739868228933, + "grad_norm": 23712.216796875, + "learning_rate": 8.382640601232071e-05, + "loss": 0.4502, + "step": 66650 + }, + { + "epoch": 0.3441319568055061, + "grad_norm": 23220.240234375, + "learning_rate": 8.379949557273717e-05, + "loss": 0.4469, + "step": 66700 + }, + { + "epoch": 0.3443899267881189, + "grad_norm": 21469.244140625, + "learning_rate": 8.37725670916374e-05, + "loss": 0.4506, + "step": 66750 + }, + { + "epoch": 0.34464789677073177, + "grad_norm": 19195.431640625, + "learning_rate": 8.374562058339528e-05, + "loss": 0.4494, + "step": 66800 + }, + { + "epoch": 0.34490586675334456, + "grad_norm": 21464.130859375, + "learning_rate": 8.371865606239433e-05, + "loss": 0.4552, + "step": 66850 + }, + { + "epoch": 0.3451638367359574, + "grad_norm": 23449.76953125, + "learning_rate": 8.36916735430277e-05, + "loss": 0.4513, + "step": 66900 + }, + { + "epoch": 0.3454218067185702, + "grad_norm": 20593.39453125, + "learning_rate": 8.366467303969814e-05, + "loss": 0.447, + "step": 66950 + }, + { + "epoch": 0.34567977670118305, + "grad_norm": 21341.72265625, + "learning_rate": 8.3637654566818e-05, + "loss": 0.4448, + "step": 67000 + }, + { + "epoch": 0.3459377466837959, + "grad_norm": 20746.919921875, + "learning_rate": 8.361061813880919e-05, + "loss": 0.4511, + "step": 67050 + }, + { + "epoch": 0.3461957166664087, + "grad_norm": 19786.162109375, + "learning_rate": 8.358356377010325e-05, + "loss": 0.452, + "step": 67100 + }, + { + "epoch": 0.34645368664902154, + "grad_norm": 20875.25, + "learning_rate": 8.355649147514128e-05, + "loss": 0.4491, + "step": 67150 + }, + { + "epoch": 0.34671165663163434, + "grad_norm": 22833.728515625, + "learning_rate": 8.352940126837394e-05, + "loss": 0.4545, + "step": 67200 + }, + { + "epoch": 0.3469696266142472, + "grad_norm": 21289.896484375, + "learning_rate": 8.350229316426146e-05, + "loss": 0.4451, + "step": 67250 + }, + { + "epoch": 0.34722759659686, + "grad_norm": 23276.080078125, + "learning_rate": 8.347516717727363e-05, + "loss": 0.4468, + "step": 67300 + }, + { + "epoch": 0.3474855665794728, + "grad_norm": 22568.234375, + "learning_rate": 8.344802332188977e-05, + "loss": 0.4455, + "step": 67350 + }, + { + "epoch": 0.3477435365620856, + "grad_norm": 19527.234375, + "learning_rate": 8.342086161259874e-05, + "loss": 0.4511, + "step": 67400 + }, + { + "epoch": 0.34800150654469847, + "grad_norm": 21764.56640625, + "learning_rate": 8.339368206389895e-05, + "loss": 0.4481, + "step": 67450 + }, + { + "epoch": 0.34825947652731126, + "grad_norm": 21142.33984375, + "learning_rate": 8.336648469029829e-05, + "loss": 0.4539, + "step": 67500 + }, + { + "epoch": 0.3485174465099241, + "grad_norm": 21612.60546875, + "learning_rate": 8.333926950631421e-05, + "loss": 0.4497, + "step": 67550 + }, + { + "epoch": 0.3487754164925369, + "grad_norm": 20772.0390625, + "learning_rate": 8.331203652647364e-05, + "loss": 0.458, + "step": 67600 + }, + { + "epoch": 0.34903338647514975, + "grad_norm": 22197.166015625, + "learning_rate": 8.328478576531303e-05, + "loss": 0.4499, + "step": 67650 + }, + { + "epoch": 0.34929135645776255, + "grad_norm": 20853.865234375, + "learning_rate": 8.32575172373783e-05, + "loss": 0.4473, + "step": 67700 + }, + { + "epoch": 0.3495493264403754, + "grad_norm": 19692.892578125, + "learning_rate": 8.323023095722486e-05, + "loss": 0.4516, + "step": 67750 + }, + { + "epoch": 0.34980729642298825, + "grad_norm": 22032.115234375, + "learning_rate": 8.32029269394176e-05, + "loss": 0.4452, + "step": 67800 + }, + { + "epoch": 0.35006526640560104, + "grad_norm": 23928.783203125, + "learning_rate": 8.317560519853089e-05, + "loss": 0.4489, + "step": 67850 + }, + { + "epoch": 0.3503232363882139, + "grad_norm": 20832.560546875, + "learning_rate": 8.314826574914853e-05, + "loss": 0.4493, + "step": 67900 + }, + { + "epoch": 0.3505812063708267, + "grad_norm": 23453.634765625, + "learning_rate": 8.31209086058638e-05, + "loss": 0.4487, + "step": 67950 + }, + { + "epoch": 0.35083917635343953, + "grad_norm": 23585.826171875, + "learning_rate": 8.309353378327938e-05, + "loss": 0.4473, + "step": 68000 + }, + { + "epoch": 0.3510971463360523, + "grad_norm": 21680.953125, + "learning_rate": 8.306614129600745e-05, + "loss": 0.4494, + "step": 68050 + }, + { + "epoch": 0.3513551163186652, + "grad_norm": 19228.56640625, + "learning_rate": 8.303873115866958e-05, + "loss": 0.4483, + "step": 68100 + }, + { + "epoch": 0.35161308630127797, + "grad_norm": 22056.6328125, + "learning_rate": 8.301130338589679e-05, + "loss": 0.4485, + "step": 68150 + }, + { + "epoch": 0.3518710562838908, + "grad_norm": 22030.484375, + "learning_rate": 8.298385799232947e-05, + "loss": 0.4462, + "step": 68200 + }, + { + "epoch": 0.3521290262665036, + "grad_norm": 19658.33984375, + "learning_rate": 8.295639499261745e-05, + "loss": 0.4444, + "step": 68250 + }, + { + "epoch": 0.35238699624911646, + "grad_norm": 19667.8125, + "learning_rate": 8.292891440141997e-05, + "loss": 0.4482, + "step": 68300 + }, + { + "epoch": 0.35264496623172925, + "grad_norm": 20248.193359375, + "learning_rate": 8.290141623340558e-05, + "loss": 0.454, + "step": 68350 + }, + { + "epoch": 0.3529029362143421, + "grad_norm": 21358.89453125, + "learning_rate": 8.287390050325232e-05, + "loss": 0.4485, + "step": 68400 + }, + { + "epoch": 0.35316090619695495, + "grad_norm": 19209.328125, + "learning_rate": 8.284636722564754e-05, + "loss": 0.4505, + "step": 68450 + }, + { + "epoch": 0.35341887617956774, + "grad_norm": 21890.7109375, + "learning_rate": 8.281881641528795e-05, + "loss": 0.4531, + "step": 68500 + }, + { + "epoch": 0.3536768461621806, + "grad_norm": 20904.052734375, + "learning_rate": 8.279124808687967e-05, + "loss": 0.4494, + "step": 68550 + }, + { + "epoch": 0.3539348161447934, + "grad_norm": 22519.888671875, + "learning_rate": 8.276366225513812e-05, + "loss": 0.4422, + "step": 68600 + }, + { + "epoch": 0.35419278612740623, + "grad_norm": 20027.009765625, + "learning_rate": 8.27360589347881e-05, + "loss": 0.4484, + "step": 68650 + }, + { + "epoch": 0.354450756110019, + "grad_norm": 22069.64453125, + "learning_rate": 8.27084381405637e-05, + "loss": 0.443, + "step": 68700 + }, + { + "epoch": 0.3547087260926319, + "grad_norm": 23096.74609375, + "learning_rate": 8.26807998872084e-05, + "loss": 0.4437, + "step": 68750 + }, + { + "epoch": 0.35496669607524467, + "grad_norm": 19204.626953125, + "learning_rate": 8.265314418947494e-05, + "loss": 0.4496, + "step": 68800 + }, + { + "epoch": 0.3552246660578575, + "grad_norm": 26871.888671875, + "learning_rate": 8.262547106212541e-05, + "loss": 0.446, + "step": 68850 + }, + { + "epoch": 0.3554826360404703, + "grad_norm": 21342.556640625, + "learning_rate": 8.259778051993118e-05, + "loss": 0.4525, + "step": 68900 + }, + { + "epoch": 0.35574060602308316, + "grad_norm": 23054.814453125, + "learning_rate": 8.25700725776729e-05, + "loss": 0.4427, + "step": 68950 + }, + { + "epoch": 0.35599857600569595, + "grad_norm": 20473.818359375, + "learning_rate": 8.254234725014061e-05, + "loss": 0.4452, + "step": 69000 + }, + { + "epoch": 0.3562565459883088, + "grad_norm": 22081.576171875, + "learning_rate": 8.251460455213347e-05, + "loss": 0.4533, + "step": 69050 + }, + { + "epoch": 0.3565145159709216, + "grad_norm": 21840.048828125, + "learning_rate": 8.248684449846004e-05, + "loss": 0.4503, + "step": 69100 + }, + { + "epoch": 0.35677248595353445, + "grad_norm": 21595.234375, + "learning_rate": 8.245906710393808e-05, + "loss": 0.4459, + "step": 69150 + }, + { + "epoch": 0.3570304559361473, + "grad_norm": 22540.302734375, + "learning_rate": 8.243127238339463e-05, + "loss": 0.4461, + "step": 69200 + }, + { + "epoch": 0.3572884259187601, + "grad_norm": 20646.5859375, + "learning_rate": 8.2403460351666e-05, + "loss": 0.4522, + "step": 69250 + }, + { + "epoch": 0.35754639590137294, + "grad_norm": 20219.978515625, + "learning_rate": 8.237563102359767e-05, + "loss": 0.4464, + "step": 69300 + }, + { + "epoch": 0.35780436588398573, + "grad_norm": 21399.888671875, + "learning_rate": 8.234778441404441e-05, + "loss": 0.451, + "step": 69350 + }, + { + "epoch": 0.3580623358665986, + "grad_norm": 23263.193359375, + "learning_rate": 8.231992053787024e-05, + "loss": 0.4491, + "step": 69400 + }, + { + "epoch": 0.3583203058492114, + "grad_norm": 20740.455078125, + "learning_rate": 8.229203940994829e-05, + "loss": 0.4456, + "step": 69450 + }, + { + "epoch": 0.3585782758318242, + "grad_norm": 21715.078125, + "learning_rate": 8.226414104516102e-05, + "loss": 0.4467, + "step": 69500 + }, + { + "epoch": 0.358836245814437, + "grad_norm": 19771.517578125, + "learning_rate": 8.223622545840001e-05, + "loss": 0.4505, + "step": 69550 + }, + { + "epoch": 0.35909421579704986, + "grad_norm": 20944.298828125, + "learning_rate": 8.220829266456608e-05, + "loss": 0.4481, + "step": 69600 + }, + { + "epoch": 0.35935218577966266, + "grad_norm": 22313.017578125, + "learning_rate": 8.21803426785692e-05, + "loss": 0.4503, + "step": 69650 + }, + { + "epoch": 0.3596101557622755, + "grad_norm": 22525.5859375, + "learning_rate": 8.215237551532853e-05, + "loss": 0.4488, + "step": 69700 + }, + { + "epoch": 0.3598681257448883, + "grad_norm": 22731.85546875, + "learning_rate": 8.21243911897724e-05, + "loss": 0.4476, + "step": 69750 + }, + { + "epoch": 0.36012609572750115, + "grad_norm": 20872.9375, + "learning_rate": 8.20963897168383e-05, + "loss": 0.4485, + "step": 69800 + }, + { + "epoch": 0.360384065710114, + "grad_norm": 21066.095703125, + "learning_rate": 8.206837111147289e-05, + "loss": 0.4511, + "step": 69850 + }, + { + "epoch": 0.3606420356927268, + "grad_norm": 21823.62890625, + "learning_rate": 8.204033538863197e-05, + "loss": 0.4415, + "step": 69900 + }, + { + "epoch": 0.36090000567533964, + "grad_norm": 19639.724609375, + "learning_rate": 8.201228256328042e-05, + "loss": 0.4456, + "step": 69950 + }, + { + "epoch": 0.36115797565795243, + "grad_norm": 25321.20703125, + "learning_rate": 8.198421265039231e-05, + "loss": 0.4506, + "step": 70000 + }, + { + "epoch": 0.36115797565795243, + "eval_loss": 0.43597322702407837, + "eval_runtime": 3285.9769, + "eval_samples_per_second": 943.744, + "eval_steps_per_second": 1.843, + "step": 70000 + }, + { + "epoch": 0.3614159456405653, + "grad_norm": 19558.943359375, + "learning_rate": 8.195612566495084e-05, + "loss": 0.4502, + "step": 70050 + }, + { + "epoch": 0.3616739156231781, + "grad_norm": 21766.482421875, + "learning_rate": 8.192802162194828e-05, + "loss": 0.4444, + "step": 70100 + }, + { + "epoch": 0.3619318856057909, + "grad_norm": 23117.017578125, + "learning_rate": 8.189990053638603e-05, + "loss": 0.4476, + "step": 70150 + }, + { + "epoch": 0.3621898555884037, + "grad_norm": 19175.60546875, + "learning_rate": 8.18717624232746e-05, + "loss": 0.4479, + "step": 70200 + }, + { + "epoch": 0.36244782557101657, + "grad_norm": 22124.80078125, + "learning_rate": 8.184360729763351e-05, + "loss": 0.449, + "step": 70250 + }, + { + "epoch": 0.36270579555362936, + "grad_norm": 21717.501953125, + "learning_rate": 8.181543517449147e-05, + "loss": 0.4488, + "step": 70300 + }, + { + "epoch": 0.3629637655362422, + "grad_norm": 20235.162109375, + "learning_rate": 8.178724606888621e-05, + "loss": 0.4496, + "step": 70350 + }, + { + "epoch": 0.363221735518855, + "grad_norm": 22513.677734375, + "learning_rate": 8.175903999586455e-05, + "loss": 0.4463, + "step": 70400 + }, + { + "epoch": 0.36347970550146785, + "grad_norm": 21388.1953125, + "learning_rate": 8.173081697048228e-05, + "loss": 0.4446, + "step": 70450 + }, + { + "epoch": 0.36373767548408065, + "grad_norm": 20549.271484375, + "learning_rate": 8.170257700780435e-05, + "loss": 0.4421, + "step": 70500 + }, + { + "epoch": 0.3639956454666935, + "grad_norm": 21219.158203125, + "learning_rate": 8.16743201229047e-05, + "loss": 0.4472, + "step": 70550 + }, + { + "epoch": 0.36425361544930634, + "grad_norm": 20570.34375, + "learning_rate": 8.164604633086632e-05, + "loss": 0.4487, + "step": 70600 + }, + { + "epoch": 0.36451158543191914, + "grad_norm": 17376.671875, + "learning_rate": 8.161775564678118e-05, + "loss": 0.4413, + "step": 70650 + }, + { + "epoch": 0.364769555414532, + "grad_norm": 21676.33984375, + "learning_rate": 8.158944808575032e-05, + "loss": 0.4433, + "step": 70700 + }, + { + "epoch": 0.3650275253971448, + "grad_norm": 21901.001953125, + "learning_rate": 8.156112366288378e-05, + "loss": 0.4465, + "step": 70750 + }, + { + "epoch": 0.36528549537975763, + "grad_norm": 20330.720703125, + "learning_rate": 8.153278239330056e-05, + "loss": 0.4456, + "step": 70800 + }, + { + "epoch": 0.3655434653623704, + "grad_norm": 22179.904296875, + "learning_rate": 8.15044242921287e-05, + "loss": 0.4465, + "step": 70850 + }, + { + "epoch": 0.3658014353449833, + "grad_norm": 21384.66015625, + "learning_rate": 8.14760493745052e-05, + "loss": 0.4476, + "step": 70900 + }, + { + "epoch": 0.36605940532759607, + "grad_norm": 21706.103515625, + "learning_rate": 8.144765765557604e-05, + "loss": 0.4475, + "step": 70950 + }, + { + "epoch": 0.3663173753102089, + "grad_norm": 20332.5, + "learning_rate": 8.141924915049617e-05, + "loss": 0.449, + "step": 71000 + }, + { + "epoch": 0.3665753452928217, + "grad_norm": 22648.640625, + "learning_rate": 8.139082387442951e-05, + "loss": 0.4566, + "step": 71050 + }, + { + "epoch": 0.36683331527543456, + "grad_norm": 21496.291015625, + "learning_rate": 8.136238184254892e-05, + "loss": 0.4493, + "step": 71100 + }, + { + "epoch": 0.36709128525804735, + "grad_norm": 22114.169921875, + "learning_rate": 8.133392307003618e-05, + "loss": 0.4441, + "step": 71150 + }, + { + "epoch": 0.3673492552406602, + "grad_norm": 22476.390625, + "learning_rate": 8.130544757208205e-05, + "loss": 0.4391, + "step": 71200 + }, + { + "epoch": 0.367607225223273, + "grad_norm": 22175.044921875, + "learning_rate": 8.127695536388623e-05, + "loss": 0.4439, + "step": 71250 + }, + { + "epoch": 0.36786519520588584, + "grad_norm": 19715.728515625, + "learning_rate": 8.124844646065724e-05, + "loss": 0.448, + "step": 71300 + }, + { + "epoch": 0.3681231651884987, + "grad_norm": 19609.146484375, + "learning_rate": 8.121992087761266e-05, + "loss": 0.4476, + "step": 71350 + }, + { + "epoch": 0.3683811351711115, + "grad_norm": 21872.12890625, + "learning_rate": 8.119137862997883e-05, + "loss": 0.4536, + "step": 71400 + }, + { + "epoch": 0.36863910515372433, + "grad_norm": 19710.619140625, + "learning_rate": 8.116281973299107e-05, + "loss": 0.4466, + "step": 71450 + }, + { + "epoch": 0.3688970751363371, + "grad_norm": 21783.138671875, + "learning_rate": 8.113424420189357e-05, + "loss": 0.4422, + "step": 71500 + }, + { + "epoch": 0.36915504511895, + "grad_norm": 20527.984375, + "learning_rate": 8.110565205193941e-05, + "loss": 0.4499, + "step": 71550 + }, + { + "epoch": 0.36941301510156277, + "grad_norm": 21693.171875, + "learning_rate": 8.10770432983905e-05, + "loss": 0.4465, + "step": 71600 + }, + { + "epoch": 0.3696709850841756, + "grad_norm": 19817.142578125, + "learning_rate": 8.104841795651765e-05, + "loss": 0.4471, + "step": 71650 + }, + { + "epoch": 0.3699289550667884, + "grad_norm": 20883.767578125, + "learning_rate": 8.101977604160052e-05, + "loss": 0.4507, + "step": 71700 + }, + { + "epoch": 0.37018692504940126, + "grad_norm": 21206.943359375, + "learning_rate": 8.099111756892759e-05, + "loss": 0.4415, + "step": 71750 + }, + { + "epoch": 0.37044489503201405, + "grad_norm": 21431.19140625, + "learning_rate": 8.096244255379621e-05, + "loss": 0.4542, + "step": 71800 + }, + { + "epoch": 0.3707028650146269, + "grad_norm": 23020.34375, + "learning_rate": 8.093375101151255e-05, + "loss": 0.4481, + "step": 71850 + }, + { + "epoch": 0.3709608349972397, + "grad_norm": 20704.1171875, + "learning_rate": 8.09050429573916e-05, + "loss": 0.4427, + "step": 71900 + }, + { + "epoch": 0.37121880497985255, + "grad_norm": 20195.037109375, + "learning_rate": 8.087631840675715e-05, + "loss": 0.4416, + "step": 71950 + }, + { + "epoch": 0.3714767749624654, + "grad_norm": 21187.99609375, + "learning_rate": 8.084757737494184e-05, + "loss": 0.452, + "step": 72000 + }, + { + "epoch": 0.3717347449450782, + "grad_norm": 20694.912109375, + "learning_rate": 8.081881987728703e-05, + "loss": 0.4416, + "step": 72050 + }, + { + "epoch": 0.37199271492769104, + "grad_norm": 23006.939453125, + "learning_rate": 8.079004592914297e-05, + "loss": 0.4426, + "step": 72100 + }, + { + "epoch": 0.37225068491030383, + "grad_norm": 21854.025390625, + "learning_rate": 8.076125554586859e-05, + "loss": 0.4453, + "step": 72150 + }, + { + "epoch": 0.3725086548929167, + "grad_norm": 19155.400390625, + "learning_rate": 8.073244874283166e-05, + "loss": 0.4539, + "step": 72200 + }, + { + "epoch": 0.3727666248755295, + "grad_norm": 22085.5625, + "learning_rate": 8.070362553540869e-05, + "loss": 0.4474, + "step": 72250 + }, + { + "epoch": 0.3730245948581423, + "grad_norm": 21225.626953125, + "learning_rate": 8.067478593898495e-05, + "loss": 0.4431, + "step": 72300 + }, + { + "epoch": 0.3732825648407551, + "grad_norm": 21605.546875, + "learning_rate": 8.064592996895446e-05, + "loss": 0.4534, + "step": 72350 + }, + { + "epoch": 0.37354053482336796, + "grad_norm": 20774.87109375, + "learning_rate": 8.061705764071999e-05, + "loss": 0.4462, + "step": 72400 + }, + { + "epoch": 0.37379850480598076, + "grad_norm": 21871.390625, + "learning_rate": 8.0588168969693e-05, + "loss": 0.4445, + "step": 72450 + }, + { + "epoch": 0.3740564747885936, + "grad_norm": 22102.560546875, + "learning_rate": 8.05592639712937e-05, + "loss": 0.4478, + "step": 72500 + }, + { + "epoch": 0.3743144447712064, + "grad_norm": 21172.283203125, + "learning_rate": 8.053034266095105e-05, + "loss": 0.4469, + "step": 72550 + }, + { + "epoch": 0.37457241475381925, + "grad_norm": 21827.390625, + "learning_rate": 8.050140505410268e-05, + "loss": 0.4485, + "step": 72600 + }, + { + "epoch": 0.37483038473643204, + "grad_norm": 21271.87890625, + "learning_rate": 8.047245116619492e-05, + "loss": 0.45, + "step": 72650 + }, + { + "epoch": 0.3750883547190449, + "grad_norm": 21192.6484375, + "learning_rate": 8.04434810126828e-05, + "loss": 0.442, + "step": 72700 + }, + { + "epoch": 0.37534632470165774, + "grad_norm": 21529.736328125, + "learning_rate": 8.041449460903001e-05, + "loss": 0.4462, + "step": 72750 + }, + { + "epoch": 0.37560429468427053, + "grad_norm": 18609.474609375, + "learning_rate": 8.038549197070893e-05, + "loss": 0.4436, + "step": 72800 + }, + { + "epoch": 0.3758622646668834, + "grad_norm": 21631.82421875, + "learning_rate": 8.035647311320062e-05, + "loss": 0.4507, + "step": 72850 + }, + { + "epoch": 0.3761202346494962, + "grad_norm": 22347.056640625, + "learning_rate": 8.03274380519948e-05, + "loss": 0.4472, + "step": 72900 + }, + { + "epoch": 0.376378204632109, + "grad_norm": 20416.37109375, + "learning_rate": 8.029838680258979e-05, + "loss": 0.4475, + "step": 72950 + }, + { + "epoch": 0.3766361746147218, + "grad_norm": 21952.27734375, + "learning_rate": 8.026931938049259e-05, + "loss": 0.4449, + "step": 73000 + }, + { + "epoch": 0.37689414459733467, + "grad_norm": 23068.12109375, + "learning_rate": 8.024023580121885e-05, + "loss": 0.4477, + "step": 73050 + }, + { + "epoch": 0.37715211457994746, + "grad_norm": 21956.462890625, + "learning_rate": 8.021113608029281e-05, + "loss": 0.4459, + "step": 73100 + }, + { + "epoch": 0.3774100845625603, + "grad_norm": 20933.28125, + "learning_rate": 8.018202023324733e-05, + "loss": 0.4481, + "step": 73150 + }, + { + "epoch": 0.3776680545451731, + "grad_norm": 23138.638671875, + "learning_rate": 8.015288827562389e-05, + "loss": 0.437, + "step": 73200 + }, + { + "epoch": 0.37792602452778595, + "grad_norm": 20973.119140625, + "learning_rate": 8.012374022297255e-05, + "loss": 0.4454, + "step": 73250 + }, + { + "epoch": 0.37818399451039875, + "grad_norm": 21328.29296875, + "learning_rate": 8.0094576090852e-05, + "loss": 0.4426, + "step": 73300 + }, + { + "epoch": 0.3784419644930116, + "grad_norm": 20653.591796875, + "learning_rate": 8.006539589482949e-05, + "loss": 0.4448, + "step": 73350 + }, + { + "epoch": 0.3786999344756244, + "grad_norm": 21520.181640625, + "learning_rate": 8.003619965048083e-05, + "loss": 0.4428, + "step": 73400 + }, + { + "epoch": 0.37895790445823724, + "grad_norm": 20736.89453125, + "learning_rate": 8.000698737339041e-05, + "loss": 0.4483, + "step": 73450 + }, + { + "epoch": 0.3792158744408501, + "grad_norm": 23887.587890625, + "learning_rate": 7.997775907915118e-05, + "loss": 0.4518, + "step": 73500 + }, + { + "epoch": 0.3794738444234629, + "grad_norm": 23771.8671875, + "learning_rate": 7.994851478336465e-05, + "loss": 0.4479, + "step": 73550 + }, + { + "epoch": 0.37973181440607573, + "grad_norm": 21563.27734375, + "learning_rate": 7.991925450164084e-05, + "loss": 0.4433, + "step": 73600 + }, + { + "epoch": 0.3799897843886885, + "grad_norm": 21403.751953125, + "learning_rate": 7.988997824959832e-05, + "loss": 0.4443, + "step": 73650 + }, + { + "epoch": 0.38024775437130137, + "grad_norm": 22136.51171875, + "learning_rate": 7.986068604286421e-05, + "loss": 0.446, + "step": 73700 + }, + { + "epoch": 0.38050572435391417, + "grad_norm": 22143.857421875, + "learning_rate": 7.98313778970741e-05, + "loss": 0.4416, + "step": 73750 + }, + { + "epoch": 0.380763694336527, + "grad_norm": 22035.1171875, + "learning_rate": 7.980205382787211e-05, + "loss": 0.4413, + "step": 73800 + }, + { + "epoch": 0.3810216643191398, + "grad_norm": 21744.25390625, + "learning_rate": 7.97727138509109e-05, + "loss": 0.4463, + "step": 73850 + }, + { + "epoch": 0.38127963430175266, + "grad_norm": 21739.26171875, + "learning_rate": 7.974335798185153e-05, + "loss": 0.4415, + "step": 73900 + }, + { + "epoch": 0.38153760428436545, + "grad_norm": 20974.59765625, + "learning_rate": 7.971398623636361e-05, + "loss": 0.4457, + "step": 73950 + }, + { + "epoch": 0.3817955742669783, + "grad_norm": 19807.79296875, + "learning_rate": 7.968459863012523e-05, + "loss": 0.4423, + "step": 74000 + }, + { + "epoch": 0.3820535442495911, + "grad_norm": 21711.158203125, + "learning_rate": 7.96551951788229e-05, + "loss": 0.4466, + "step": 74050 + }, + { + "epoch": 0.38231151423220394, + "grad_norm": 19187.47265625, + "learning_rate": 7.962577589815163e-05, + "loss": 0.4387, + "step": 74100 + }, + { + "epoch": 0.3825694842148168, + "grad_norm": 19402.611328125, + "learning_rate": 7.959634080381486e-05, + "loss": 0.444, + "step": 74150 + }, + { + "epoch": 0.3828274541974296, + "grad_norm": 21287.9765625, + "learning_rate": 7.956688991152445e-05, + "loss": 0.4386, + "step": 74200 + }, + { + "epoch": 0.38308542418004243, + "grad_norm": 20430.591796875, + "learning_rate": 7.953742323700075e-05, + "loss": 0.4453, + "step": 74250 + }, + { + "epoch": 0.3833433941626552, + "grad_norm": 23246.041015625, + "learning_rate": 7.950794079597248e-05, + "loss": 0.4448, + "step": 74300 + }, + { + "epoch": 0.3836013641452681, + "grad_norm": 23098.74609375, + "learning_rate": 7.94784426041768e-05, + "loss": 0.4449, + "step": 74350 + }, + { + "epoch": 0.38385933412788087, + "grad_norm": 21504.71484375, + "learning_rate": 7.944892867735929e-05, + "loss": 0.4423, + "step": 74400 + }, + { + "epoch": 0.3841173041104937, + "grad_norm": 20115.0859375, + "learning_rate": 7.941939903127386e-05, + "loss": 0.4462, + "step": 74450 + }, + { + "epoch": 0.3843752740931065, + "grad_norm": 20473.681640625, + "learning_rate": 7.938985368168293e-05, + "loss": 0.4541, + "step": 74500 + }, + { + "epoch": 0.38463324407571936, + "grad_norm": 19664.6640625, + "learning_rate": 7.93602926443572e-05, + "loss": 0.4439, + "step": 74550 + }, + { + "epoch": 0.38489121405833215, + "grad_norm": 20806.474609375, + "learning_rate": 7.933071593507579e-05, + "loss": 0.439, + "step": 74600 + }, + { + "epoch": 0.385149184040945, + "grad_norm": 20905.197265625, + "learning_rate": 7.930112356962618e-05, + "loss": 0.444, + "step": 74650 + }, + { + "epoch": 0.3854071540235578, + "grad_norm": 26333.470703125, + "learning_rate": 7.927151556380417e-05, + "loss": 0.4462, + "step": 74700 + }, + { + "epoch": 0.38566512400617065, + "grad_norm": 20478.18359375, + "learning_rate": 7.924189193341396e-05, + "loss": 0.4456, + "step": 74750 + }, + { + "epoch": 0.38592309398878344, + "grad_norm": 20605.662109375, + "learning_rate": 7.921225269426808e-05, + "loss": 0.4412, + "step": 74800 + }, + { + "epoch": 0.3861810639713963, + "grad_norm": 23029.943359375, + "learning_rate": 7.918259786218738e-05, + "loss": 0.4427, + "step": 74850 + }, + { + "epoch": 0.38643903395400914, + "grad_norm": 23275.130859375, + "learning_rate": 7.915292745300103e-05, + "loss": 0.4436, + "step": 74900 + }, + { + "epoch": 0.38669700393662193, + "grad_norm": 22123.671875, + "learning_rate": 7.91232414825465e-05, + "loss": 0.4456, + "step": 74950 + }, + { + "epoch": 0.3869549739192348, + "grad_norm": 22476.365234375, + "learning_rate": 7.909353996666961e-05, + "loss": 0.4424, + "step": 75000 + }, + { + "epoch": 0.3869549739192348, + "eval_loss": 0.43277591466903687, + "eval_runtime": 3260.4686, + "eval_samples_per_second": 951.127, + "eval_steps_per_second": 1.858, + "step": 75000 + }, + { + "epoch": 0.3872129439018476, + "grad_norm": 22150.966796875, + "learning_rate": 7.906382292122448e-05, + "loss": 0.4407, + "step": 75050 + }, + { + "epoch": 0.3874709138844604, + "grad_norm": 20100.5625, + "learning_rate": 7.903409036207343e-05, + "loss": 0.4443, + "step": 75100 + }, + { + "epoch": 0.3877288838670732, + "grad_norm": 22078.353515625, + "learning_rate": 7.900434230508715e-05, + "loss": 0.4468, + "step": 75150 + }, + { + "epoch": 0.38798685384968606, + "grad_norm": 20395.498046875, + "learning_rate": 7.897457876614461e-05, + "loss": 0.4424, + "step": 75200 + }, + { + "epoch": 0.38824482383229886, + "grad_norm": 23190.4140625, + "learning_rate": 7.894479976113298e-05, + "loss": 0.4394, + "step": 75250 + }, + { + "epoch": 0.3885027938149117, + "grad_norm": 21523.7265625, + "learning_rate": 7.891500530594771e-05, + "loss": 0.4441, + "step": 75300 + }, + { + "epoch": 0.3887607637975245, + "grad_norm": 22941.23828125, + "learning_rate": 7.888519541649253e-05, + "loss": 0.443, + "step": 75350 + }, + { + "epoch": 0.38901873378013735, + "grad_norm": 21467.90234375, + "learning_rate": 7.885537010867936e-05, + "loss": 0.4478, + "step": 75400 + }, + { + "epoch": 0.38927670376275014, + "grad_norm": 22635.732421875, + "learning_rate": 7.882552939842837e-05, + "loss": 0.4415, + "step": 75450 + }, + { + "epoch": 0.389534673745363, + "grad_norm": 21242.326171875, + "learning_rate": 7.879567330166797e-05, + "loss": 0.4352, + "step": 75500 + }, + { + "epoch": 0.38979264372797584, + "grad_norm": 20005.158203125, + "learning_rate": 7.876580183433475e-05, + "loss": 0.4393, + "step": 75550 + }, + { + "epoch": 0.39005061371058863, + "grad_norm": 23355.044921875, + "learning_rate": 7.873591501237351e-05, + "loss": 0.4465, + "step": 75600 + }, + { + "epoch": 0.3903085836932015, + "grad_norm": 21217.359375, + "learning_rate": 7.870601285173731e-05, + "loss": 0.4437, + "step": 75650 + }, + { + "epoch": 0.3905665536758143, + "grad_norm": 22424.580078125, + "learning_rate": 7.867609536838729e-05, + "loss": 0.4397, + "step": 75700 + }, + { + "epoch": 0.3908245236584271, + "grad_norm": 20943.65234375, + "learning_rate": 7.864616257829285e-05, + "loss": 0.4427, + "step": 75750 + }, + { + "epoch": 0.3910824936410399, + "grad_norm": 23246.5625, + "learning_rate": 7.861621449743152e-05, + "loss": 0.4479, + "step": 75800 + }, + { + "epoch": 0.39134046362365277, + "grad_norm": 21575.830078125, + "learning_rate": 7.858625114178902e-05, + "loss": 0.4384, + "step": 75850 + }, + { + "epoch": 0.39159843360626556, + "grad_norm": 22053.5546875, + "learning_rate": 7.855627252735918e-05, + "loss": 0.4364, + "step": 75900 + }, + { + "epoch": 0.3918564035888784, + "grad_norm": 21934.55078125, + "learning_rate": 7.852627867014406e-05, + "loss": 0.4466, + "step": 75950 + }, + { + "epoch": 0.3921143735714912, + "grad_norm": 20184.078125, + "learning_rate": 7.849626958615374e-05, + "loss": 0.4422, + "step": 76000 + }, + { + "epoch": 0.39237234355410405, + "grad_norm": 21770.923828125, + "learning_rate": 7.846624529140652e-05, + "loss": 0.4382, + "step": 76050 + }, + { + "epoch": 0.39263031353671685, + "grad_norm": 21592.16796875, + "learning_rate": 7.843620580192877e-05, + "loss": 0.4404, + "step": 76100 + }, + { + "epoch": 0.3928882835193297, + "grad_norm": 19634.1875, + "learning_rate": 7.8406151133755e-05, + "loss": 0.4443, + "step": 76150 + }, + { + "epoch": 0.3931462535019425, + "grad_norm": 24045.01171875, + "learning_rate": 7.837608130292782e-05, + "loss": 0.438, + "step": 76200 + }, + { + "epoch": 0.39340422348455534, + "grad_norm": 21739.921875, + "learning_rate": 7.83459963254979e-05, + "loss": 0.4474, + "step": 76250 + }, + { + "epoch": 0.3936621934671682, + "grad_norm": 20915.56640625, + "learning_rate": 7.831589621752405e-05, + "loss": 0.4463, + "step": 76300 + }, + { + "epoch": 0.393920163449781, + "grad_norm": 18799.80078125, + "learning_rate": 7.828578099507308e-05, + "loss": 0.4401, + "step": 76350 + }, + { + "epoch": 0.39417813343239383, + "grad_norm": 19029.51171875, + "learning_rate": 7.825565067421995e-05, + "loss": 0.4428, + "step": 76400 + }, + { + "epoch": 0.3944361034150066, + "grad_norm": 22817.376953125, + "learning_rate": 7.822550527104762e-05, + "loss": 0.4467, + "step": 76450 + }, + { + "epoch": 0.39469407339761947, + "grad_norm": 19165.529296875, + "learning_rate": 7.819534480164713e-05, + "loss": 0.4365, + "step": 76500 + }, + { + "epoch": 0.39495204338023226, + "grad_norm": 22980.056640625, + "learning_rate": 7.816516928211756e-05, + "loss": 0.4386, + "step": 76550 + }, + { + "epoch": 0.3952100133628451, + "grad_norm": 21261.7109375, + "learning_rate": 7.813497872856603e-05, + "loss": 0.4358, + "step": 76600 + }, + { + "epoch": 0.3954679833454579, + "grad_norm": 21533.779296875, + "learning_rate": 7.810477315710763e-05, + "loss": 0.4444, + "step": 76650 + }, + { + "epoch": 0.39572595332807076, + "grad_norm": 20503.556640625, + "learning_rate": 7.807455258386556e-05, + "loss": 0.4446, + "step": 76700 + }, + { + "epoch": 0.39598392331068355, + "grad_norm": 21180.939453125, + "learning_rate": 7.804431702497093e-05, + "loss": 0.4486, + "step": 76750 + }, + { + "epoch": 0.3962418932932964, + "grad_norm": 24126.484375, + "learning_rate": 7.801406649656294e-05, + "loss": 0.4419, + "step": 76800 + }, + { + "epoch": 0.3964998632759092, + "grad_norm": 19791.345703125, + "learning_rate": 7.79838010147887e-05, + "loss": 0.4499, + "step": 76850 + }, + { + "epoch": 0.39675783325852204, + "grad_norm": 21118.822265625, + "learning_rate": 7.795352059580334e-05, + "loss": 0.4403, + "step": 76900 + }, + { + "epoch": 0.39701580324113483, + "grad_norm": 20787.6015625, + "learning_rate": 7.792322525577e-05, + "loss": 0.4394, + "step": 76950 + }, + { + "epoch": 0.3972737732237477, + "grad_norm": 21575.86328125, + "learning_rate": 7.789291501085972e-05, + "loss": 0.4482, + "step": 77000 + }, + { + "epoch": 0.39753174320636053, + "grad_norm": 21271.287109375, + "learning_rate": 7.78625898772515e-05, + "loss": 0.4413, + "step": 77050 + }, + { + "epoch": 0.3977897131889733, + "grad_norm": 21294.7890625, + "learning_rate": 7.783224987113235e-05, + "loss": 0.4393, + "step": 77100 + }, + { + "epoch": 0.3980476831715862, + "grad_norm": 21880.341796875, + "learning_rate": 7.780189500869716e-05, + "loss": 0.4464, + "step": 77150 + }, + { + "epoch": 0.39830565315419897, + "grad_norm": 22501.482421875, + "learning_rate": 7.777152530614876e-05, + "loss": 0.4384, + "step": 77200 + }, + { + "epoch": 0.3985636231368118, + "grad_norm": 20404.89453125, + "learning_rate": 7.774114077969792e-05, + "loss": 0.4355, + "step": 77250 + }, + { + "epoch": 0.3988215931194246, + "grad_norm": 21435.66015625, + "learning_rate": 7.77107414455633e-05, + "loss": 0.4468, + "step": 77300 + }, + { + "epoch": 0.39907956310203746, + "grad_norm": 20239.091796875, + "learning_rate": 7.768032731997148e-05, + "loss": 0.4453, + "step": 77350 + }, + { + "epoch": 0.39933753308465025, + "grad_norm": 19040.37109375, + "learning_rate": 7.764989841915694e-05, + "loss": 0.4487, + "step": 77400 + }, + { + "epoch": 0.3995955030672631, + "grad_norm": 22501.13671875, + "learning_rate": 7.761945475936203e-05, + "loss": 0.4488, + "step": 77450 + }, + { + "epoch": 0.3998534730498759, + "grad_norm": 20773.27734375, + "learning_rate": 7.7588996356837e-05, + "loss": 0.4384, + "step": 77500 + }, + { + "epoch": 0.40011144303248874, + "grad_norm": 22598.4140625, + "learning_rate": 7.755852322783994e-05, + "loss": 0.4358, + "step": 77550 + }, + { + "epoch": 0.40036941301510154, + "grad_norm": 20656.033203125, + "learning_rate": 7.752803538863683e-05, + "loss": 0.4434, + "step": 77600 + }, + { + "epoch": 0.4006273829977144, + "grad_norm": 20882.3125, + "learning_rate": 7.749753285550146e-05, + "loss": 0.4408, + "step": 77650 + }, + { + "epoch": 0.40088535298032724, + "grad_norm": 19519.408203125, + "learning_rate": 7.746701564471553e-05, + "loss": 0.439, + "step": 77700 + }, + { + "epoch": 0.40114332296294003, + "grad_norm": 21141.80859375, + "learning_rate": 7.74364837725685e-05, + "loss": 0.4422, + "step": 77750 + }, + { + "epoch": 0.4014012929455529, + "grad_norm": 21487.45703125, + "learning_rate": 7.74059372553577e-05, + "loss": 0.429, + "step": 77800 + }, + { + "epoch": 0.4016592629281657, + "grad_norm": 19889.447265625, + "learning_rate": 7.737537610938829e-05, + "loss": 0.4474, + "step": 77850 + }, + { + "epoch": 0.4019172329107785, + "grad_norm": 21914.947265625, + "learning_rate": 7.73448003509732e-05, + "loss": 0.4403, + "step": 77900 + }, + { + "epoch": 0.4021752028933913, + "grad_norm": 24025.521484375, + "learning_rate": 7.731420999643319e-05, + "loss": 0.4432, + "step": 77950 + }, + { + "epoch": 0.40243317287600416, + "grad_norm": 19703.50390625, + "learning_rate": 7.728360506209679e-05, + "loss": 0.443, + "step": 78000 + }, + { + "epoch": 0.40269114285861696, + "grad_norm": 21566.37890625, + "learning_rate": 7.725298556430034e-05, + "loss": 0.448, + "step": 78050 + }, + { + "epoch": 0.4029491128412298, + "grad_norm": 21902.564453125, + "learning_rate": 7.72223515193879e-05, + "loss": 0.438, + "step": 78100 + }, + { + "epoch": 0.4032070828238426, + "grad_norm": 20892.7578125, + "learning_rate": 7.719170294371136e-05, + "loss": 0.4382, + "step": 78150 + }, + { + "epoch": 0.40346505280645545, + "grad_norm": 21648.673828125, + "learning_rate": 7.716103985363033e-05, + "loss": 0.4378, + "step": 78200 + }, + { + "epoch": 0.40372302278906824, + "grad_norm": 23124.40625, + "learning_rate": 7.713036226551215e-05, + "loss": 0.442, + "step": 78250 + }, + { + "epoch": 0.4039809927716811, + "grad_norm": 25006.751953125, + "learning_rate": 7.709967019573195e-05, + "loss": 0.4397, + "step": 78300 + }, + { + "epoch": 0.4042389627542939, + "grad_norm": 20722.802734375, + "learning_rate": 7.706896366067256e-05, + "loss": 0.4388, + "step": 78350 + }, + { + "epoch": 0.40449693273690673, + "grad_norm": 20202.013671875, + "learning_rate": 7.703824267672452e-05, + "loss": 0.4404, + "step": 78400 + }, + { + "epoch": 0.4047549027195196, + "grad_norm": 21261.9375, + "learning_rate": 7.700750726028609e-05, + "loss": 0.4369, + "step": 78450 + }, + { + "epoch": 0.4050128727021324, + "grad_norm": 25343.57421875, + "learning_rate": 7.69767574277633e-05, + "loss": 0.4444, + "step": 78500 + }, + { + "epoch": 0.4052708426847452, + "grad_norm": 20222.767578125, + "learning_rate": 7.694599319556972e-05, + "loss": 0.4425, + "step": 78550 + }, + { + "epoch": 0.405528812667358, + "grad_norm": 22934.466796875, + "learning_rate": 7.691521458012678e-05, + "loss": 0.4411, + "step": 78600 + }, + { + "epoch": 0.40578678264997087, + "grad_norm": 22235.30078125, + "learning_rate": 7.688442159786346e-05, + "loss": 0.4445, + "step": 78650 + }, + { + "epoch": 0.40604475263258366, + "grad_norm": 21313.986328125, + "learning_rate": 7.68536142652165e-05, + "loss": 0.4341, + "step": 78700 + }, + { + "epoch": 0.4063027226151965, + "grad_norm": 20130.53515625, + "learning_rate": 7.68227925986302e-05, + "loss": 0.4395, + "step": 78750 + }, + { + "epoch": 0.4065606925978093, + "grad_norm": 19342.740234375, + "learning_rate": 7.679195661455664e-05, + "loss": 0.4424, + "step": 78800 + }, + { + "epoch": 0.40681866258042215, + "grad_norm": 21876.705078125, + "learning_rate": 7.676110632945543e-05, + "loss": 0.4415, + "step": 78850 + }, + { + "epoch": 0.40707663256303495, + "grad_norm": 23199.501953125, + "learning_rate": 7.673024175979384e-05, + "loss": 0.4423, + "step": 78900 + }, + { + "epoch": 0.4073346025456478, + "grad_norm": 22781.091796875, + "learning_rate": 7.669936292204683e-05, + "loss": 0.4398, + "step": 78950 + }, + { + "epoch": 0.4075925725282606, + "grad_norm": 24025.9375, + "learning_rate": 7.666846983269688e-05, + "loss": 0.4326, + "step": 79000 + }, + { + "epoch": 0.40785054251087344, + "grad_norm": 20797.056640625, + "learning_rate": 7.663756250823413e-05, + "loss": 0.4388, + "step": 79050 + }, + { + "epoch": 0.40810851249348623, + "grad_norm": 25106.67578125, + "learning_rate": 7.660664096515632e-05, + "loss": 0.4385, + "step": 79100 + }, + { + "epoch": 0.4083664824760991, + "grad_norm": 22217.36328125, + "learning_rate": 7.657570521996877e-05, + "loss": 0.4455, + "step": 79150 + }, + { + "epoch": 0.40862445245871193, + "grad_norm": 21679.291015625, + "learning_rate": 7.654475528918439e-05, + "loss": 0.4409, + "step": 79200 + }, + { + "epoch": 0.4088824224413247, + "grad_norm": 20133.583984375, + "learning_rate": 7.651379118932364e-05, + "loss": 0.4391, + "step": 79250 + }, + { + "epoch": 0.40914039242393757, + "grad_norm": 23019.171875, + "learning_rate": 7.648281293691457e-05, + "loss": 0.446, + "step": 79300 + }, + { + "epoch": 0.40939836240655036, + "grad_norm": 24098.38671875, + "learning_rate": 7.645182054849276e-05, + "loss": 0.4417, + "step": 79350 + }, + { + "epoch": 0.4096563323891632, + "grad_norm": 23057.240234375, + "learning_rate": 7.642081404060136e-05, + "loss": 0.4424, + "step": 79400 + }, + { + "epoch": 0.409914302371776, + "grad_norm": 20033.328125, + "learning_rate": 7.638979342979103e-05, + "loss": 0.4386, + "step": 79450 + }, + { + "epoch": 0.41017227235438886, + "grad_norm": 20978.68359375, + "learning_rate": 7.635875873261995e-05, + "loss": 0.4363, + "step": 79500 + }, + { + "epoch": 0.41043024233700165, + "grad_norm": 21347.068359375, + "learning_rate": 7.63277099656539e-05, + "loss": 0.4431, + "step": 79550 + }, + { + "epoch": 0.4106882123196145, + "grad_norm": 22031.8125, + "learning_rate": 7.629664714546604e-05, + "loss": 0.4313, + "step": 79600 + }, + { + "epoch": 0.4109461823022273, + "grad_norm": 23963.99609375, + "learning_rate": 7.626557028863717e-05, + "loss": 0.4363, + "step": 79650 + }, + { + "epoch": 0.41120415228484014, + "grad_norm": 20183.259765625, + "learning_rate": 7.623447941175548e-05, + "loss": 0.4419, + "step": 79700 + }, + { + "epoch": 0.41146212226745293, + "grad_norm": 23588.68359375, + "learning_rate": 7.620337453141667e-05, + "loss": 0.4388, + "step": 79750 + }, + { + "epoch": 0.4117200922500658, + "grad_norm": 22210.7265625, + "learning_rate": 7.617225566422395e-05, + "loss": 0.442, + "step": 79800 + }, + { + "epoch": 0.41197806223267863, + "grad_norm": 18647.93359375, + "learning_rate": 7.614112282678794e-05, + "loss": 0.4349, + "step": 79850 + }, + { + "epoch": 0.4122360322152914, + "grad_norm": 20993.388671875, + "learning_rate": 7.610997603572675e-05, + "loss": 0.4386, + "step": 79900 + }, + { + "epoch": 0.4124940021979043, + "grad_norm": 23693.26171875, + "learning_rate": 7.607881530766596e-05, + "loss": 0.4385, + "step": 79950 + }, + { + "epoch": 0.41275197218051707, + "grad_norm": 22608.26953125, + "learning_rate": 7.604764065923852e-05, + "loss": 0.4415, + "step": 80000 + }, + { + "epoch": 0.41275197218051707, + "eval_loss": 0.4290848970413208, + "eval_runtime": 3332.9887, + "eval_samples_per_second": 930.432, + "eval_steps_per_second": 1.817, + "step": 80000 + }, + { + "epoch": 0.4130099421631299, + "grad_norm": 23348.44921875, + "learning_rate": 7.60164521070849e-05, + "loss": 0.4392, + "step": 80050 + }, + { + "epoch": 0.4132679121457427, + "grad_norm": 19942.9921875, + "learning_rate": 7.598524966785293e-05, + "loss": 0.4362, + "step": 80100 + }, + { + "epoch": 0.41352588212835556, + "grad_norm": 22776.587890625, + "learning_rate": 7.595403335819786e-05, + "loss": 0.4402, + "step": 80150 + }, + { + "epoch": 0.41378385211096835, + "grad_norm": 22519.923828125, + "learning_rate": 7.592280319478233e-05, + "loss": 0.4412, + "step": 80200 + }, + { + "epoch": 0.4140418220935812, + "grad_norm": 22480.52734375, + "learning_rate": 7.589155919427645e-05, + "loss": 0.4393, + "step": 80250 + }, + { + "epoch": 0.414299792076194, + "grad_norm": 20900.625, + "learning_rate": 7.586030137335762e-05, + "loss": 0.4344, + "step": 80300 + }, + { + "epoch": 0.41455776205880684, + "grad_norm": 21272.306640625, + "learning_rate": 7.582902974871069e-05, + "loss": 0.4385, + "step": 80350 + }, + { + "epoch": 0.41481573204141964, + "grad_norm": 21448.478515625, + "learning_rate": 7.57977443370278e-05, + "loss": 0.4395, + "step": 80400 + }, + { + "epoch": 0.4150737020240325, + "grad_norm": 21854.537109375, + "learning_rate": 7.576644515500855e-05, + "loss": 0.4411, + "step": 80450 + }, + { + "epoch": 0.4153316720066453, + "grad_norm": 21458.689453125, + "learning_rate": 7.573513221935979e-05, + "loss": 0.4429, + "step": 80500 + }, + { + "epoch": 0.41558964198925813, + "grad_norm": 21895.71875, + "learning_rate": 7.57038055467958e-05, + "loss": 0.4391, + "step": 80550 + }, + { + "epoch": 0.415847611971871, + "grad_norm": 23495.921875, + "learning_rate": 7.567246515403812e-05, + "loss": 0.4398, + "step": 80600 + }, + { + "epoch": 0.41610558195448377, + "grad_norm": 26117.8671875, + "learning_rate": 7.564111105781568e-05, + "loss": 0.4407, + "step": 80650 + }, + { + "epoch": 0.4163635519370966, + "grad_norm": 21881.818359375, + "learning_rate": 7.560974327486466e-05, + "loss": 0.4336, + "step": 80700 + }, + { + "epoch": 0.4166215219197094, + "grad_norm": 21309.1015625, + "learning_rate": 7.557836182192859e-05, + "loss": 0.4371, + "step": 80750 + }, + { + "epoch": 0.41687949190232226, + "grad_norm": 21723.498046875, + "learning_rate": 7.554696671575826e-05, + "loss": 0.4384, + "step": 80800 + }, + { + "epoch": 0.41713746188493506, + "grad_norm": 19767.9609375, + "learning_rate": 7.55155579731118e-05, + "loss": 0.4375, + "step": 80850 + }, + { + "epoch": 0.4173954318675479, + "grad_norm": 18992.958984375, + "learning_rate": 7.548413561075456e-05, + "loss": 0.4419, + "step": 80900 + }, + { + "epoch": 0.4176534018501607, + "grad_norm": 21593.255859375, + "learning_rate": 7.545269964545921e-05, + "loss": 0.4372, + "step": 80950 + }, + { + "epoch": 0.41791137183277355, + "grad_norm": 19369.3125, + "learning_rate": 7.542125009400565e-05, + "loss": 0.4402, + "step": 81000 + }, + { + "epoch": 0.41816934181538634, + "grad_norm": 20552.06640625, + "learning_rate": 7.538978697318105e-05, + "loss": 0.4418, + "step": 81050 + }, + { + "epoch": 0.4184273117979992, + "grad_norm": 21554.94140625, + "learning_rate": 7.53583102997798e-05, + "loss": 0.4406, + "step": 81100 + }, + { + "epoch": 0.418685281780612, + "grad_norm": 21098.296875, + "learning_rate": 7.532682009060356e-05, + "loss": 0.443, + "step": 81150 + }, + { + "epoch": 0.41894325176322483, + "grad_norm": 24148.71484375, + "learning_rate": 7.529531636246116e-05, + "loss": 0.4345, + "step": 81200 + }, + { + "epoch": 0.4192012217458376, + "grad_norm": 20404.298828125, + "learning_rate": 7.526379913216872e-05, + "loss": 0.4335, + "step": 81250 + }, + { + "epoch": 0.4194591917284505, + "grad_norm": 22061.607421875, + "learning_rate": 7.52322684165495e-05, + "loss": 0.4385, + "step": 81300 + }, + { + "epoch": 0.4197171617110633, + "grad_norm": 18455.380859375, + "learning_rate": 7.520072423243398e-05, + "loss": 0.4337, + "step": 81350 + }, + { + "epoch": 0.4199751316936761, + "grad_norm": 23344.2734375, + "learning_rate": 7.516916659665987e-05, + "loss": 0.4401, + "step": 81400 + }, + { + "epoch": 0.42023310167628897, + "grad_norm": 20872.77734375, + "learning_rate": 7.5137595526072e-05, + "loss": 0.4394, + "step": 81450 + }, + { + "epoch": 0.42049107165890176, + "grad_norm": 21003.841796875, + "learning_rate": 7.51060110375224e-05, + "loss": 0.4402, + "step": 81500 + }, + { + "epoch": 0.4207490416415146, + "grad_norm": 22772.330078125, + "learning_rate": 7.507441314787025e-05, + "loss": 0.4438, + "step": 81550 + }, + { + "epoch": 0.4210070116241274, + "grad_norm": 19593.216796875, + "learning_rate": 7.504280187398189e-05, + "loss": 0.4375, + "step": 81600 + }, + { + "epoch": 0.42126498160674025, + "grad_norm": 20914.66796875, + "learning_rate": 7.501117723273084e-05, + "loss": 0.4397, + "step": 81650 + }, + { + "epoch": 0.42152295158935305, + "grad_norm": 20479.12109375, + "learning_rate": 7.497953924099768e-05, + "loss": 0.4365, + "step": 81700 + }, + { + "epoch": 0.4217809215719659, + "grad_norm": 20309.25, + "learning_rate": 7.494788791567017e-05, + "loss": 0.4461, + "step": 81750 + }, + { + "epoch": 0.4220388915545787, + "grad_norm": 21467.72265625, + "learning_rate": 7.491622327364318e-05, + "loss": 0.4354, + "step": 81800 + }, + { + "epoch": 0.42229686153719154, + "grad_norm": 20826.80859375, + "learning_rate": 7.488454533181871e-05, + "loss": 0.4398, + "step": 81850 + }, + { + "epoch": 0.42255483151980433, + "grad_norm": 20537.826171875, + "learning_rate": 7.485285410710577e-05, + "loss": 0.4443, + "step": 81900 + }, + { + "epoch": 0.4228128015024172, + "grad_norm": 19521.810546875, + "learning_rate": 7.482114961642057e-05, + "loss": 0.4379, + "step": 81950 + }, + { + "epoch": 0.42307077148503003, + "grad_norm": 19407.5234375, + "learning_rate": 7.478943187668633e-05, + "loss": 0.4429, + "step": 82000 + }, + { + "epoch": 0.4233287414676428, + "grad_norm": 23058.337890625, + "learning_rate": 7.475770090483338e-05, + "loss": 0.4362, + "step": 82050 + }, + { + "epoch": 0.42358671145025567, + "grad_norm": 27362.29296875, + "learning_rate": 7.472595671779907e-05, + "loss": 0.4413, + "step": 82100 + }, + { + "epoch": 0.42384468143286846, + "grad_norm": 20389.08203125, + "learning_rate": 7.469419933252789e-05, + "loss": 0.4386, + "step": 82150 + }, + { + "epoch": 0.4241026514154813, + "grad_norm": 21554.896484375, + "learning_rate": 7.466242876597125e-05, + "loss": 0.4387, + "step": 82200 + }, + { + "epoch": 0.4243606213980941, + "grad_norm": 23449.822265625, + "learning_rate": 7.463064503508772e-05, + "loss": 0.4402, + "step": 82250 + }, + { + "epoch": 0.42461859138070696, + "grad_norm": 23945.1328125, + "learning_rate": 7.459884815684279e-05, + "loss": 0.4393, + "step": 82300 + }, + { + "epoch": 0.42487656136331975, + "grad_norm": 21705.064453125, + "learning_rate": 7.456703814820904e-05, + "loss": 0.4374, + "step": 82350 + }, + { + "epoch": 0.4251345313459326, + "grad_norm": 20050.66796875, + "learning_rate": 7.453521502616607e-05, + "loss": 0.4433, + "step": 82400 + }, + { + "epoch": 0.4253925013285454, + "grad_norm": 24757.845703125, + "learning_rate": 7.45033788077004e-05, + "loss": 0.4362, + "step": 82450 + }, + { + "epoch": 0.42565047131115824, + "grad_norm": 21754.42578125, + "learning_rate": 7.44715295098056e-05, + "loss": 0.4386, + "step": 82500 + }, + { + "epoch": 0.42590844129377103, + "grad_norm": 22891.12890625, + "learning_rate": 7.443966714948222e-05, + "loss": 0.4438, + "step": 82550 + }, + { + "epoch": 0.4261664112763839, + "grad_norm": 22174.580078125, + "learning_rate": 7.440779174373776e-05, + "loss": 0.4388, + "step": 82600 + }, + { + "epoch": 0.4264243812589967, + "grad_norm": 20407.677734375, + "learning_rate": 7.43759033095867e-05, + "loss": 0.4412, + "step": 82650 + }, + { + "epoch": 0.4266823512416095, + "grad_norm": 21960.552734375, + "learning_rate": 7.434400186405045e-05, + "loss": 0.4394, + "step": 82700 + }, + { + "epoch": 0.4269403212242224, + "grad_norm": 20736.583984375, + "learning_rate": 7.431208742415741e-05, + "loss": 0.4382, + "step": 82750 + }, + { + "epoch": 0.42719829120683517, + "grad_norm": 21133.63671875, + "learning_rate": 7.428016000694286e-05, + "loss": 0.4379, + "step": 82800 + }, + { + "epoch": 0.427456261189448, + "grad_norm": 23741.525390625, + "learning_rate": 7.424821962944908e-05, + "loss": 0.4398, + "step": 82850 + }, + { + "epoch": 0.4277142311720608, + "grad_norm": 21936.802734375, + "learning_rate": 7.42162663087252e-05, + "loss": 0.4383, + "step": 82900 + }, + { + "epoch": 0.42797220115467366, + "grad_norm": 24459.85546875, + "learning_rate": 7.418430006182727e-05, + "loss": 0.4393, + "step": 82950 + }, + { + "epoch": 0.42823017113728645, + "grad_norm": 21729.9921875, + "learning_rate": 7.415232090581828e-05, + "loss": 0.4421, + "step": 83000 + }, + { + "epoch": 0.4284881411198993, + "grad_norm": 21081.5703125, + "learning_rate": 7.412032885776807e-05, + "loss": 0.4414, + "step": 83050 + }, + { + "epoch": 0.4287461111025121, + "grad_norm": 20296.740234375, + "learning_rate": 7.408832393475338e-05, + "loss": 0.4316, + "step": 83100 + }, + { + "epoch": 0.42900408108512494, + "grad_norm": 20874.30078125, + "learning_rate": 7.405630615385781e-05, + "loss": 0.433, + "step": 83150 + }, + { + "epoch": 0.42926205106773774, + "grad_norm": 20673.11328125, + "learning_rate": 7.402427553217183e-05, + "loss": 0.4386, + "step": 83200 + }, + { + "epoch": 0.4295200210503506, + "grad_norm": 22462.07421875, + "learning_rate": 7.39922320867928e-05, + "loss": 0.4464, + "step": 83250 + }, + { + "epoch": 0.4297779910329634, + "grad_norm": 20411.771484375, + "learning_rate": 7.396017583482487e-05, + "loss": 0.444, + "step": 83300 + }, + { + "epoch": 0.43003596101557623, + "grad_norm": 21137.6953125, + "learning_rate": 7.392810679337902e-05, + "loss": 0.4416, + "step": 83350 + }, + { + "epoch": 0.4302939309981891, + "grad_norm": 23059.064453125, + "learning_rate": 7.38960249795731e-05, + "loss": 0.4401, + "step": 83400 + }, + { + "epoch": 0.43055190098080187, + "grad_norm": 20305.22265625, + "learning_rate": 7.386393041053176e-05, + "loss": 0.4399, + "step": 83450 + }, + { + "epoch": 0.4308098709634147, + "grad_norm": 22247.779296875, + "learning_rate": 7.38318231033865e-05, + "loss": 0.4362, + "step": 83500 + }, + { + "epoch": 0.4310678409460275, + "grad_norm": 22231.337890625, + "learning_rate": 7.379970307527552e-05, + "loss": 0.4417, + "step": 83550 + }, + { + "epoch": 0.43132581092864036, + "grad_norm": 21788.875, + "learning_rate": 7.376757034334388e-05, + "loss": 0.4374, + "step": 83600 + }, + { + "epoch": 0.43158378091125316, + "grad_norm": 22237.51953125, + "learning_rate": 7.373542492474343e-05, + "loss": 0.4372, + "step": 83650 + }, + { + "epoch": 0.431841750893866, + "grad_norm": 21732.943359375, + "learning_rate": 7.370326683663278e-05, + "loss": 0.4395, + "step": 83700 + }, + { + "epoch": 0.4320997208764788, + "grad_norm": 19517.212890625, + "learning_rate": 7.367109609617729e-05, + "loss": 0.4371, + "step": 83750 + }, + { + "epoch": 0.43235769085909165, + "grad_norm": 23681.388671875, + "learning_rate": 7.363891272054903e-05, + "loss": 0.4383, + "step": 83800 + }, + { + "epoch": 0.43261566084170444, + "grad_norm": 23889.822265625, + "learning_rate": 7.360671672692691e-05, + "loss": 0.441, + "step": 83850 + }, + { + "epoch": 0.4328736308243173, + "grad_norm": 21159.45703125, + "learning_rate": 7.357450813249654e-05, + "loss": 0.4328, + "step": 83900 + }, + { + "epoch": 0.4331316008069301, + "grad_norm": 20617.83984375, + "learning_rate": 7.354228695445023e-05, + "loss": 0.4395, + "step": 83950 + }, + { + "epoch": 0.43338957078954293, + "grad_norm": 19741.568359375, + "learning_rate": 7.351005320998699e-05, + "loss": 0.4356, + "step": 84000 + }, + { + "epoch": 0.4336475407721557, + "grad_norm": 21407.771484375, + "learning_rate": 7.347780691631259e-05, + "loss": 0.4322, + "step": 84050 + }, + { + "epoch": 0.4339055107547686, + "grad_norm": 22396.5625, + "learning_rate": 7.344554809063947e-05, + "loss": 0.4379, + "step": 84100 + }, + { + "epoch": 0.4341634807373814, + "grad_norm": 23536.361328125, + "learning_rate": 7.34132767501868e-05, + "loss": 0.4372, + "step": 84150 + }, + { + "epoch": 0.4344214507199942, + "grad_norm": 23622.90234375, + "learning_rate": 7.338099291218036e-05, + "loss": 0.4361, + "step": 84200 + }, + { + "epoch": 0.43467942070260707, + "grad_norm": 24463.931640625, + "learning_rate": 7.334869659385264e-05, + "loss": 0.4478, + "step": 84250 + }, + { + "epoch": 0.43493739068521986, + "grad_norm": 21666.328125, + "learning_rate": 7.331638781244283e-05, + "loss": 0.4387, + "step": 84300 + }, + { + "epoch": 0.4351953606678327, + "grad_norm": 21145.6875, + "learning_rate": 7.328406658519669e-05, + "loss": 0.4362, + "step": 84350 + }, + { + "epoch": 0.4354533306504455, + "grad_norm": 21766.228515625, + "learning_rate": 7.325173292936667e-05, + "loss": 0.4433, + "step": 84400 + }, + { + "epoch": 0.43571130063305835, + "grad_norm": 23118.056640625, + "learning_rate": 7.321938686221185e-05, + "loss": 0.4317, + "step": 84450 + }, + { + "epoch": 0.43596927061567115, + "grad_norm": 20925.833984375, + "learning_rate": 7.318702840099793e-05, + "loss": 0.4348, + "step": 84500 + }, + { + "epoch": 0.436227240598284, + "grad_norm": 21725.630859375, + "learning_rate": 7.315465756299727e-05, + "loss": 0.4363, + "step": 84550 + }, + { + "epoch": 0.4364852105808968, + "grad_norm": 20223.537109375, + "learning_rate": 7.312227436548875e-05, + "loss": 0.4363, + "step": 84600 + }, + { + "epoch": 0.43674318056350964, + "grad_norm": 22766.71484375, + "learning_rate": 7.308987882575793e-05, + "loss": 0.442, + "step": 84650 + }, + { + "epoch": 0.43700115054612243, + "grad_norm": 20453.341796875, + "learning_rate": 7.305747096109688e-05, + "loss": 0.4362, + "step": 84700 + }, + { + "epoch": 0.4372591205287353, + "grad_norm": 20761.466796875, + "learning_rate": 7.302505078880431e-05, + "loss": 0.435, + "step": 84750 + }, + { + "epoch": 0.4375170905113481, + "grad_norm": 20815.27734375, + "learning_rate": 7.299261832618551e-05, + "loss": 0.4398, + "step": 84800 + }, + { + "epoch": 0.4377750604939609, + "grad_norm": 22528.06640625, + "learning_rate": 7.296017359055224e-05, + "loss": 0.44, + "step": 84850 + }, + { + "epoch": 0.43803303047657377, + "grad_norm": 21391.71484375, + "learning_rate": 7.292771659922293e-05, + "loss": 0.4376, + "step": 84900 + }, + { + "epoch": 0.43829100045918656, + "grad_norm": 21485.966796875, + "learning_rate": 7.289524736952245e-05, + "loss": 0.4424, + "step": 84950 + }, + { + "epoch": 0.4385489704417994, + "grad_norm": 21160.314453125, + "learning_rate": 7.286276591878228e-05, + "loss": 0.4473, + "step": 85000 + }, + { + "epoch": 0.4385489704417994, + "eval_loss": 0.4252757728099823, + "eval_runtime": 3252.991, + "eval_samples_per_second": 953.313, + "eval_steps_per_second": 1.862, + "step": 85000 + }, + { + "epoch": 0.4388069404244122, + "grad_norm": 29667.109375, + "learning_rate": 7.283027226434036e-05, + "loss": 0.4414, + "step": 85050 + }, + { + "epoch": 0.43906491040702506, + "grad_norm": 24990.86328125, + "learning_rate": 7.27977664235412e-05, + "loss": 0.4321, + "step": 85100 + }, + { + "epoch": 0.43932288038963785, + "grad_norm": 21708.86328125, + "learning_rate": 7.276524841373576e-05, + "loss": 0.4331, + "step": 85150 + }, + { + "epoch": 0.4395808503722507, + "grad_norm": 22323.1015625, + "learning_rate": 7.273271825228157e-05, + "loss": 0.4372, + "step": 85200 + }, + { + "epoch": 0.4398388203548635, + "grad_norm": 21696.2734375, + "learning_rate": 7.270017595654255e-05, + "loss": 0.4271, + "step": 85250 + }, + { + "epoch": 0.44009679033747634, + "grad_norm": 23364.560546875, + "learning_rate": 7.266762154388917e-05, + "loss": 0.4327, + "step": 85300 + }, + { + "epoch": 0.44035476032008913, + "grad_norm": 21834.607421875, + "learning_rate": 7.263505503169834e-05, + "loss": 0.4337, + "step": 85350 + }, + { + "epoch": 0.440612730302702, + "grad_norm": 18636.244140625, + "learning_rate": 7.260247643735343e-05, + "loss": 0.4393, + "step": 85400 + }, + { + "epoch": 0.4408707002853148, + "grad_norm": 20385.875, + "learning_rate": 7.256988577824427e-05, + "loss": 0.4398, + "step": 85450 + }, + { + "epoch": 0.4411286702679276, + "grad_norm": 21459.576171875, + "learning_rate": 7.253728307176713e-05, + "loss": 0.435, + "step": 85500 + }, + { + "epoch": 0.4413866402505405, + "grad_norm": 22838.716796875, + "learning_rate": 7.25046683353247e-05, + "loss": 0.4368, + "step": 85550 + }, + { + "epoch": 0.44164461023315327, + "grad_norm": 23016.4140625, + "learning_rate": 7.247204158632608e-05, + "loss": 0.4353, + "step": 85600 + }, + { + "epoch": 0.4419025802157661, + "grad_norm": 22318.193359375, + "learning_rate": 7.243940284218682e-05, + "loss": 0.4374, + "step": 85650 + }, + { + "epoch": 0.4421605501983789, + "grad_norm": 20475.376953125, + "learning_rate": 7.240675212032884e-05, + "loss": 0.4339, + "step": 85700 + }, + { + "epoch": 0.44241852018099176, + "grad_norm": 22276.287109375, + "learning_rate": 7.237408943818042e-05, + "loss": 0.4275, + "step": 85750 + }, + { + "epoch": 0.44267649016360455, + "grad_norm": 22131.654296875, + "learning_rate": 7.234141481317634e-05, + "loss": 0.4373, + "step": 85800 + }, + { + "epoch": 0.4429344601462174, + "grad_norm": 24779.14453125, + "learning_rate": 7.230872826275765e-05, + "loss": 0.4347, + "step": 85850 + }, + { + "epoch": 0.4431924301288302, + "grad_norm": 22474.443359375, + "learning_rate": 7.227602980437179e-05, + "loss": 0.4341, + "step": 85900 + }, + { + "epoch": 0.44345040011144304, + "grad_norm": 21620.056640625, + "learning_rate": 7.224331945547258e-05, + "loss": 0.4399, + "step": 85950 + }, + { + "epoch": 0.44370837009405584, + "grad_norm": 21546.8046875, + "learning_rate": 7.221059723352014e-05, + "loss": 0.4437, + "step": 86000 + }, + { + "epoch": 0.4439663400766687, + "grad_norm": 22283.0078125, + "learning_rate": 7.2177863155981e-05, + "loss": 0.4403, + "step": 86050 + }, + { + "epoch": 0.4442243100592815, + "grad_norm": 21332.576171875, + "learning_rate": 7.214511724032795e-05, + "loss": 0.4369, + "step": 86100 + }, + { + "epoch": 0.44448228004189433, + "grad_norm": 23106.01953125, + "learning_rate": 7.211235950404013e-05, + "loss": 0.4369, + "step": 86150 + }, + { + "epoch": 0.4447402500245071, + "grad_norm": 21826.2734375, + "learning_rate": 7.207958996460298e-05, + "loss": 0.4407, + "step": 86200 + }, + { + "epoch": 0.44499822000711997, + "grad_norm": 22308.90625, + "learning_rate": 7.204680863950825e-05, + "loss": 0.4349, + "step": 86250 + }, + { + "epoch": 0.4452561899897328, + "grad_norm": 24916.359375, + "learning_rate": 7.2014015546254e-05, + "loss": 0.436, + "step": 86300 + }, + { + "epoch": 0.4455141599723456, + "grad_norm": 22585.77734375, + "learning_rate": 7.198121070234453e-05, + "loss": 0.4311, + "step": 86350 + }, + { + "epoch": 0.44577212995495846, + "grad_norm": 22984.658203125, + "learning_rate": 7.194839412529042e-05, + "loss": 0.4324, + "step": 86400 + }, + { + "epoch": 0.44603009993757126, + "grad_norm": 22495.552734375, + "learning_rate": 7.191556583260853e-05, + "loss": 0.4306, + "step": 86450 + }, + { + "epoch": 0.4462880699201841, + "grad_norm": 21413.2578125, + "learning_rate": 7.188272584182196e-05, + "loss": 0.4404, + "step": 86500 + }, + { + "epoch": 0.4465460399027969, + "grad_norm": 23719.43359375, + "learning_rate": 7.184987417046007e-05, + "loss": 0.4321, + "step": 86550 + }, + { + "epoch": 0.44680400988540975, + "grad_norm": 22586.095703125, + "learning_rate": 7.181701083605846e-05, + "loss": 0.4349, + "step": 86600 + }, + { + "epoch": 0.44706197986802254, + "grad_norm": 20580.166015625, + "learning_rate": 7.178413585615891e-05, + "loss": 0.4323, + "step": 86650 + }, + { + "epoch": 0.4473199498506354, + "grad_norm": 21345.71875, + "learning_rate": 7.175124924830948e-05, + "loss": 0.4326, + "step": 86700 + }, + { + "epoch": 0.4475779198332482, + "grad_norm": 20615.333984375, + "learning_rate": 7.171835103006438e-05, + "loss": 0.4425, + "step": 86750 + }, + { + "epoch": 0.44783588981586103, + "grad_norm": 25518.546875, + "learning_rate": 7.168544121898407e-05, + "loss": 0.4307, + "step": 86800 + }, + { + "epoch": 0.4480938597984738, + "grad_norm": 23149.703125, + "learning_rate": 7.165251983263512e-05, + "loss": 0.4336, + "step": 86850 + }, + { + "epoch": 0.4483518297810867, + "grad_norm": 22026.19140625, + "learning_rate": 7.16195868885904e-05, + "loss": 0.4401, + "step": 86900 + }, + { + "epoch": 0.44860979976369947, + "grad_norm": 21140.90234375, + "learning_rate": 7.158664240442881e-05, + "loss": 0.436, + "step": 86950 + }, + { + "epoch": 0.4488677697463123, + "grad_norm": 25489.1796875, + "learning_rate": 7.155368639773552e-05, + "loss": 0.4379, + "step": 87000 + }, + { + "epoch": 0.44912573972892517, + "grad_norm": 21035.275390625, + "learning_rate": 7.152071888610176e-05, + "loss": 0.433, + "step": 87050 + }, + { + "epoch": 0.44938370971153796, + "grad_norm": 25905.03515625, + "learning_rate": 7.148773988712503e-05, + "loss": 0.4423, + "step": 87100 + }, + { + "epoch": 0.4496416796941508, + "grad_norm": 21237.857421875, + "learning_rate": 7.14547494184088e-05, + "loss": 0.4346, + "step": 87150 + }, + { + "epoch": 0.4498996496767636, + "grad_norm": 19255.748046875, + "learning_rate": 7.14217474975628e-05, + "loss": 0.4333, + "step": 87200 + }, + { + "epoch": 0.45015761965937645, + "grad_norm": 22115.05078125, + "learning_rate": 7.138873414220277e-05, + "loss": 0.4371, + "step": 87250 + }, + { + "epoch": 0.45041558964198924, + "grad_norm": 23271.462890625, + "learning_rate": 7.135570936995064e-05, + "loss": 0.4362, + "step": 87300 + }, + { + "epoch": 0.4506735596246021, + "grad_norm": 24245.02734375, + "learning_rate": 7.132267319843438e-05, + "loss": 0.4371, + "step": 87350 + }, + { + "epoch": 0.4509315296072149, + "grad_norm": 22234.224609375, + "learning_rate": 7.128962564528805e-05, + "loss": 0.4306, + "step": 87400 + }, + { + "epoch": 0.45118949958982774, + "grad_norm": 22704.115234375, + "learning_rate": 7.12565667281518e-05, + "loss": 0.4408, + "step": 87450 + }, + { + "epoch": 0.45144746957244053, + "grad_norm": 21906.650390625, + "learning_rate": 7.122349646467183e-05, + "loss": 0.4322, + "step": 87500 + }, + { + "epoch": 0.4517054395550534, + "grad_norm": 21960.501953125, + "learning_rate": 7.119041487250045e-05, + "loss": 0.4322, + "step": 87550 + }, + { + "epoch": 0.45196340953766617, + "grad_norm": 20264.14453125, + "learning_rate": 7.11573219692959e-05, + "loss": 0.4403, + "step": 87600 + }, + { + "epoch": 0.452221379520279, + "grad_norm": 20237.078125, + "learning_rate": 7.112421777272259e-05, + "loss": 0.4421, + "step": 87650 + }, + { + "epoch": 0.45247934950289187, + "grad_norm": 22111.3203125, + "learning_rate": 7.109110230045087e-05, + "loss": 0.4386, + "step": 87700 + }, + { + "epoch": 0.45273731948550466, + "grad_norm": 20690.015625, + "learning_rate": 7.105797557015715e-05, + "loss": 0.4315, + "step": 87750 + }, + { + "epoch": 0.4529952894681175, + "grad_norm": 23273.888671875, + "learning_rate": 7.102483759952384e-05, + "loss": 0.4397, + "step": 87800 + }, + { + "epoch": 0.4532532594507303, + "grad_norm": 20268.541015625, + "learning_rate": 7.099168840623935e-05, + "loss": 0.4381, + "step": 87850 + }, + { + "epoch": 0.45351122943334315, + "grad_norm": 21591.724609375, + "learning_rate": 7.095852800799806e-05, + "loss": 0.4368, + "step": 87900 + }, + { + "epoch": 0.45376919941595595, + "grad_norm": 20683.994140625, + "learning_rate": 7.092535642250035e-05, + "loss": 0.4315, + "step": 87950 + }, + { + "epoch": 0.4540271693985688, + "grad_norm": 22910.26953125, + "learning_rate": 7.089217366745258e-05, + "loss": 0.4415, + "step": 88000 + }, + { + "epoch": 0.4542851393811816, + "grad_norm": 22321.40234375, + "learning_rate": 7.085897976056706e-05, + "loss": 0.4386, + "step": 88050 + }, + { + "epoch": 0.45454310936379444, + "grad_norm": 20730.521484375, + "learning_rate": 7.082577471956206e-05, + "loss": 0.4335, + "step": 88100 + }, + { + "epoch": 0.45480107934640723, + "grad_norm": 23302.033203125, + "learning_rate": 7.079255856216177e-05, + "loss": 0.4366, + "step": 88150 + }, + { + "epoch": 0.4550590493290201, + "grad_norm": 21125.5625, + "learning_rate": 7.075933130609636e-05, + "loss": 0.4388, + "step": 88200 + }, + { + "epoch": 0.4553170193116329, + "grad_norm": 24245.548828125, + "learning_rate": 7.072609296910187e-05, + "loss": 0.4369, + "step": 88250 + }, + { + "epoch": 0.4555749892942457, + "grad_norm": 19609.1484375, + "learning_rate": 7.06928435689203e-05, + "loss": 0.4287, + "step": 88300 + }, + { + "epoch": 0.4558329592768585, + "grad_norm": 21653.08984375, + "learning_rate": 7.065958312329953e-05, + "loss": 0.4357, + "step": 88350 + }, + { + "epoch": 0.45609092925947137, + "grad_norm": 23725.236328125, + "learning_rate": 7.062631164999331e-05, + "loss": 0.4382, + "step": 88400 + }, + { + "epoch": 0.4563488992420842, + "grad_norm": 21436.92578125, + "learning_rate": 7.059302916676137e-05, + "loss": 0.4373, + "step": 88450 + }, + { + "epoch": 0.456606869224697, + "grad_norm": 20179.189453125, + "learning_rate": 7.05597356913692e-05, + "loss": 0.4304, + "step": 88500 + }, + { + "epoch": 0.45686483920730986, + "grad_norm": 22804.22265625, + "learning_rate": 7.052643124158824e-05, + "loss": 0.4343, + "step": 88550 + }, + { + "epoch": 0.45712280918992265, + "grad_norm": 21530.931640625, + "learning_rate": 7.049311583519574e-05, + "loss": 0.4364, + "step": 88600 + }, + { + "epoch": 0.4573807791725355, + "grad_norm": 21411.646484375, + "learning_rate": 7.045978948997486e-05, + "loss": 0.436, + "step": 88650 + }, + { + "epoch": 0.4576387491551483, + "grad_norm": 20853.962890625, + "learning_rate": 7.042645222371451e-05, + "loss": 0.436, + "step": 88700 + }, + { + "epoch": 0.45789671913776114, + "grad_norm": 20940.28125, + "learning_rate": 7.039310405420952e-05, + "loss": 0.4349, + "step": 88750 + }, + { + "epoch": 0.45815468912037394, + "grad_norm": 22368.05078125, + "learning_rate": 7.035974499926045e-05, + "loss": 0.4355, + "step": 88800 + }, + { + "epoch": 0.4584126591029868, + "grad_norm": 21155.3984375, + "learning_rate": 7.032637507667377e-05, + "loss": 0.4292, + "step": 88850 + }, + { + "epoch": 0.4586706290855996, + "grad_norm": 21627.353515625, + "learning_rate": 7.029299430426164e-05, + "loss": 0.4404, + "step": 88900 + }, + { + "epoch": 0.45892859906821243, + "grad_norm": 22008.23046875, + "learning_rate": 7.025960269984212e-05, + "loss": 0.431, + "step": 88950 + }, + { + "epoch": 0.4591865690508252, + "grad_norm": 21588.109375, + "learning_rate": 7.022620028123898e-05, + "loss": 0.4319, + "step": 89000 + }, + { + "epoch": 0.45944453903343807, + "grad_norm": 21680.646484375, + "learning_rate": 7.019278706628179e-05, + "loss": 0.4403, + "step": 89050 + }, + { + "epoch": 0.4597025090160509, + "grad_norm": 25427.423828125, + "learning_rate": 7.015936307280587e-05, + "loss": 0.435, + "step": 89100 + }, + { + "epoch": 0.4599604789986637, + "grad_norm": 22674.693359375, + "learning_rate": 7.01259283186523e-05, + "loss": 0.4377, + "step": 89150 + }, + { + "epoch": 0.46021844898127656, + "grad_norm": 24841.029296875, + "learning_rate": 7.009248282166793e-05, + "loss": 0.4387, + "step": 89200 + }, + { + "epoch": 0.46047641896388936, + "grad_norm": 21259.369140625, + "learning_rate": 7.005902659970528e-05, + "loss": 0.4355, + "step": 89250 + }, + { + "epoch": 0.4607343889465022, + "grad_norm": 19364.466796875, + "learning_rate": 7.002555967062265e-05, + "loss": 0.4353, + "step": 89300 + }, + { + "epoch": 0.460992358929115, + "grad_norm": 25116.47265625, + "learning_rate": 6.999208205228405e-05, + "loss": 0.4328, + "step": 89350 + }, + { + "epoch": 0.46125032891172785, + "grad_norm": 24426.4296875, + "learning_rate": 6.995859376255918e-05, + "loss": 0.4331, + "step": 89400 + }, + { + "epoch": 0.46150829889434064, + "grad_norm": 20802.759765625, + "learning_rate": 6.99250948193234e-05, + "loss": 0.4294, + "step": 89450 + }, + { + "epoch": 0.4617662688769535, + "grad_norm": 23164.2109375, + "learning_rate": 6.989158524045787e-05, + "loss": 0.4338, + "step": 89500 + }, + { + "epoch": 0.4620242388595663, + "grad_norm": 20543.28515625, + "learning_rate": 6.98580650438493e-05, + "loss": 0.4243, + "step": 89550 + }, + { + "epoch": 0.46228220884217913, + "grad_norm": 22468.732421875, + "learning_rate": 6.982453424739016e-05, + "loss": 0.4306, + "step": 89600 + }, + { + "epoch": 0.4625401788247919, + "grad_norm": 22903.12890625, + "learning_rate": 6.979099286897849e-05, + "loss": 0.4316, + "step": 89650 + }, + { + "epoch": 0.4627981488074048, + "grad_norm": 23074.068359375, + "learning_rate": 6.975744092651808e-05, + "loss": 0.4371, + "step": 89700 + }, + { + "epoch": 0.46305611879001757, + "grad_norm": 22003.00390625, + "learning_rate": 6.972387843791827e-05, + "loss": 0.4329, + "step": 89750 + }, + { + "epoch": 0.4633140887726304, + "grad_norm": 21524.93359375, + "learning_rate": 6.969030542109407e-05, + "loss": 0.4348, + "step": 89800 + }, + { + "epoch": 0.46357205875524327, + "grad_norm": 20501.130859375, + "learning_rate": 6.965672189396614e-05, + "loss": 0.4286, + "step": 89850 + }, + { + "epoch": 0.46383002873785606, + "grad_norm": 21559.396484375, + "learning_rate": 6.962312787446068e-05, + "loss": 0.434, + "step": 89900 + }, + { + "epoch": 0.4640879987204689, + "grad_norm": 21185.537109375, + "learning_rate": 6.958952338050955e-05, + "loss": 0.4326, + "step": 89950 + }, + { + "epoch": 0.4643459687030817, + "grad_norm": 23004.626953125, + "learning_rate": 6.955590843005016e-05, + "loss": 0.4272, + "step": 90000 + }, + { + "epoch": 0.4643459687030817, + "eval_loss": 0.4223860800266266, + "eval_runtime": 3251.8949, + "eval_samples_per_second": 953.635, + "eval_steps_per_second": 1.863, + "step": 90000 + }, + { + "epoch": 0.46460393868569455, + "grad_norm": 20333.259765625, + "learning_rate": 6.952228304102553e-05, + "loss": 0.4338, + "step": 90050 + }, + { + "epoch": 0.46486190866830734, + "grad_norm": 25967.029296875, + "learning_rate": 6.948864723138423e-05, + "loss": 0.4352, + "step": 90100 + }, + { + "epoch": 0.4651198786509202, + "grad_norm": 22849.9375, + "learning_rate": 6.945500101908043e-05, + "loss": 0.4358, + "step": 90150 + }, + { + "epoch": 0.465377848633533, + "grad_norm": 20628.9453125, + "learning_rate": 6.94213444220738e-05, + "loss": 0.4343, + "step": 90200 + }, + { + "epoch": 0.46563581861614584, + "grad_norm": 22179.84375, + "learning_rate": 6.938767745832959e-05, + "loss": 0.4314, + "step": 90250 + }, + { + "epoch": 0.46589378859875863, + "grad_norm": 24433.46484375, + "learning_rate": 6.935400014581858e-05, + "loss": 0.436, + "step": 90300 + }, + { + "epoch": 0.4661517585813715, + "grad_norm": 21914.666015625, + "learning_rate": 6.932031250251705e-05, + "loss": 0.431, + "step": 90350 + }, + { + "epoch": 0.46640972856398427, + "grad_norm": 19517.78125, + "learning_rate": 6.928661454640683e-05, + "loss": 0.4282, + "step": 90400 + }, + { + "epoch": 0.4666676985465971, + "grad_norm": 25924.5234375, + "learning_rate": 6.925290629547522e-05, + "loss": 0.4344, + "step": 90450 + }, + { + "epoch": 0.4669256685292099, + "grad_norm": 20866.927734375, + "learning_rate": 6.921918776771505e-05, + "loss": 0.4336, + "step": 90500 + }, + { + "epoch": 0.46718363851182276, + "grad_norm": 22734.5625, + "learning_rate": 6.91854589811246e-05, + "loss": 0.4375, + "step": 90550 + }, + { + "epoch": 0.4674416084944356, + "grad_norm": 21173.5703125, + "learning_rate": 6.915171995370766e-05, + "loss": 0.428, + "step": 90600 + }, + { + "epoch": 0.4676995784770484, + "grad_norm": 23864.681640625, + "learning_rate": 6.911797070347346e-05, + "loss": 0.4344, + "step": 90650 + }, + { + "epoch": 0.46795754845966125, + "grad_norm": 26236.091796875, + "learning_rate": 6.908421124843669e-05, + "loss": 0.4345, + "step": 90700 + }, + { + "epoch": 0.46821551844227405, + "grad_norm": 20788.6015625, + "learning_rate": 6.905044160661748e-05, + "loss": 0.4332, + "step": 90750 + }, + { + "epoch": 0.4684734884248869, + "grad_norm": 21382.2578125, + "learning_rate": 6.901666179604148e-05, + "loss": 0.4356, + "step": 90800 + }, + { + "epoch": 0.4687314584074997, + "grad_norm": 20230.220703125, + "learning_rate": 6.898287183473961e-05, + "loss": 0.4262, + "step": 90850 + }, + { + "epoch": 0.46898942839011254, + "grad_norm": 31838.697265625, + "learning_rate": 6.894907174074836e-05, + "loss": 0.4316, + "step": 90900 + }, + { + "epoch": 0.46924739837272533, + "grad_norm": 21029.5234375, + "learning_rate": 6.891526153210953e-05, + "loss": 0.4346, + "step": 90950 + }, + { + "epoch": 0.4695053683553382, + "grad_norm": 23617.826171875, + "learning_rate": 6.888144122687035e-05, + "loss": 0.4262, + "step": 91000 + }, + { + "epoch": 0.469763338337951, + "grad_norm": 23151.751953125, + "learning_rate": 6.884761084308349e-05, + "loss": 0.4296, + "step": 91050 + }, + { + "epoch": 0.4700213083205638, + "grad_norm": 19649.466796875, + "learning_rate": 6.881377039880692e-05, + "loss": 0.4325, + "step": 91100 + }, + { + "epoch": 0.4702792783031766, + "grad_norm": 20488.10546875, + "learning_rate": 6.8779919912104e-05, + "loss": 0.4352, + "step": 91150 + }, + { + "epoch": 0.47053724828578947, + "grad_norm": 21639.306640625, + "learning_rate": 6.874605940104349e-05, + "loss": 0.4319, + "step": 91200 + }, + { + "epoch": 0.4707952182684023, + "grad_norm": 21799.994140625, + "learning_rate": 6.871218888369947e-05, + "loss": 0.4315, + "step": 91250 + }, + { + "epoch": 0.4710531882510151, + "grad_norm": 22425.94140625, + "learning_rate": 6.867830837815137e-05, + "loss": 0.4381, + "step": 91300 + }, + { + "epoch": 0.47131115823362796, + "grad_norm": 22582.57421875, + "learning_rate": 6.864441790248396e-05, + "loss": 0.4297, + "step": 91350 + }, + { + "epoch": 0.47156912821624075, + "grad_norm": 21082.38671875, + "learning_rate": 6.861051747478726e-05, + "loss": 0.4292, + "step": 91400 + }, + { + "epoch": 0.4718270981988536, + "grad_norm": 23156.5546875, + "learning_rate": 6.857660711315672e-05, + "loss": 0.4276, + "step": 91450 + }, + { + "epoch": 0.4720850681814664, + "grad_norm": 21754.6796875, + "learning_rate": 6.854268683569302e-05, + "loss": 0.4369, + "step": 91500 + }, + { + "epoch": 0.47234303816407924, + "grad_norm": 22397.896484375, + "learning_rate": 6.850875666050216e-05, + "loss": 0.4312, + "step": 91550 + }, + { + "epoch": 0.47260100814669204, + "grad_norm": 21344.166015625, + "learning_rate": 6.847481660569537e-05, + "loss": 0.4291, + "step": 91600 + }, + { + "epoch": 0.4728589781293049, + "grad_norm": 23818.71484375, + "learning_rate": 6.844086668938923e-05, + "loss": 0.4352, + "step": 91650 + }, + { + "epoch": 0.4731169481119177, + "grad_norm": 21734.537109375, + "learning_rate": 6.840690692970554e-05, + "loss": 0.4326, + "step": 91700 + }, + { + "epoch": 0.47337491809453053, + "grad_norm": 22027.734375, + "learning_rate": 6.837293734477136e-05, + "loss": 0.4369, + "step": 91750 + }, + { + "epoch": 0.4736328880771433, + "grad_norm": 23111.103515625, + "learning_rate": 6.8338957952719e-05, + "loss": 0.4396, + "step": 91800 + }, + { + "epoch": 0.47389085805975617, + "grad_norm": 22521.767578125, + "learning_rate": 6.830496877168599e-05, + "loss": 0.4376, + "step": 91850 + }, + { + "epoch": 0.47414882804236896, + "grad_norm": 19730.158203125, + "learning_rate": 6.827096981981511e-05, + "loss": 0.4321, + "step": 91900 + }, + { + "epoch": 0.4744067980249818, + "grad_norm": 21871.134765625, + "learning_rate": 6.823696111525433e-05, + "loss": 0.4373, + "step": 91950 + }, + { + "epoch": 0.47466476800759466, + "grad_norm": 22332.384765625, + "learning_rate": 6.820294267615686e-05, + "loss": 0.4323, + "step": 92000 + }, + { + "epoch": 0.47492273799020746, + "grad_norm": 22426.59765625, + "learning_rate": 6.816891452068104e-05, + "loss": 0.4272, + "step": 92050 + }, + { + "epoch": 0.4751807079728203, + "grad_norm": 23286.05859375, + "learning_rate": 6.81348766669905e-05, + "loss": 0.4442, + "step": 92100 + }, + { + "epoch": 0.4754386779554331, + "grad_norm": 21696.1171875, + "learning_rate": 6.810082913325395e-05, + "loss": 0.4288, + "step": 92150 + }, + { + "epoch": 0.47569664793804595, + "grad_norm": 20548.908203125, + "learning_rate": 6.80667719376453e-05, + "loss": 0.4358, + "step": 92200 + }, + { + "epoch": 0.47595461792065874, + "grad_norm": 22605.1640625, + "learning_rate": 6.803270509834363e-05, + "loss": 0.4327, + "step": 92250 + }, + { + "epoch": 0.4762125879032716, + "grad_norm": 23604.30078125, + "learning_rate": 6.799862863353318e-05, + "loss": 0.441, + "step": 92300 + }, + { + "epoch": 0.4764705578858844, + "grad_norm": 22117.1796875, + "learning_rate": 6.796454256140328e-05, + "loss": 0.4289, + "step": 92350 + }, + { + "epoch": 0.47672852786849723, + "grad_norm": 22476.54296875, + "learning_rate": 6.793044690014842e-05, + "loss": 0.4319, + "step": 92400 + }, + { + "epoch": 0.47698649785111, + "grad_norm": 20855.140625, + "learning_rate": 6.789634166796821e-05, + "loss": 0.4326, + "step": 92450 + }, + { + "epoch": 0.4772444678337229, + "grad_norm": 23704.125, + "learning_rate": 6.786222688306734e-05, + "loss": 0.4374, + "step": 92500 + }, + { + "epoch": 0.47750243781633567, + "grad_norm": 20677.91015625, + "learning_rate": 6.782810256365568e-05, + "loss": 0.4261, + "step": 92550 + }, + { + "epoch": 0.4777604077989485, + "grad_norm": 21245.837890625, + "learning_rate": 6.779396872794807e-05, + "loss": 0.4309, + "step": 92600 + }, + { + "epoch": 0.4780183777815613, + "grad_norm": 25415.859375, + "learning_rate": 6.775982539416453e-05, + "loss": 0.437, + "step": 92650 + }, + { + "epoch": 0.47827634776417416, + "grad_norm": 20582.556640625, + "learning_rate": 6.772567258053007e-05, + "loss": 0.4349, + "step": 92700 + }, + { + "epoch": 0.478534317746787, + "grad_norm": 20002.013671875, + "learning_rate": 6.769151030527483e-05, + "loss": 0.4263, + "step": 92750 + }, + { + "epoch": 0.4787922877293998, + "grad_norm": 23287.6875, + "learning_rate": 6.765733858663397e-05, + "loss": 0.4332, + "step": 92800 + }, + { + "epoch": 0.47905025771201265, + "grad_norm": 22023.66796875, + "learning_rate": 6.76231574428477e-05, + "loss": 0.4339, + "step": 92850 + }, + { + "epoch": 0.47930822769462544, + "grad_norm": 21299.185546875, + "learning_rate": 6.758896689216122e-05, + "loss": 0.4293, + "step": 92900 + }, + { + "epoch": 0.4795661976772383, + "grad_norm": 21979.560546875, + "learning_rate": 6.755476695282479e-05, + "loss": 0.4314, + "step": 92950 + }, + { + "epoch": 0.4798241676598511, + "grad_norm": 21399.029296875, + "learning_rate": 6.752055764309372e-05, + "loss": 0.4374, + "step": 93000 + }, + { + "epoch": 0.48008213764246394, + "grad_norm": 23827.685546875, + "learning_rate": 6.748633898122823e-05, + "loss": 0.4348, + "step": 93050 + }, + { + "epoch": 0.48034010762507673, + "grad_norm": 21079.61328125, + "learning_rate": 6.74521109854936e-05, + "loss": 0.4312, + "step": 93100 + }, + { + "epoch": 0.4805980776076896, + "grad_norm": 20395.04296875, + "learning_rate": 6.741787367416006e-05, + "loss": 0.4246, + "step": 93150 + }, + { + "epoch": 0.48085604759030237, + "grad_norm": 21922.576171875, + "learning_rate": 6.738362706550284e-05, + "loss": 0.4355, + "step": 93200 + }, + { + "epoch": 0.4811140175729152, + "grad_norm": 21317.001953125, + "learning_rate": 6.734937117780211e-05, + "loss": 0.4302, + "step": 93250 + }, + { + "epoch": 0.481371987555528, + "grad_norm": 21387.46484375, + "learning_rate": 6.731510602934298e-05, + "loss": 0.434, + "step": 93300 + }, + { + "epoch": 0.48162995753814086, + "grad_norm": 24289.28515625, + "learning_rate": 6.728083163841554e-05, + "loss": 0.4338, + "step": 93350 + }, + { + "epoch": 0.4818879275207537, + "grad_norm": 23514.162109375, + "learning_rate": 6.72465480233148e-05, + "loss": 0.4357, + "step": 93400 + }, + { + "epoch": 0.4821458975033665, + "grad_norm": 21481.0859375, + "learning_rate": 6.721225520234068e-05, + "loss": 0.4307, + "step": 93450 + }, + { + "epoch": 0.48240386748597935, + "grad_norm": 25044.396484375, + "learning_rate": 6.717795319379805e-05, + "loss": 0.4335, + "step": 93500 + }, + { + "epoch": 0.48266183746859215, + "grad_norm": 21193.333984375, + "learning_rate": 6.714364201599662e-05, + "loss": 0.4243, + "step": 93550 + }, + { + "epoch": 0.482919807451205, + "grad_norm": 19113.275390625, + "learning_rate": 6.710932168725105e-05, + "loss": 0.4331, + "step": 93600 + }, + { + "epoch": 0.4831777774338178, + "grad_norm": 21924.162109375, + "learning_rate": 6.707499222588087e-05, + "loss": 0.4309, + "step": 93650 + }, + { + "epoch": 0.48343574741643064, + "grad_norm": 21123.498046875, + "learning_rate": 6.704065365021048e-05, + "loss": 0.4392, + "step": 93700 + }, + { + "epoch": 0.48369371739904343, + "grad_norm": 22201.29296875, + "learning_rate": 6.700630597856914e-05, + "loss": 0.4281, + "step": 93750 + }, + { + "epoch": 0.4839516873816563, + "grad_norm": 24237.494140625, + "learning_rate": 6.697194922929096e-05, + "loss": 0.4367, + "step": 93800 + }, + { + "epoch": 0.4842096573642691, + "grad_norm": 21306.8125, + "learning_rate": 6.693758342071495e-05, + "loss": 0.4374, + "step": 93850 + }, + { + "epoch": 0.4844676273468819, + "grad_norm": 22120.75, + "learning_rate": 6.690320857118488e-05, + "loss": 0.4309, + "step": 93900 + }, + { + "epoch": 0.4847255973294947, + "grad_norm": 20799.59765625, + "learning_rate": 6.686882469904939e-05, + "loss": 0.4262, + "step": 93950 + }, + { + "epoch": 0.48498356731210757, + "grad_norm": 22964.642578125, + "learning_rate": 6.683443182266192e-05, + "loss": 0.4338, + "step": 94000 + }, + { + "epoch": 0.48524153729472036, + "grad_norm": 22017.076171875, + "learning_rate": 6.68000299603807e-05, + "loss": 0.4317, + "step": 94050 + }, + { + "epoch": 0.4854995072773332, + "grad_norm": 21423.890625, + "learning_rate": 6.676561913056884e-05, + "loss": 0.4329, + "step": 94100 + }, + { + "epoch": 0.48575747725994606, + "grad_norm": 22123.390625, + "learning_rate": 6.67311993515941e-05, + "loss": 0.4309, + "step": 94150 + }, + { + "epoch": 0.48601544724255885, + "grad_norm": 23107.208984375, + "learning_rate": 6.669677064182915e-05, + "loss": 0.4316, + "step": 94200 + }, + { + "epoch": 0.4862734172251717, + "grad_norm": 21250.33203125, + "learning_rate": 6.666233301965132e-05, + "loss": 0.4289, + "step": 94250 + }, + { + "epoch": 0.4865313872077845, + "grad_norm": 21629.720703125, + "learning_rate": 6.66278865034428e-05, + "loss": 0.4301, + "step": 94300 + }, + { + "epoch": 0.48678935719039734, + "grad_norm": 23665.4609375, + "learning_rate": 6.659343111159043e-05, + "loss": 0.4267, + "step": 94350 + }, + { + "epoch": 0.48704732717301014, + "grad_norm": 23254.232421875, + "learning_rate": 6.655896686248583e-05, + "loss": 0.4266, + "step": 94400 + }, + { + "epoch": 0.487305297155623, + "grad_norm": 22491.404296875, + "learning_rate": 6.652449377452539e-05, + "loss": 0.4278, + "step": 94450 + }, + { + "epoch": 0.4875632671382358, + "grad_norm": 21071.74609375, + "learning_rate": 6.649001186611015e-05, + "loss": 0.4308, + "step": 94500 + }, + { + "epoch": 0.4878212371208486, + "grad_norm": 20860.861328125, + "learning_rate": 6.64555211556459e-05, + "loss": 0.4308, + "step": 94550 + }, + { + "epoch": 0.4880792071034614, + "grad_norm": 21733.033203125, + "learning_rate": 6.642102166154308e-05, + "loss": 0.4376, + "step": 94600 + }, + { + "epoch": 0.48833717708607427, + "grad_norm": 22799.3984375, + "learning_rate": 6.638651340221687e-05, + "loss": 0.4289, + "step": 94650 + }, + { + "epoch": 0.48859514706868706, + "grad_norm": 21678.296875, + "learning_rate": 6.635199639608709e-05, + "loss": 0.4301, + "step": 94700 + }, + { + "epoch": 0.4888531170512999, + "grad_norm": 20510.052734375, + "learning_rate": 6.631747066157831e-05, + "loss": 0.4276, + "step": 94750 + }, + { + "epoch": 0.48911108703391276, + "grad_norm": 21075.474609375, + "learning_rate": 6.628293621711964e-05, + "loss": 0.435, + "step": 94800 + }, + { + "epoch": 0.48936905701652555, + "grad_norm": 22063.083984375, + "learning_rate": 6.624839308114492e-05, + "loss": 0.434, + "step": 94850 + }, + { + "epoch": 0.4896270269991384, + "grad_norm": 20185.99609375, + "learning_rate": 6.621384127209261e-05, + "loss": 0.4246, + "step": 94900 + }, + { + "epoch": 0.4898849969817512, + "grad_norm": 22002.326171875, + "learning_rate": 6.61792808084058e-05, + "loss": 0.4272, + "step": 94950 + }, + { + "epoch": 0.49014296696436405, + "grad_norm": 22271.25, + "learning_rate": 6.614471170853218e-05, + "loss": 0.4323, + "step": 95000 + }, + { + "epoch": 0.49014296696436405, + "eval_loss": 0.4187907576560974, + "eval_runtime": 3274.3922, + "eval_samples_per_second": 947.083, + "eval_steps_per_second": 1.85, + "step": 95000 + }, + { + "epoch": 0.49040093694697684, + "grad_norm": 20668.224609375, + "learning_rate": 6.611013399092406e-05, + "loss": 0.4285, + "step": 95050 + }, + { + "epoch": 0.4906589069295897, + "grad_norm": 20890.05078125, + "learning_rate": 6.607554767403838e-05, + "loss": 0.4333, + "step": 95100 + }, + { + "epoch": 0.4909168769122025, + "grad_norm": 22767.6875, + "learning_rate": 6.604095277633664e-05, + "loss": 0.4284, + "step": 95150 + }, + { + "epoch": 0.49117484689481533, + "grad_norm": 22603.083984375, + "learning_rate": 6.600634931628493e-05, + "loss": 0.4332, + "step": 95200 + }, + { + "epoch": 0.4914328168774281, + "grad_norm": 25005.8984375, + "learning_rate": 6.597173731235388e-05, + "loss": 0.4284, + "step": 95250 + }, + { + "epoch": 0.491690786860041, + "grad_norm": 23687.4765625, + "learning_rate": 6.593711678301874e-05, + "loss": 0.4316, + "step": 95300 + }, + { + "epoch": 0.49194875684265377, + "grad_norm": 19670.087890625, + "learning_rate": 6.590248774675926e-05, + "loss": 0.4326, + "step": 95350 + }, + { + "epoch": 0.4922067268252666, + "grad_norm": 23065.818359375, + "learning_rate": 6.586785022205977e-05, + "loss": 0.4316, + "step": 95400 + }, + { + "epoch": 0.4924646968078794, + "grad_norm": 21279.01953125, + "learning_rate": 6.583320422740909e-05, + "loss": 0.4278, + "step": 95450 + }, + { + "epoch": 0.49272266679049226, + "grad_norm": 19707.6328125, + "learning_rate": 6.579854978130057e-05, + "loss": 0.4272, + "step": 95500 + }, + { + "epoch": 0.4929806367731051, + "grad_norm": 22938.3515625, + "learning_rate": 6.57638869022321e-05, + "loss": 0.4316, + "step": 95550 + }, + { + "epoch": 0.4932386067557179, + "grad_norm": 24812.65625, + "learning_rate": 6.572921560870607e-05, + "loss": 0.4315, + "step": 95600 + }, + { + "epoch": 0.49349657673833075, + "grad_norm": 21462.873046875, + "learning_rate": 6.569453591922931e-05, + "loss": 0.4299, + "step": 95650 + }, + { + "epoch": 0.49375454672094354, + "grad_norm": 22590.384765625, + "learning_rate": 6.565984785231318e-05, + "loss": 0.4294, + "step": 95700 + }, + { + "epoch": 0.4940125167035564, + "grad_norm": 23677.619140625, + "learning_rate": 6.56251514264735e-05, + "loss": 0.4379, + "step": 95750 + }, + { + "epoch": 0.4942704866861692, + "grad_norm": 22078.87109375, + "learning_rate": 6.559044666023057e-05, + "loss": 0.4276, + "step": 95800 + }, + { + "epoch": 0.49452845666878203, + "grad_norm": 22440.369140625, + "learning_rate": 6.55557335721091e-05, + "loss": 0.4279, + "step": 95850 + }, + { + "epoch": 0.49478642665139483, + "grad_norm": 24544.12109375, + "learning_rate": 6.552101218063826e-05, + "loss": 0.4305, + "step": 95900 + }, + { + "epoch": 0.4950443966340077, + "grad_norm": 21647.107421875, + "learning_rate": 6.548628250435167e-05, + "loss": 0.4328, + "step": 95950 + }, + { + "epoch": 0.49530236661662047, + "grad_norm": 21392.28125, + "learning_rate": 6.545154456178735e-05, + "loss": 0.4299, + "step": 96000 + }, + { + "epoch": 0.4955603365992333, + "grad_norm": 19458.55078125, + "learning_rate": 6.541679837148775e-05, + "loss": 0.4375, + "step": 96050 + }, + { + "epoch": 0.4958183065818461, + "grad_norm": 21774.14453125, + "learning_rate": 6.53820439519997e-05, + "loss": 0.4348, + "step": 96100 + }, + { + "epoch": 0.49607627656445896, + "grad_norm": 22902.63671875, + "learning_rate": 6.534728132187444e-05, + "loss": 0.4297, + "step": 96150 + }, + { + "epoch": 0.49633424654707176, + "grad_norm": 20869.306640625, + "learning_rate": 6.531251049966762e-05, + "loss": 0.4313, + "step": 96200 + }, + { + "epoch": 0.4965922165296846, + "grad_norm": 23554.537109375, + "learning_rate": 6.527773150393919e-05, + "loss": 0.4313, + "step": 96250 + }, + { + "epoch": 0.49685018651229745, + "grad_norm": 23000.92578125, + "learning_rate": 6.524294435325351e-05, + "loss": 0.4266, + "step": 96300 + }, + { + "epoch": 0.49710815649491025, + "grad_norm": 21331.72265625, + "learning_rate": 6.52081490661793e-05, + "loss": 0.4261, + "step": 96350 + }, + { + "epoch": 0.4973661264775231, + "grad_norm": 22540.75, + "learning_rate": 6.517334566128961e-05, + "loss": 0.4282, + "step": 96400 + }, + { + "epoch": 0.4976240964601359, + "grad_norm": 21733.560546875, + "learning_rate": 6.51385341571618e-05, + "loss": 0.43, + "step": 96450 + }, + { + "epoch": 0.49788206644274874, + "grad_norm": 23288.21875, + "learning_rate": 6.510371457237765e-05, + "loss": 0.4306, + "step": 96500 + }, + { + "epoch": 0.49814003642536153, + "grad_norm": 24475.9453125, + "learning_rate": 6.506888692552309e-05, + "loss": 0.4299, + "step": 96550 + }, + { + "epoch": 0.4983980064079744, + "grad_norm": 20756.5078125, + "learning_rate": 6.503405123518847e-05, + "loss": 0.4292, + "step": 96600 + }, + { + "epoch": 0.4986559763905872, + "grad_norm": 21059.365234375, + "learning_rate": 6.499920751996845e-05, + "loss": 0.4261, + "step": 96650 + }, + { + "epoch": 0.4989139463732, + "grad_norm": 22173.65625, + "learning_rate": 6.496435579846188e-05, + "loss": 0.4309, + "step": 96700 + }, + { + "epoch": 0.4991719163558128, + "grad_norm": 23941.49609375, + "learning_rate": 6.492949608927196e-05, + "loss": 0.4355, + "step": 96750 + }, + { + "epoch": 0.49942988633842567, + "grad_norm": 22027.400390625, + "learning_rate": 6.489462841100611e-05, + "loss": 0.433, + "step": 96800 + }, + { + "epoch": 0.49968785632103846, + "grad_norm": 21414.77734375, + "learning_rate": 6.485975278227605e-05, + "loss": 0.4291, + "step": 96850 + }, + { + "epoch": 0.4999458263036513, + "grad_norm": 23023.60546875, + "learning_rate": 6.482486922169767e-05, + "loss": 0.4309, + "step": 96900 + }, + { + "epoch": 0.5002037962862641, + "grad_norm": 23856.318359375, + "learning_rate": 6.478997774789119e-05, + "loss": 0.4314, + "step": 96950 + }, + { + "epoch": 0.500461766268877, + "grad_norm": 21834.822265625, + "learning_rate": 6.475507837948096e-05, + "loss": 0.4319, + "step": 97000 + }, + { + "epoch": 0.5007197362514898, + "grad_norm": 22487.779296875, + "learning_rate": 6.472017113509561e-05, + "loss": 0.4281, + "step": 97050 + }, + { + "epoch": 0.5009777062341026, + "grad_norm": 23955.73046875, + "learning_rate": 6.468525603336796e-05, + "loss": 0.4324, + "step": 97100 + }, + { + "epoch": 0.5012356762167154, + "grad_norm": 23631.203125, + "learning_rate": 6.4650333092935e-05, + "loss": 0.4333, + "step": 97150 + }, + { + "epoch": 0.5014936461993282, + "grad_norm": 21347.26953125, + "learning_rate": 6.461540233243792e-05, + "loss": 0.421, + "step": 97200 + }, + { + "epoch": 0.5017516161819411, + "grad_norm": 23590.9140625, + "learning_rate": 6.458046377052209e-05, + "loss": 0.4347, + "step": 97250 + }, + { + "epoch": 0.5020095861645539, + "grad_norm": 23192.708984375, + "learning_rate": 6.454551742583703e-05, + "loss": 0.4363, + "step": 97300 + }, + { + "epoch": 0.5022675561471667, + "grad_norm": 23588.974609375, + "learning_rate": 6.451056331703643e-05, + "loss": 0.4268, + "step": 97350 + }, + { + "epoch": 0.5025255261297795, + "grad_norm": 19536.3046875, + "learning_rate": 6.44756014627781e-05, + "loss": 0.4268, + "step": 97400 + }, + { + "epoch": 0.5027834961123924, + "grad_norm": 20248.345703125, + "learning_rate": 6.444063188172401e-05, + "loss": 0.4286, + "step": 97450 + }, + { + "epoch": 0.5030414660950052, + "grad_norm": 21598.1171875, + "learning_rate": 6.440565459254027e-05, + "loss": 0.4302, + "step": 97500 + }, + { + "epoch": 0.503299436077618, + "grad_norm": 25492.541015625, + "learning_rate": 6.437066961389704e-05, + "loss": 0.4223, + "step": 97550 + }, + { + "epoch": 0.5035574060602308, + "grad_norm": 22227.8125, + "learning_rate": 6.433567696446865e-05, + "loss": 0.4194, + "step": 97600 + }, + { + "epoch": 0.5038153760428437, + "grad_norm": 23799.134765625, + "learning_rate": 6.430067666293348e-05, + "loss": 0.4239, + "step": 97650 + }, + { + "epoch": 0.5040733460254565, + "grad_norm": 25147.080078125, + "learning_rate": 6.426566872797403e-05, + "loss": 0.4369, + "step": 97700 + }, + { + "epoch": 0.5043313160080694, + "grad_norm": 22497.68359375, + "learning_rate": 6.423065317827686e-05, + "loss": 0.4332, + "step": 97750 + }, + { + "epoch": 0.5045892859906821, + "grad_norm": 23273.966796875, + "learning_rate": 6.419563003253258e-05, + "loss": 0.4331, + "step": 97800 + }, + { + "epoch": 0.5048472559732949, + "grad_norm": 21943.7734375, + "learning_rate": 6.416059930943585e-05, + "loss": 0.4331, + "step": 97850 + }, + { + "epoch": 0.5051052259559078, + "grad_norm": 23134.685546875, + "learning_rate": 6.412556102768544e-05, + "loss": 0.4283, + "step": 97900 + }, + { + "epoch": 0.5053631959385206, + "grad_norm": 21504.177734375, + "learning_rate": 6.409051520598405e-05, + "loss": 0.4319, + "step": 97950 + }, + { + "epoch": 0.5056211659211334, + "grad_norm": 25481.029296875, + "learning_rate": 6.405546186303852e-05, + "loss": 0.4268, + "step": 98000 + }, + { + "epoch": 0.5058791359037462, + "grad_norm": 21170.70703125, + "learning_rate": 6.402040101755961e-05, + "loss": 0.4253, + "step": 98050 + }, + { + "epoch": 0.5061371058863591, + "grad_norm": 20005.333984375, + "learning_rate": 6.398533268826212e-05, + "loss": 0.4267, + "step": 98100 + }, + { + "epoch": 0.5063950758689719, + "grad_norm": 20913.32421875, + "learning_rate": 6.395025689386485e-05, + "loss": 0.4245, + "step": 98150 + }, + { + "epoch": 0.5066530458515847, + "grad_norm": 24310.720703125, + "learning_rate": 6.391517365309059e-05, + "loss": 0.4246, + "step": 98200 + }, + { + "epoch": 0.5069110158341975, + "grad_norm": 21981.455078125, + "learning_rate": 6.388008298466607e-05, + "loss": 0.4286, + "step": 98250 + }, + { + "epoch": 0.5071689858168104, + "grad_norm": 23764.30078125, + "learning_rate": 6.384498490732202e-05, + "loss": 0.4282, + "step": 98300 + }, + { + "epoch": 0.5074269557994232, + "grad_norm": 20518.447265625, + "learning_rate": 6.380987943979314e-05, + "loss": 0.4333, + "step": 98350 + }, + { + "epoch": 0.5076849257820361, + "grad_norm": 23327.80859375, + "learning_rate": 6.377476660081803e-05, + "loss": 0.4255, + "step": 98400 + }, + { + "epoch": 0.5079428957646488, + "grad_norm": 19600.84375, + "learning_rate": 6.373964640913924e-05, + "loss": 0.4277, + "step": 98450 + }, + { + "epoch": 0.5082008657472616, + "grad_norm": 23252.146484375, + "learning_rate": 6.370451888350322e-05, + "loss": 0.4311, + "step": 98500 + }, + { + "epoch": 0.5084588357298745, + "grad_norm": 21930.736328125, + "learning_rate": 6.366938404266041e-05, + "loss": 0.4329, + "step": 98550 + }, + { + "epoch": 0.5087168057124873, + "grad_norm": 21249.69140625, + "learning_rate": 6.36342419053651e-05, + "loss": 0.4257, + "step": 98600 + }, + { + "epoch": 0.5089747756951001, + "grad_norm": 21809.4609375, + "learning_rate": 6.359909249037548e-05, + "loss": 0.431, + "step": 98650 + }, + { + "epoch": 0.5092327456777129, + "grad_norm": 23142.6796875, + "learning_rate": 6.356393581645359e-05, + "loss": 0.4329, + "step": 98700 + }, + { + "epoch": 0.5094907156603258, + "grad_norm": 21783.541015625, + "learning_rate": 6.352877190236542e-05, + "loss": 0.4362, + "step": 98750 + }, + { + "epoch": 0.5097486856429386, + "grad_norm": 22534.080078125, + "learning_rate": 6.349360076688079e-05, + "loss": 0.4302, + "step": 98800 + }, + { + "epoch": 0.5100066556255514, + "grad_norm": 22630.03515625, + "learning_rate": 6.345842242877336e-05, + "loss": 0.4314, + "step": 98850 + }, + { + "epoch": 0.5102646256081642, + "grad_norm": 23446.0390625, + "learning_rate": 6.342323690682064e-05, + "loss": 0.428, + "step": 98900 + }, + { + "epoch": 0.5105225955907771, + "grad_norm": 25644.2734375, + "learning_rate": 6.338804421980398e-05, + "loss": 0.4219, + "step": 98950 + }, + { + "epoch": 0.5107805655733899, + "grad_norm": 23159.580078125, + "learning_rate": 6.335284438650856e-05, + "loss": 0.434, + "step": 99000 + }, + { + "epoch": 0.5110385355560026, + "grad_norm": 23536.556640625, + "learning_rate": 6.331763742572337e-05, + "loss": 0.4293, + "step": 99050 + }, + { + "epoch": 0.5112965055386155, + "grad_norm": 23240.662109375, + "learning_rate": 6.328242335624121e-05, + "loss": 0.434, + "step": 99100 + }, + { + "epoch": 0.5115544755212283, + "grad_norm": 22368.94921875, + "learning_rate": 6.324720219685866e-05, + "loss": 0.4295, + "step": 99150 + }, + { + "epoch": 0.5118124455038412, + "grad_norm": 23257.068359375, + "learning_rate": 6.321197396637608e-05, + "loss": 0.4198, + "step": 99200 + }, + { + "epoch": 0.512070415486454, + "grad_norm": 21806.6953125, + "learning_rate": 6.317673868359765e-05, + "loss": 0.4241, + "step": 99250 + }, + { + "epoch": 0.5123283854690668, + "grad_norm": 24117.416015625, + "learning_rate": 6.314149636733125e-05, + "loss": 0.4261, + "step": 99300 + }, + { + "epoch": 0.5125863554516796, + "grad_norm": 25262.626953125, + "learning_rate": 6.310624703638858e-05, + "loss": 0.4234, + "step": 99350 + }, + { + "epoch": 0.5128443254342925, + "grad_norm": 22739.923828125, + "learning_rate": 6.3070990709585e-05, + "loss": 0.4299, + "step": 99400 + }, + { + "epoch": 0.5131022954169053, + "grad_norm": 20651.646484375, + "learning_rate": 6.303572740573971e-05, + "loss": 0.4307, + "step": 99450 + }, + { + "epoch": 0.5133602653995181, + "grad_norm": 22125.037109375, + "learning_rate": 6.300045714367555e-05, + "loss": 0.4216, + "step": 99500 + }, + { + "epoch": 0.5136182353821309, + "grad_norm": 22210.080078125, + "learning_rate": 6.29651799422191e-05, + "loss": 0.429, + "step": 99550 + }, + { + "epoch": 0.5138762053647438, + "grad_norm": 23850.673828125, + "learning_rate": 6.292989582020063e-05, + "loss": 0.4337, + "step": 99600 + }, + { + "epoch": 0.5141341753473566, + "grad_norm": 21346.251953125, + "learning_rate": 6.289460479645417e-05, + "loss": 0.4352, + "step": 99650 + }, + { + "epoch": 0.5143921453299694, + "grad_norm": 22687.080078125, + "learning_rate": 6.285930688981735e-05, + "loss": 0.433, + "step": 99700 + }, + { + "epoch": 0.5146501153125822, + "grad_norm": 20447.666015625, + "learning_rate": 6.282400211913154e-05, + "loss": 0.4288, + "step": 99750 + }, + { + "epoch": 0.514908085295195, + "grad_norm": 21768.51953125, + "learning_rate": 6.278869050324168e-05, + "loss": 0.4363, + "step": 99800 + }, + { + "epoch": 0.5151660552778079, + "grad_norm": 21896.47265625, + "learning_rate": 6.27533720609965e-05, + "loss": 0.4307, + "step": 99850 + }, + { + "epoch": 0.5154240252604207, + "grad_norm": 22967.384765625, + "learning_rate": 6.271804681124827e-05, + "loss": 0.4295, + "step": 99900 + }, + { + "epoch": 0.5156819952430335, + "grad_norm": 20233.869140625, + "learning_rate": 6.268271477285292e-05, + "loss": 0.4329, + "step": 99950 + }, + { + "epoch": 0.5159399652256463, + "grad_norm": 20550.060546875, + "learning_rate": 6.264737596466998e-05, + "loss": 0.4267, + "step": 100000 + }, + { + "epoch": 0.5159399652256463, + "eval_loss": 0.4161209166049957, + "eval_runtime": 2887.0736, + "eval_samples_per_second": 1074.14, + "eval_steps_per_second": 2.098, + "step": 100000 + }, + { + "epoch": 0.5161979352082592, + "grad_norm": 22327.767578125, + "learning_rate": 6.261203040556267e-05, + "loss": 0.4272, + "step": 100050 + }, + { + "epoch": 0.516455905190872, + "grad_norm": 22512.1640625, + "learning_rate": 6.257667811439776e-05, + "loss": 0.4267, + "step": 100100 + }, + { + "epoch": 0.5167138751734848, + "grad_norm": 22710.8828125, + "learning_rate": 6.254131911004561e-05, + "loss": 0.42, + "step": 100150 + }, + { + "epoch": 0.5169718451560976, + "grad_norm": 21731.365234375, + "learning_rate": 6.250595341138014e-05, + "loss": 0.4259, + "step": 100200 + }, + { + "epoch": 0.5172298151387105, + "grad_norm": 21478.970703125, + "learning_rate": 6.247058103727892e-05, + "loss": 0.4217, + "step": 100250 + }, + { + "epoch": 0.5174877851213233, + "grad_norm": 22431.939453125, + "learning_rate": 6.243520200662303e-05, + "loss": 0.4272, + "step": 100300 + }, + { + "epoch": 0.5177457551039361, + "grad_norm": 22137.5078125, + "learning_rate": 6.239981633829709e-05, + "loss": 0.4301, + "step": 100350 + }, + { + "epoch": 0.5180037250865489, + "grad_norm": 22802.220703125, + "learning_rate": 6.23644240511893e-05, + "loss": 0.4346, + "step": 100400 + }, + { + "epoch": 0.5182616950691618, + "grad_norm": 20567.640625, + "learning_rate": 6.232902516419137e-05, + "loss": 0.4271, + "step": 100450 + }, + { + "epoch": 0.5185196650517746, + "grad_norm": 20855.70703125, + "learning_rate": 6.229361969619855e-05, + "loss": 0.4237, + "step": 100500 + }, + { + "epoch": 0.5187776350343875, + "grad_norm": 22052.44921875, + "learning_rate": 6.225820766610958e-05, + "loss": 0.4324, + "step": 100550 + }, + { + "epoch": 0.5190356050170002, + "grad_norm": 21984.818359375, + "learning_rate": 6.222278909282674e-05, + "loss": 0.4315, + "step": 100600 + }, + { + "epoch": 0.519293574999613, + "grad_norm": 22044.8359375, + "learning_rate": 6.218736399525575e-05, + "loss": 0.4324, + "step": 100650 + }, + { + "epoch": 0.5195515449822259, + "grad_norm": 22661.78515625, + "learning_rate": 6.215193239230586e-05, + "loss": 0.4273, + "step": 100700 + }, + { + "epoch": 0.5198095149648387, + "grad_norm": 22091.01171875, + "learning_rate": 6.211649430288976e-05, + "loss": 0.4252, + "step": 100750 + }, + { + "epoch": 0.5200674849474515, + "grad_norm": 22164.376953125, + "learning_rate": 6.208104974592364e-05, + "loss": 0.4272, + "step": 100800 + }, + { + "epoch": 0.5203254549300643, + "grad_norm": 23387.287109375, + "learning_rate": 6.20455987403271e-05, + "loss": 0.4281, + "step": 100850 + }, + { + "epoch": 0.5205834249126772, + "grad_norm": 22505.326171875, + "learning_rate": 6.201014130502317e-05, + "loss": 0.4285, + "step": 100900 + }, + { + "epoch": 0.52084139489529, + "grad_norm": 21150.341796875, + "learning_rate": 6.19746774589384e-05, + "loss": 0.4274, + "step": 100950 + }, + { + "epoch": 0.5210993648779028, + "grad_norm": 23076.650390625, + "learning_rate": 6.193920722100268e-05, + "loss": 0.4289, + "step": 101000 + }, + { + "epoch": 0.5213573348605156, + "grad_norm": 20890.41796875, + "learning_rate": 6.190373061014932e-05, + "loss": 0.4305, + "step": 101050 + }, + { + "epoch": 0.5216153048431285, + "grad_norm": 22231.6328125, + "learning_rate": 6.186824764531507e-05, + "loss": 0.4304, + "step": 101100 + }, + { + "epoch": 0.5218732748257413, + "grad_norm": 22094.197265625, + "learning_rate": 6.183275834544005e-05, + "loss": 0.4279, + "step": 101150 + }, + { + "epoch": 0.522131244808354, + "grad_norm": 23188.353515625, + "learning_rate": 6.179726272946774e-05, + "loss": 0.4272, + "step": 101200 + }, + { + "epoch": 0.5223892147909669, + "grad_norm": 22908.5, + "learning_rate": 6.176176081634504e-05, + "loss": 0.4229, + "step": 101250 + }, + { + "epoch": 0.5226471847735797, + "grad_norm": 21536.37109375, + "learning_rate": 6.172625262502215e-05, + "loss": 0.4267, + "step": 101300 + }, + { + "epoch": 0.5229051547561926, + "grad_norm": 22923.38671875, + "learning_rate": 6.169073817445268e-05, + "loss": 0.4256, + "step": 101350 + }, + { + "epoch": 0.5231631247388054, + "grad_norm": 22802.669921875, + "learning_rate": 6.165521748359356e-05, + "loss": 0.4241, + "step": 101400 + }, + { + "epoch": 0.5234210947214182, + "grad_norm": 22852.59765625, + "learning_rate": 6.161969057140504e-05, + "loss": 0.4275, + "step": 101450 + }, + { + "epoch": 0.523679064704031, + "grad_norm": 27410.056640625, + "learning_rate": 6.158415745685068e-05, + "loss": 0.4316, + "step": 101500 + }, + { + "epoch": 0.5239370346866439, + "grad_norm": 21783.482421875, + "learning_rate": 6.15486181588974e-05, + "loss": 0.4235, + "step": 101550 + }, + { + "epoch": 0.5241950046692567, + "grad_norm": 21013.259765625, + "learning_rate": 6.151307269651536e-05, + "loss": 0.426, + "step": 101600 + }, + { + "epoch": 0.5244529746518695, + "grad_norm": 23852.673828125, + "learning_rate": 6.147752108867807e-05, + "loss": 0.4226, + "step": 101650 + }, + { + "epoch": 0.5247109446344823, + "grad_norm": 24846.427734375, + "learning_rate": 6.144196335436225e-05, + "loss": 0.4277, + "step": 101700 + }, + { + "epoch": 0.5249689146170952, + "grad_norm": 21197.177734375, + "learning_rate": 6.140639951254796e-05, + "loss": 0.4247, + "step": 101750 + }, + { + "epoch": 0.525226884599708, + "grad_norm": 24620.37890625, + "learning_rate": 6.137082958221848e-05, + "loss": 0.429, + "step": 101800 + }, + { + "epoch": 0.5254848545823207, + "grad_norm": 22811.875, + "learning_rate": 6.133525358236036e-05, + "loss": 0.4274, + "step": 101850 + }, + { + "epoch": 0.5257428245649336, + "grad_norm": 20224.125, + "learning_rate": 6.129967153196336e-05, + "loss": 0.4338, + "step": 101900 + }, + { + "epoch": 0.5260007945475464, + "grad_norm": 21489.734375, + "learning_rate": 6.126408345002052e-05, + "loss": 0.4333, + "step": 101950 + }, + { + "epoch": 0.5262587645301593, + "grad_norm": 21771.20703125, + "learning_rate": 6.122848935552804e-05, + "loss": 0.4258, + "step": 102000 + }, + { + "epoch": 0.5265167345127721, + "grad_norm": 23362.43359375, + "learning_rate": 6.119288926748537e-05, + "loss": 0.4234, + "step": 102050 + }, + { + "epoch": 0.5267747044953849, + "grad_norm": 20869.46484375, + "learning_rate": 6.115728320489516e-05, + "loss": 0.4233, + "step": 102100 + }, + { + "epoch": 0.5270326744779977, + "grad_norm": 21146.568359375, + "learning_rate": 6.11216711867632e-05, + "loss": 0.4243, + "step": 102150 + }, + { + "epoch": 0.5272906444606106, + "grad_norm": 24031.97265625, + "learning_rate": 6.108605323209853e-05, + "loss": 0.4334, + "step": 102200 + }, + { + "epoch": 0.5275486144432234, + "grad_norm": 23461.306640625, + "learning_rate": 6.10504293599133e-05, + "loss": 0.4289, + "step": 102250 + }, + { + "epoch": 0.5278065844258362, + "grad_norm": 21013.169921875, + "learning_rate": 6.101479958922287e-05, + "loss": 0.4334, + "step": 102300 + }, + { + "epoch": 0.528064554408449, + "grad_norm": 23328.306640625, + "learning_rate": 6.0979163939045716e-05, + "loss": 0.4285, + "step": 102350 + }, + { + "epoch": 0.5283225243910619, + "grad_norm": 21542.20703125, + "learning_rate": 6.094352242840343e-05, + "loss": 0.4321, + "step": 102400 + }, + { + "epoch": 0.5285804943736747, + "grad_norm": 20556.357421875, + "learning_rate": 6.09078750763208e-05, + "loss": 0.4255, + "step": 102450 + }, + { + "epoch": 0.5288384643562875, + "grad_norm": 24925.21875, + "learning_rate": 6.0872221901825666e-05, + "loss": 0.4225, + "step": 102500 + }, + { + "epoch": 0.5290964343389003, + "grad_norm": 22750.419921875, + "learning_rate": 6.0836562923949016e-05, + "loss": 0.4287, + "step": 102550 + }, + { + "epoch": 0.5293544043215132, + "grad_norm": 21514.8984375, + "learning_rate": 6.080089816172489e-05, + "loss": 0.4254, + "step": 102600 + }, + { + "epoch": 0.529612374304126, + "grad_norm": 23347.03125, + "learning_rate": 6.07652276341905e-05, + "loss": 0.4346, + "step": 102650 + }, + { + "epoch": 0.5298703442867388, + "grad_norm": 23180.916015625, + "learning_rate": 6.072955136038604e-05, + "loss": 0.4244, + "step": 102700 + }, + { + "epoch": 0.5301283142693516, + "grad_norm": 20701.431640625, + "learning_rate": 6.069386935935484e-05, + "loss": 0.43, + "step": 102750 + }, + { + "epoch": 0.5303862842519644, + "grad_norm": 23350.99609375, + "learning_rate": 6.0658181650143245e-05, + "loss": 0.4217, + "step": 102800 + }, + { + "epoch": 0.5306442542345773, + "grad_norm": 21068.111328125, + "learning_rate": 6.062248825180066e-05, + "loss": 0.4278, + "step": 102850 + }, + { + "epoch": 0.5309022242171901, + "grad_norm": 23415.25, + "learning_rate": 6.0586789183379554e-05, + "loss": 0.4331, + "step": 102900 + }, + { + "epoch": 0.5311601941998029, + "grad_norm": 22186.048828125, + "learning_rate": 6.055108446393538e-05, + "loss": 0.4327, + "step": 102950 + }, + { + "epoch": 0.5314181641824157, + "grad_norm": 20644.166015625, + "learning_rate": 6.051537411252662e-05, + "loss": 0.4264, + "step": 103000 + }, + { + "epoch": 0.5316761341650286, + "grad_norm": 21755.712890625, + "learning_rate": 6.047965814821478e-05, + "loss": 0.4253, + "step": 103050 + }, + { + "epoch": 0.5319341041476414, + "grad_norm": 22319.177734375, + "learning_rate": 6.044393659006435e-05, + "loss": 0.4238, + "step": 103100 + }, + { + "epoch": 0.5321920741302542, + "grad_norm": 22544.064453125, + "learning_rate": 6.040820945714281e-05, + "loss": 0.4306, + "step": 103150 + }, + { + "epoch": 0.532450044112867, + "grad_norm": 21484.53125, + "learning_rate": 6.037247676852059e-05, + "loss": 0.4254, + "step": 103200 + }, + { + "epoch": 0.5327080140954799, + "grad_norm": 23923.201171875, + "learning_rate": 6.033673854327114e-05, + "loss": 0.4258, + "step": 103250 + }, + { + "epoch": 0.5329659840780927, + "grad_norm": 20412.08984375, + "learning_rate": 6.03009948004708e-05, + "loss": 0.4286, + "step": 103300 + }, + { + "epoch": 0.5332239540607056, + "grad_norm": 19932.908203125, + "learning_rate": 6.026524555919891e-05, + "loss": 0.4367, + "step": 103350 + }, + { + "epoch": 0.5334819240433183, + "grad_norm": 21761.033203125, + "learning_rate": 6.022949083853772e-05, + "loss": 0.4272, + "step": 103400 + }, + { + "epoch": 0.5337398940259311, + "grad_norm": 23392.29296875, + "learning_rate": 6.019373065757239e-05, + "loss": 0.4274, + "step": 103450 + }, + { + "epoch": 0.533997864008544, + "grad_norm": 26151.69921875, + "learning_rate": 6.015796503539103e-05, + "loss": 0.4189, + "step": 103500 + }, + { + "epoch": 0.5342558339911568, + "grad_norm": 22503.529296875, + "learning_rate": 6.012219399108463e-05, + "loss": 0.428, + "step": 103550 + }, + { + "epoch": 0.5345138039737696, + "grad_norm": 25906.685546875, + "learning_rate": 6.008641754374709e-05, + "loss": 0.4287, + "step": 103600 + }, + { + "epoch": 0.5347717739563824, + "grad_norm": 23784.685546875, + "learning_rate": 6.005063571247517e-05, + "loss": 0.4276, + "step": 103650 + }, + { + "epoch": 0.5350297439389953, + "grad_norm": 21574.30078125, + "learning_rate": 6.0014848516368515e-05, + "loss": 0.4344, + "step": 103700 + }, + { + "epoch": 0.5352877139216081, + "grad_norm": 22296.921875, + "learning_rate": 5.9979055974529675e-05, + "loss": 0.4322, + "step": 103750 + }, + { + "epoch": 0.5355456839042209, + "grad_norm": 21478.611328125, + "learning_rate": 5.994325810606397e-05, + "loss": 0.429, + "step": 103800 + }, + { + "epoch": 0.5358036538868337, + "grad_norm": 22572.37109375, + "learning_rate": 5.9907454930079645e-05, + "loss": 0.4281, + "step": 103850 + }, + { + "epoch": 0.5360616238694466, + "grad_norm": 23416.80859375, + "learning_rate": 5.98716464656877e-05, + "loss": 0.4266, + "step": 103900 + }, + { + "epoch": 0.5363195938520594, + "grad_norm": 23470.626953125, + "learning_rate": 5.983583273200204e-05, + "loss": 0.426, + "step": 103950 + }, + { + "epoch": 0.5365775638346721, + "grad_norm": 24464.38671875, + "learning_rate": 5.980001374813933e-05, + "loss": 0.4218, + "step": 104000 + }, + { + "epoch": 0.536835533817285, + "grad_norm": 23835.29296875, + "learning_rate": 5.976418953321904e-05, + "loss": 0.4261, + "step": 104050 + }, + { + "epoch": 0.5370935037998978, + "grad_norm": 23344.654296875, + "learning_rate": 5.972836010636346e-05, + "loss": 0.4292, + "step": 104100 + }, + { + "epoch": 0.5373514737825107, + "grad_norm": 23925.935546875, + "learning_rate": 5.9692525486697616e-05, + "loss": 0.4323, + "step": 104150 + }, + { + "epoch": 0.5376094437651235, + "grad_norm": 23155.76953125, + "learning_rate": 5.965668569334937e-05, + "loss": 0.428, + "step": 104200 + }, + { + "epoch": 0.5378674137477363, + "grad_norm": 22334.19921875, + "learning_rate": 5.962084074544928e-05, + "loss": 0.4129, + "step": 104250 + }, + { + "epoch": 0.5381253837303491, + "grad_norm": 20239.66796875, + "learning_rate": 5.95849906621307e-05, + "loss": 0.4335, + "step": 104300 + }, + { + "epoch": 0.538383353712962, + "grad_norm": 22626.19140625, + "learning_rate": 5.9549135462529704e-05, + "loss": 0.4274, + "step": 104350 + }, + { + "epoch": 0.5386413236955748, + "grad_norm": 21798.65625, + "learning_rate": 5.951327516578512e-05, + "loss": 0.4258, + "step": 104400 + }, + { + "epoch": 0.5388992936781876, + "grad_norm": 21796.7421875, + "learning_rate": 5.947740979103845e-05, + "loss": 0.4263, + "step": 104450 + }, + { + "epoch": 0.5391572636608004, + "grad_norm": 22380.21484375, + "learning_rate": 5.944153935743396e-05, + "loss": 0.4218, + "step": 104500 + }, + { + "epoch": 0.5394152336434133, + "grad_norm": 22526.4296875, + "learning_rate": 5.940566388411859e-05, + "loss": 0.4233, + "step": 104550 + }, + { + "epoch": 0.5396732036260261, + "grad_norm": 22876.5703125, + "learning_rate": 5.936978339024195e-05, + "loss": 0.4296, + "step": 104600 + }, + { + "epoch": 0.5399311736086388, + "grad_norm": 22592.654296875, + "learning_rate": 5.9333897894956394e-05, + "loss": 0.4287, + "step": 104650 + }, + { + "epoch": 0.5401891435912517, + "grad_norm": 21235.43359375, + "learning_rate": 5.929800741741688e-05, + "loss": 0.4269, + "step": 104700 + }, + { + "epoch": 0.5404471135738645, + "grad_norm": 22049.05859375, + "learning_rate": 5.926211197678104e-05, + "loss": 0.4266, + "step": 104750 + }, + { + "epoch": 0.5407050835564774, + "grad_norm": 23252.845703125, + "learning_rate": 5.922621159220918e-05, + "loss": 0.4223, + "step": 104800 + }, + { + "epoch": 0.5409630535390902, + "grad_norm": 20577.1796875, + "learning_rate": 5.919030628286424e-05, + "loss": 0.4302, + "step": 104850 + }, + { + "epoch": 0.541221023521703, + "grad_norm": 24854.8671875, + "learning_rate": 5.915439606791174e-05, + "loss": 0.4212, + "step": 104900 + }, + { + "epoch": 0.5414789935043158, + "grad_norm": 22561.552734375, + "learning_rate": 5.9118480966519906e-05, + "loss": 0.4196, + "step": 104950 + }, + { + "epoch": 0.5417369634869287, + "grad_norm": 23885.4765625, + "learning_rate": 5.9082560997859496e-05, + "loss": 0.421, + "step": 105000 + }, + { + "epoch": 0.5417369634869287, + "eval_loss": 0.4132173955440521, + "eval_runtime": 2876.3365, + "eval_samples_per_second": 1078.149, + "eval_steps_per_second": 2.106, + "step": 105000 + }, + { + "epoch": 0.5419949334695415, + "grad_norm": 20974.994140625, + "learning_rate": 5.90466361811039e-05, + "loss": 0.4228, + "step": 105050 + }, + { + "epoch": 0.5422529034521543, + "grad_norm": 24338.412109375, + "learning_rate": 5.9010706535429086e-05, + "loss": 0.4215, + "step": 105100 + }, + { + "epoch": 0.5425108734347671, + "grad_norm": 20734.796875, + "learning_rate": 5.8974772080013605e-05, + "loss": 0.4319, + "step": 105150 + }, + { + "epoch": 0.54276884341738, + "grad_norm": 21026.123046875, + "learning_rate": 5.8938832834038574e-05, + "loss": 0.4318, + "step": 105200 + }, + { + "epoch": 0.5430268133999928, + "grad_norm": 20023.287109375, + "learning_rate": 5.890288881668766e-05, + "loss": 0.4306, + "step": 105250 + }, + { + "epoch": 0.5432847833826056, + "grad_norm": 23171.42578125, + "learning_rate": 5.88669400471471e-05, + "loss": 0.4237, + "step": 105300 + }, + { + "epoch": 0.5435427533652184, + "grad_norm": 21692.109375, + "learning_rate": 5.8830986544605635e-05, + "loss": 0.4261, + "step": 105350 + }, + { + "epoch": 0.5438007233478312, + "grad_norm": 22358.216796875, + "learning_rate": 5.8795028328254566e-05, + "loss": 0.4204, + "step": 105400 + }, + { + "epoch": 0.5440586933304441, + "grad_norm": 22529.650390625, + "learning_rate": 5.875906541728766e-05, + "loss": 0.422, + "step": 105450 + }, + { + "epoch": 0.544316663313057, + "grad_norm": 18307.05859375, + "learning_rate": 5.8723097830901264e-05, + "loss": 0.4236, + "step": 105500 + }, + { + "epoch": 0.5445746332956697, + "grad_norm": 22356.583984375, + "learning_rate": 5.8687125588294154e-05, + "loss": 0.4213, + "step": 105550 + }, + { + "epoch": 0.5448326032782825, + "grad_norm": 21446.732421875, + "learning_rate": 5.8651148708667625e-05, + "loss": 0.4216, + "step": 105600 + }, + { + "epoch": 0.5450905732608954, + "grad_norm": 24014.49609375, + "learning_rate": 5.8615167211225416e-05, + "loss": 0.4283, + "step": 105650 + }, + { + "epoch": 0.5453485432435082, + "grad_norm": 22394.306640625, + "learning_rate": 5.8579181115173785e-05, + "loss": 0.4242, + "step": 105700 + }, + { + "epoch": 0.545606513226121, + "grad_norm": 25348.26171875, + "learning_rate": 5.8543190439721405e-05, + "loss": 0.4234, + "step": 105750 + }, + { + "epoch": 0.5458644832087338, + "grad_norm": 22638.720703125, + "learning_rate": 5.850719520407939e-05, + "loss": 0.4269, + "step": 105800 + }, + { + "epoch": 0.5461224531913467, + "grad_norm": 22702.841796875, + "learning_rate": 5.847119542746131e-05, + "loss": 0.4201, + "step": 105850 + }, + { + "epoch": 0.5463804231739595, + "grad_norm": 22299.849609375, + "learning_rate": 5.843519112908315e-05, + "loss": 0.4243, + "step": 105900 + }, + { + "epoch": 0.5466383931565723, + "grad_norm": 21965.283203125, + "learning_rate": 5.8399182328163304e-05, + "loss": 0.4209, + "step": 105950 + }, + { + "epoch": 0.5468963631391851, + "grad_norm": 22101.755859375, + "learning_rate": 5.836316904392256e-05, + "loss": 0.4254, + "step": 106000 + }, + { + "epoch": 0.547154333121798, + "grad_norm": 22735.970703125, + "learning_rate": 5.8327151295584126e-05, + "loss": 0.4251, + "step": 106050 + }, + { + "epoch": 0.5474123031044108, + "grad_norm": 24287.58203125, + "learning_rate": 5.829112910237359e-05, + "loss": 0.427, + "step": 106100 + }, + { + "epoch": 0.5476702730870235, + "grad_norm": 22509.02734375, + "learning_rate": 5.825510248351889e-05, + "loss": 0.4209, + "step": 106150 + }, + { + "epoch": 0.5479282430696364, + "grad_norm": 22325.32421875, + "learning_rate": 5.821907145825032e-05, + "loss": 0.4276, + "step": 106200 + }, + { + "epoch": 0.5481862130522492, + "grad_norm": 21362.255859375, + "learning_rate": 5.8183036045800556e-05, + "loss": 0.4273, + "step": 106250 + }, + { + "epoch": 0.5484441830348621, + "grad_norm": 22934.61328125, + "learning_rate": 5.814699626540461e-05, + "loss": 0.4318, + "step": 106300 + }, + { + "epoch": 0.5487021530174749, + "grad_norm": 23663.65625, + "learning_rate": 5.8110952136299814e-05, + "loss": 0.4246, + "step": 106350 + }, + { + "epoch": 0.5489601230000877, + "grad_norm": 20743.84765625, + "learning_rate": 5.807490367772584e-05, + "loss": 0.4289, + "step": 106400 + }, + { + "epoch": 0.5492180929827005, + "grad_norm": 20859.244140625, + "learning_rate": 5.8038850908924636e-05, + "loss": 0.4255, + "step": 106450 + }, + { + "epoch": 0.5494760629653134, + "grad_norm": 21824.990234375, + "learning_rate": 5.800279384914047e-05, + "loss": 0.4311, + "step": 106500 + }, + { + "epoch": 0.5497340329479262, + "grad_norm": 19514.681640625, + "learning_rate": 5.7966732517619926e-05, + "loss": 0.4311, + "step": 106550 + }, + { + "epoch": 0.549992002930539, + "grad_norm": 24263.765625, + "learning_rate": 5.7930666933611835e-05, + "loss": 0.4257, + "step": 106600 + }, + { + "epoch": 0.5502499729131518, + "grad_norm": 23152.279296875, + "learning_rate": 5.789459711636729e-05, + "loss": 0.4226, + "step": 106650 + }, + { + "epoch": 0.5505079428957647, + "grad_norm": 21756.8671875, + "learning_rate": 5.785852308513967e-05, + "loss": 0.4266, + "step": 106700 + }, + { + "epoch": 0.5507659128783775, + "grad_norm": 20913.3125, + "learning_rate": 5.78224448591846e-05, + "loss": 0.4228, + "step": 106750 + }, + { + "epoch": 0.5510238828609902, + "grad_norm": 24674.92578125, + "learning_rate": 5.778636245775996e-05, + "loss": 0.4246, + "step": 106800 + }, + { + "epoch": 0.5512818528436031, + "grad_norm": 24229.4296875, + "learning_rate": 5.775027590012579e-05, + "loss": 0.4244, + "step": 106850 + }, + { + "epoch": 0.5515398228262159, + "grad_norm": 21722.048828125, + "learning_rate": 5.771418520554443e-05, + "loss": 0.4264, + "step": 106900 + }, + { + "epoch": 0.5517977928088288, + "grad_norm": 22060.224609375, + "learning_rate": 5.7678090393280384e-05, + "loss": 0.4268, + "step": 106950 + }, + { + "epoch": 0.5520557627914416, + "grad_norm": 25690.306640625, + "learning_rate": 5.7641991482600366e-05, + "loss": 0.4298, + "step": 107000 + }, + { + "epoch": 0.5523137327740544, + "grad_norm": 24629.115234375, + "learning_rate": 5.7605888492773266e-05, + "loss": 0.4223, + "step": 107050 + }, + { + "epoch": 0.5525717027566672, + "grad_norm": 23552.78515625, + "learning_rate": 5.756978144307018e-05, + "loss": 0.4246, + "step": 107100 + }, + { + "epoch": 0.5528296727392801, + "grad_norm": 21611.703125, + "learning_rate": 5.753367035276431e-05, + "loss": 0.4173, + "step": 107150 + }, + { + "epoch": 0.5530876427218929, + "grad_norm": 24158.64453125, + "learning_rate": 5.749755524113111e-05, + "loss": 0.4211, + "step": 107200 + }, + { + "epoch": 0.5533456127045057, + "grad_norm": 23446.94140625, + "learning_rate": 5.746143612744811e-05, + "loss": 0.4262, + "step": 107250 + }, + { + "epoch": 0.5536035826871185, + "grad_norm": 21608.703125, + "learning_rate": 5.742531303099498e-05, + "loss": 0.424, + "step": 107300 + }, + { + "epoch": 0.5538615526697314, + "grad_norm": 25070.78125, + "learning_rate": 5.738918597105353e-05, + "loss": 0.4219, + "step": 107350 + }, + { + "epoch": 0.5541195226523442, + "grad_norm": 21161.5234375, + "learning_rate": 5.735305496690769e-05, + "loss": 0.4293, + "step": 107400 + }, + { + "epoch": 0.554377492634957, + "grad_norm": 23108.521484375, + "learning_rate": 5.7316920037843516e-05, + "loss": 0.427, + "step": 107450 + }, + { + "epoch": 0.5546354626175698, + "grad_norm": 22233.87890625, + "learning_rate": 5.728078120314909e-05, + "loss": 0.4204, + "step": 107500 + }, + { + "epoch": 0.5548934326001826, + "grad_norm": 24522.310546875, + "learning_rate": 5.724463848211464e-05, + "loss": 0.4257, + "step": 107550 + }, + { + "epoch": 0.5551514025827955, + "grad_norm": 22916.892578125, + "learning_rate": 5.720849189403244e-05, + "loss": 0.4251, + "step": 107600 + }, + { + "epoch": 0.5554093725654083, + "grad_norm": 20069.236328125, + "learning_rate": 5.7172341458196876e-05, + "loss": 0.424, + "step": 107650 + }, + { + "epoch": 0.5556673425480211, + "grad_norm": 25173.86328125, + "learning_rate": 5.713618719390432e-05, + "loss": 0.4294, + "step": 107700 + }, + { + "epoch": 0.5559253125306339, + "grad_norm": 21957.373046875, + "learning_rate": 5.710002912045323e-05, + "loss": 0.4239, + "step": 107750 + }, + { + "epoch": 0.5561832825132468, + "grad_norm": 20540.82421875, + "learning_rate": 5.706386725714407e-05, + "loss": 0.4259, + "step": 107800 + }, + { + "epoch": 0.5564412524958596, + "grad_norm": 22470.4921875, + "learning_rate": 5.702770162327936e-05, + "loss": 0.4273, + "step": 107850 + }, + { + "epoch": 0.5566992224784724, + "grad_norm": 21721.197265625, + "learning_rate": 5.69915322381636e-05, + "loss": 0.4233, + "step": 107900 + }, + { + "epoch": 0.5569571924610852, + "grad_norm": 21666.955078125, + "learning_rate": 5.6955359121103324e-05, + "loss": 0.4283, + "step": 107950 + }, + { + "epoch": 0.5572151624436981, + "grad_norm": 20970.9296875, + "learning_rate": 5.6919182291407014e-05, + "loss": 0.4275, + "step": 108000 + }, + { + "epoch": 0.5574731324263109, + "grad_norm": 22353.306640625, + "learning_rate": 5.688300176838518e-05, + "loss": 0.4244, + "step": 108050 + }, + { + "epoch": 0.5577311024089237, + "grad_norm": 22607.357421875, + "learning_rate": 5.68468175713503e-05, + "loss": 0.4252, + "step": 108100 + }, + { + "epoch": 0.5579890723915365, + "grad_norm": 21949.1015625, + "learning_rate": 5.681062971961677e-05, + "loss": 0.4194, + "step": 108150 + }, + { + "epoch": 0.5582470423741493, + "grad_norm": 23135.21875, + "learning_rate": 5.677443823250099e-05, + "loss": 0.425, + "step": 108200 + }, + { + "epoch": 0.5585050123567622, + "grad_norm": 19050.34765625, + "learning_rate": 5.673824312932123e-05, + "loss": 0.422, + "step": 108250 + }, + { + "epoch": 0.5587629823393749, + "grad_norm": 22969.15625, + "learning_rate": 5.67020444293978e-05, + "loss": 0.4253, + "step": 108300 + }, + { + "epoch": 0.5590209523219878, + "grad_norm": 22808.203125, + "learning_rate": 5.666584215205282e-05, + "loss": 0.4261, + "step": 108350 + }, + { + "epoch": 0.5592789223046006, + "grad_norm": 23061.126953125, + "learning_rate": 5.662963631661038e-05, + "loss": 0.4248, + "step": 108400 + }, + { + "epoch": 0.5595368922872135, + "grad_norm": 24134.693359375, + "learning_rate": 5.659342694239642e-05, + "loss": 0.4273, + "step": 108450 + }, + { + "epoch": 0.5597948622698263, + "grad_norm": 23659.2578125, + "learning_rate": 5.655721404873886e-05, + "loss": 0.427, + "step": 108500 + }, + { + "epoch": 0.5600528322524391, + "grad_norm": 20205.1953125, + "learning_rate": 5.652099765496741e-05, + "loss": 0.4257, + "step": 108550 + }, + { + "epoch": 0.5603108022350519, + "grad_norm": 21324.837890625, + "learning_rate": 5.6484777780413686e-05, + "loss": 0.4248, + "step": 108600 + }, + { + "epoch": 0.5605687722176648, + "grad_norm": 21779.849609375, + "learning_rate": 5.644855444441114e-05, + "loss": 0.4259, + "step": 108650 + }, + { + "epoch": 0.5608267422002776, + "grad_norm": 20502.0859375, + "learning_rate": 5.641232766629512e-05, + "loss": 0.4281, + "step": 108700 + }, + { + "epoch": 0.5610847121828904, + "grad_norm": 23600.5859375, + "learning_rate": 5.637609746540276e-05, + "loss": 0.4183, + "step": 108750 + }, + { + "epoch": 0.5613426821655032, + "grad_norm": 22977.41015625, + "learning_rate": 5.633986386107302e-05, + "loss": 0.4219, + "step": 108800 + }, + { + "epoch": 0.561600652148116, + "grad_norm": 23411.263671875, + "learning_rate": 5.630362687264672e-05, + "loss": 0.4268, + "step": 108850 + }, + { + "epoch": 0.5618586221307289, + "grad_norm": 20194.060546875, + "learning_rate": 5.6267386519466446e-05, + "loss": 0.4175, + "step": 108900 + }, + { + "epoch": 0.5621165921133416, + "grad_norm": 19387.88671875, + "learning_rate": 5.623114282087664e-05, + "loss": 0.4274, + "step": 108950 + }, + { + "epoch": 0.5623745620959545, + "grad_norm": 23158.28125, + "learning_rate": 5.619489579622343e-05, + "loss": 0.4222, + "step": 109000 + }, + { + "epoch": 0.5626325320785673, + "grad_norm": 23551.431640625, + "learning_rate": 5.6158645464854817e-05, + "loss": 0.428, + "step": 109050 + }, + { + "epoch": 0.5628905020611802, + "grad_norm": 23904.896484375, + "learning_rate": 5.6122391846120495e-05, + "loss": 0.4252, + "step": 109100 + }, + { + "epoch": 0.563148472043793, + "grad_norm": 21354.61328125, + "learning_rate": 5.608613495937197e-05, + "loss": 0.4202, + "step": 109150 + }, + { + "epoch": 0.5634064420264058, + "grad_norm": 23561.978515625, + "learning_rate": 5.6049874823962456e-05, + "loss": 0.4301, + "step": 109200 + }, + { + "epoch": 0.5636644120090186, + "grad_norm": 20979.53515625, + "learning_rate": 5.601361145924692e-05, + "loss": 0.4204, + "step": 109250 + }, + { + "epoch": 0.5639223819916315, + "grad_norm": 24039.125, + "learning_rate": 5.5977344884582e-05, + "loss": 0.4284, + "step": 109300 + }, + { + "epoch": 0.5641803519742443, + "grad_norm": 22242.35546875, + "learning_rate": 5.594107511932615e-05, + "loss": 0.4248, + "step": 109350 + }, + { + "epoch": 0.5644383219568571, + "grad_norm": 20016.1875, + "learning_rate": 5.5904802182839434e-05, + "loss": 0.4222, + "step": 109400 + }, + { + "epoch": 0.5646962919394699, + "grad_norm": 22243.0703125, + "learning_rate": 5.5868526094483666e-05, + "loss": 0.4276, + "step": 109450 + }, + { + "epoch": 0.5649542619220828, + "grad_norm": 23286.38671875, + "learning_rate": 5.58322468736223e-05, + "loss": 0.4208, + "step": 109500 + }, + { + "epoch": 0.5652122319046956, + "grad_norm": 21801.802734375, + "learning_rate": 5.579596453962047e-05, + "loss": 0.4275, + "step": 109550 + }, + { + "epoch": 0.5654702018873083, + "grad_norm": 23282.025390625, + "learning_rate": 5.575967911184502e-05, + "loss": 0.4255, + "step": 109600 + }, + { + "epoch": 0.5657281718699212, + "grad_norm": 25253.943359375, + "learning_rate": 5.572339060966439e-05, + "loss": 0.4239, + "step": 109650 + }, + { + "epoch": 0.565986141852534, + "grad_norm": 22364.595703125, + "learning_rate": 5.5687099052448675e-05, + "loss": 0.4255, + "step": 109700 + }, + { + "epoch": 0.5662441118351469, + "grad_norm": 23305.46484375, + "learning_rate": 5.565080445956961e-05, + "loss": 0.4254, + "step": 109750 + }, + { + "epoch": 0.5665020818177597, + "grad_norm": 20225.2421875, + "learning_rate": 5.561450685040054e-05, + "loss": 0.4239, + "step": 109800 + }, + { + "epoch": 0.5667600518003725, + "grad_norm": 20221.8203125, + "learning_rate": 5.557820624431645e-05, + "loss": 0.4171, + "step": 109850 + }, + { + "epoch": 0.5670180217829853, + "grad_norm": 19833.607421875, + "learning_rate": 5.554190266069387e-05, + "loss": 0.4224, + "step": 109900 + }, + { + "epoch": 0.5672759917655982, + "grad_norm": 19884.58203125, + "learning_rate": 5.550559611891095e-05, + "loss": 0.4196, + "step": 109950 + }, + { + "epoch": 0.567533961748211, + "grad_norm": 22072.25390625, + "learning_rate": 5.546928663834745e-05, + "loss": 0.4196, + "step": 110000 + }, + { + "epoch": 0.567533961748211, + "eval_loss": 0.4103853106498718, + "eval_runtime": 3606.5234, + "eval_samples_per_second": 859.864, + "eval_steps_per_second": 1.679, + "step": 110000 + }, + { + "epoch": 0.5677919317308238, + "grad_norm": 21647.181640625, + "learning_rate": 5.543297423838464e-05, + "loss": 0.414, + "step": 110050 + }, + { + "epoch": 0.5680499017134366, + "grad_norm": 23264.748046875, + "learning_rate": 5.5396658938405396e-05, + "loss": 0.4192, + "step": 110100 + }, + { + "epoch": 0.5683078716960495, + "grad_norm": 21868.10546875, + "learning_rate": 5.536034075779409e-05, + "loss": 0.4222, + "step": 110150 + }, + { + "epoch": 0.5685658416786623, + "grad_norm": 22489.07421875, + "learning_rate": 5.53240197159367e-05, + "loss": 0.4237, + "step": 110200 + }, + { + "epoch": 0.568823811661275, + "grad_norm": 21589.173828125, + "learning_rate": 5.5287695832220674e-05, + "loss": 0.4218, + "step": 110250 + }, + { + "epoch": 0.5690817816438879, + "grad_norm": 23184.103515625, + "learning_rate": 5.525136912603501e-05, + "loss": 0.4203, + "step": 110300 + }, + { + "epoch": 0.5693397516265007, + "grad_norm": 23085.970703125, + "learning_rate": 5.521503961677019e-05, + "loss": 0.4216, + "step": 110350 + }, + { + "epoch": 0.5695977216091136, + "grad_norm": 22217.3671875, + "learning_rate": 5.51787073238182e-05, + "loss": 0.424, + "step": 110400 + }, + { + "epoch": 0.5698556915917263, + "grad_norm": 23515.9375, + "learning_rate": 5.514237226657253e-05, + "loss": 0.4217, + "step": 110450 + }, + { + "epoch": 0.5701136615743392, + "grad_norm": 21375.2734375, + "learning_rate": 5.510603446442812e-05, + "loss": 0.4175, + "step": 110500 + }, + { + "epoch": 0.570371631556952, + "grad_norm": 21658.15625, + "learning_rate": 5.506969393678139e-05, + "loss": 0.4191, + "step": 110550 + }, + { + "epoch": 0.5706296015395649, + "grad_norm": 24653.294921875, + "learning_rate": 5.503335070303018e-05, + "loss": 0.419, + "step": 110600 + }, + { + "epoch": 0.5708875715221777, + "grad_norm": 21722.984375, + "learning_rate": 5.4997004782573855e-05, + "loss": 0.4237, + "step": 110650 + }, + { + "epoch": 0.5711455415047905, + "grad_norm": 20897.595703125, + "learning_rate": 5.496065619481312e-05, + "loss": 0.4211, + "step": 110700 + }, + { + "epoch": 0.5714035114874033, + "grad_norm": 20729.123046875, + "learning_rate": 5.4924304959150175e-05, + "loss": 0.4228, + "step": 110750 + }, + { + "epoch": 0.5716614814700162, + "grad_norm": 22107.888671875, + "learning_rate": 5.488795109498861e-05, + "loss": 0.4222, + "step": 110800 + }, + { + "epoch": 0.571919451452629, + "grad_norm": 24264.587890625, + "learning_rate": 5.485159462173337e-05, + "loss": 0.4232, + "step": 110850 + }, + { + "epoch": 0.5721774214352418, + "grad_norm": 23000.34375, + "learning_rate": 5.481523555879089e-05, + "loss": 0.4236, + "step": 110900 + }, + { + "epoch": 0.5724353914178546, + "grad_norm": 20345.26953125, + "learning_rate": 5.4778873925568905e-05, + "loss": 0.4254, + "step": 110950 + }, + { + "epoch": 0.5726933614004674, + "grad_norm": 25514.09765625, + "learning_rate": 5.4742509741476566e-05, + "loss": 0.4247, + "step": 111000 + }, + { + "epoch": 0.5729513313830803, + "grad_norm": 22510.115234375, + "learning_rate": 5.470614302592434e-05, + "loss": 0.4271, + "step": 111050 + }, + { + "epoch": 0.573209301365693, + "grad_norm": 24683.4921875, + "learning_rate": 5.466977379832411e-05, + "loss": 0.4207, + "step": 111100 + }, + { + "epoch": 0.5734672713483059, + "grad_norm": 22154.2890625, + "learning_rate": 5.4633402078089035e-05, + "loss": 0.422, + "step": 111150 + }, + { + "epoch": 0.5737252413309187, + "grad_norm": 23333.966796875, + "learning_rate": 5.459702788463367e-05, + "loss": 0.4218, + "step": 111200 + }, + { + "epoch": 0.5739832113135316, + "grad_norm": 26566.900390625, + "learning_rate": 5.4560651237373814e-05, + "loss": 0.4269, + "step": 111250 + }, + { + "epoch": 0.5742411812961444, + "grad_norm": 21463.828125, + "learning_rate": 5.452427215572666e-05, + "loss": 0.4196, + "step": 111300 + }, + { + "epoch": 0.5744991512787572, + "grad_norm": 24921.373046875, + "learning_rate": 5.448789065911064e-05, + "loss": 0.4248, + "step": 111350 + }, + { + "epoch": 0.57475712126137, + "grad_norm": 23610.16015625, + "learning_rate": 5.445150676694548e-05, + "loss": 0.4245, + "step": 111400 + }, + { + "epoch": 0.5750150912439829, + "grad_norm": 24598.2109375, + "learning_rate": 5.441512049865221e-05, + "loss": 0.4199, + "step": 111450 + }, + { + "epoch": 0.5752730612265957, + "grad_norm": 24330.02734375, + "learning_rate": 5.43787318736531e-05, + "loss": 0.423, + "step": 111500 + }, + { + "epoch": 0.5755310312092085, + "grad_norm": 23434.587890625, + "learning_rate": 5.434234091137171e-05, + "loss": 0.4214, + "step": 111550 + }, + { + "epoch": 0.5757890011918213, + "grad_norm": 25007.08203125, + "learning_rate": 5.430594763123283e-05, + "loss": 0.4258, + "step": 111600 + }, + { + "epoch": 0.5760469711744342, + "grad_norm": 24568.759765625, + "learning_rate": 5.4269552052662486e-05, + "loss": 0.4248, + "step": 111650 + }, + { + "epoch": 0.576304941157047, + "grad_norm": 22131.74609375, + "learning_rate": 5.423315419508792e-05, + "loss": 0.418, + "step": 111700 + }, + { + "epoch": 0.5765629111396597, + "grad_norm": 22058.443359375, + "learning_rate": 5.4196754077937626e-05, + "loss": 0.4289, + "step": 111750 + }, + { + "epoch": 0.5768208811222726, + "grad_norm": 23790.3203125, + "learning_rate": 5.4160351720641276e-05, + "loss": 0.4217, + "step": 111800 + }, + { + "epoch": 0.5770788511048854, + "grad_norm": 20349.287109375, + "learning_rate": 5.412394714262974e-05, + "loss": 0.4154, + "step": 111850 + }, + { + "epoch": 0.5773368210874983, + "grad_norm": 20262.9296875, + "learning_rate": 5.408754036333506e-05, + "loss": 0.4214, + "step": 111900 + }, + { + "epoch": 0.5775947910701111, + "grad_norm": 21678.17578125, + "learning_rate": 5.4051131402190494e-05, + "loss": 0.4168, + "step": 111950 + }, + { + "epoch": 0.5778527610527239, + "grad_norm": 22649.1875, + "learning_rate": 5.4014720278630415e-05, + "loss": 0.4206, + "step": 112000 + }, + { + "epoch": 0.5781107310353367, + "grad_norm": 22304.1328125, + "learning_rate": 5.39783070120904e-05, + "loss": 0.4199, + "step": 112050 + }, + { + "epoch": 0.5783687010179496, + "grad_norm": 22573.169921875, + "learning_rate": 5.394189162200715e-05, + "loss": 0.4325, + "step": 112100 + }, + { + "epoch": 0.5786266710005624, + "grad_norm": 22942.09765625, + "learning_rate": 5.390547412781847e-05, + "loss": 0.416, + "step": 112150 + }, + { + "epoch": 0.5788846409831752, + "grad_norm": 20210.18359375, + "learning_rate": 5.386905454896333e-05, + "loss": 0.4274, + "step": 112200 + }, + { + "epoch": 0.579142610965788, + "grad_norm": 22916.09375, + "learning_rate": 5.38326329048818e-05, + "loss": 0.4208, + "step": 112250 + }, + { + "epoch": 0.5794005809484009, + "grad_norm": 21563.78125, + "learning_rate": 5.379620921501503e-05, + "loss": 0.4264, + "step": 112300 + }, + { + "epoch": 0.5796585509310137, + "grad_norm": 20984.3671875, + "learning_rate": 5.375978349880528e-05, + "loss": 0.4229, + "step": 112350 + }, + { + "epoch": 0.5799165209136264, + "grad_norm": 22014.6640625, + "learning_rate": 5.372335577569592e-05, + "loss": 0.4205, + "step": 112400 + }, + { + "epoch": 0.5801744908962393, + "grad_norm": 22977.39453125, + "learning_rate": 5.3686926065131325e-05, + "loss": 0.4248, + "step": 112450 + }, + { + "epoch": 0.5804324608788521, + "grad_norm": 22589.521484375, + "learning_rate": 5.365049438655702e-05, + "loss": 0.4165, + "step": 112500 + }, + { + "epoch": 0.580690430861465, + "grad_norm": 24455.625, + "learning_rate": 5.3614060759419474e-05, + "loss": 0.4224, + "step": 112550 + }, + { + "epoch": 0.5809484008440777, + "grad_norm": 24485.833984375, + "learning_rate": 5.357762520316628e-05, + "loss": 0.4264, + "step": 112600 + }, + { + "epoch": 0.5812063708266906, + "grad_norm": 23294.244140625, + "learning_rate": 5.354118773724603e-05, + "loss": 0.4254, + "step": 112650 + }, + { + "epoch": 0.5814643408093034, + "grad_norm": 21813.884765625, + "learning_rate": 5.350474838110835e-05, + "loss": 0.4226, + "step": 112700 + }, + { + "epoch": 0.5817223107919163, + "grad_norm": 23532.0546875, + "learning_rate": 5.3468307154203836e-05, + "loss": 0.4236, + "step": 112750 + }, + { + "epoch": 0.5819802807745291, + "grad_norm": 24739.787109375, + "learning_rate": 5.343186407598413e-05, + "loss": 0.4276, + "step": 112800 + }, + { + "epoch": 0.5822382507571419, + "grad_norm": 23312.783203125, + "learning_rate": 5.339541916590184e-05, + "loss": 0.4281, + "step": 112850 + }, + { + "epoch": 0.5824962207397547, + "grad_norm": 24166.798828125, + "learning_rate": 5.335897244341054e-05, + "loss": 0.4238, + "step": 112900 + }, + { + "epoch": 0.5827541907223676, + "grad_norm": 23690.455078125, + "learning_rate": 5.332252392796478e-05, + "loss": 0.4181, + "step": 112950 + }, + { + "epoch": 0.5830121607049804, + "grad_norm": 23499.16015625, + "learning_rate": 5.32860736390201e-05, + "loss": 0.4143, + "step": 113000 + }, + { + "epoch": 0.5832701306875931, + "grad_norm": 23299.5625, + "learning_rate": 5.324962159603294e-05, + "loss": 0.4198, + "step": 113050 + }, + { + "epoch": 0.583528100670206, + "grad_norm": 22958.423828125, + "learning_rate": 5.321316781846071e-05, + "loss": 0.421, + "step": 113100 + }, + { + "epoch": 0.5837860706528188, + "grad_norm": 20775.119140625, + "learning_rate": 5.3176712325761704e-05, + "loss": 0.4148, + "step": 113150 + }, + { + "epoch": 0.5840440406354317, + "grad_norm": 23139.953125, + "learning_rate": 5.3140255137395155e-05, + "loss": 0.422, + "step": 113200 + }, + { + "epoch": 0.5843020106180444, + "grad_norm": 19829.94140625, + "learning_rate": 5.310379627282125e-05, + "loss": 0.4248, + "step": 113250 + }, + { + "epoch": 0.5845599806006573, + "grad_norm": 20085.572265625, + "learning_rate": 5.306733575150099e-05, + "loss": 0.4183, + "step": 113300 + }, + { + "epoch": 0.5848179505832701, + "grad_norm": 23206.27734375, + "learning_rate": 5.303087359289629e-05, + "loss": 0.423, + "step": 113350 + }, + { + "epoch": 0.585075920565883, + "grad_norm": 25039.34765625, + "learning_rate": 5.299440981646996e-05, + "loss": 0.4232, + "step": 113400 + }, + { + "epoch": 0.5853338905484958, + "grad_norm": 21276.865234375, + "learning_rate": 5.2957944441685646e-05, + "loss": 0.4205, + "step": 113450 + }, + { + "epoch": 0.5855918605311086, + "grad_norm": 22706.712890625, + "learning_rate": 5.292147748800788e-05, + "loss": 0.4225, + "step": 113500 + }, + { + "epoch": 0.5858498305137214, + "grad_norm": 18046.537109375, + "learning_rate": 5.2885008974902004e-05, + "loss": 0.4195, + "step": 113550 + }, + { + "epoch": 0.5861078004963343, + "grad_norm": 22363.5625, + "learning_rate": 5.28485389218342e-05, + "loss": 0.4149, + "step": 113600 + }, + { + "epoch": 0.5863657704789471, + "grad_norm": 24409.609375, + "learning_rate": 5.2812067348271466e-05, + "loss": 0.4224, + "step": 113650 + }, + { + "epoch": 0.5866237404615599, + "grad_norm": 23921.68359375, + "learning_rate": 5.277559427368164e-05, + "loss": 0.4274, + "step": 113700 + }, + { + "epoch": 0.5868817104441727, + "grad_norm": 23887.84765625, + "learning_rate": 5.273911971753335e-05, + "loss": 0.4185, + "step": 113750 + }, + { + "epoch": 0.5871396804267855, + "grad_norm": 23169.423828125, + "learning_rate": 5.270264369929597e-05, + "loss": 0.4218, + "step": 113800 + }, + { + "epoch": 0.5873976504093984, + "grad_norm": 23339.57421875, + "learning_rate": 5.266616623843972e-05, + "loss": 0.4211, + "step": 113850 + }, + { + "epoch": 0.5876556203920111, + "grad_norm": 22072.59765625, + "learning_rate": 5.2629687354435576e-05, + "loss": 0.4191, + "step": 113900 + }, + { + "epoch": 0.587913590374624, + "grad_norm": 24308.357421875, + "learning_rate": 5.259320706675523e-05, + "loss": 0.4168, + "step": 113950 + }, + { + "epoch": 0.5881715603572368, + "grad_norm": 20896.5703125, + "learning_rate": 5.255672539487119e-05, + "loss": 0.4201, + "step": 114000 + }, + { + "epoch": 0.5884295303398497, + "grad_norm": 20070.814453125, + "learning_rate": 5.252024235825661e-05, + "loss": 0.4216, + "step": 114050 + }, + { + "epoch": 0.5886875003224625, + "grad_norm": 24864.869140625, + "learning_rate": 5.2483757976385486e-05, + "loss": 0.4269, + "step": 114100 + }, + { + "epoch": 0.5889454703050753, + "grad_norm": 24734.0234375, + "learning_rate": 5.2447272268732436e-05, + "loss": 0.4196, + "step": 114150 + }, + { + "epoch": 0.5892034402876881, + "grad_norm": 22383.212890625, + "learning_rate": 5.2410785254772856e-05, + "loss": 0.4171, + "step": 114200 + }, + { + "epoch": 0.589461410270301, + "grad_norm": 22587.44921875, + "learning_rate": 5.237429695398276e-05, + "loss": 0.4232, + "step": 114250 + }, + { + "epoch": 0.5897193802529138, + "grad_norm": 23357.263671875, + "learning_rate": 5.2337807385838935e-05, + "loss": 0.4241, + "step": 114300 + }, + { + "epoch": 0.5899773502355266, + "grad_norm": 24632.125, + "learning_rate": 5.23013165698188e-05, + "loss": 0.4154, + "step": 114350 + }, + { + "epoch": 0.5902353202181394, + "grad_norm": 23191.818359375, + "learning_rate": 5.226482452540045e-05, + "loss": 0.424, + "step": 114400 + }, + { + "epoch": 0.5904932902007523, + "grad_norm": 23649.560546875, + "learning_rate": 5.2228331272062626e-05, + "loss": 0.427, + "step": 114450 + }, + { + "epoch": 0.5907512601833651, + "grad_norm": 23992.169921875, + "learning_rate": 5.21918368292847e-05, + "loss": 0.4267, + "step": 114500 + }, + { + "epoch": 0.5910092301659778, + "grad_norm": 21792.041015625, + "learning_rate": 5.215534121654673e-05, + "loss": 0.4272, + "step": 114550 + }, + { + "epoch": 0.5912672001485907, + "grad_norm": 25516.345703125, + "learning_rate": 5.211884445332935e-05, + "loss": 0.4207, + "step": 114600 + }, + { + "epoch": 0.5915251701312035, + "grad_norm": 22557.25390625, + "learning_rate": 5.208234655911384e-05, + "loss": 0.4228, + "step": 114650 + }, + { + "epoch": 0.5917831401138164, + "grad_norm": 24185.09765625, + "learning_rate": 5.2045847553382045e-05, + "loss": 0.4226, + "step": 114700 + }, + { + "epoch": 0.5920411100964291, + "grad_norm": 20565.134765625, + "learning_rate": 5.200934745561643e-05, + "loss": 0.4274, + "step": 114750 + }, + { + "epoch": 0.592299080079042, + "grad_norm": 24019.0, + "learning_rate": 5.197284628530007e-05, + "loss": 0.4234, + "step": 114800 + }, + { + "epoch": 0.5925570500616548, + "grad_norm": 26129.01171875, + "learning_rate": 5.193634406191658e-05, + "loss": 0.418, + "step": 114850 + }, + { + "epoch": 0.5928150200442677, + "grad_norm": 25187.611328125, + "learning_rate": 5.18998408049501e-05, + "loss": 0.4213, + "step": 114900 + }, + { + "epoch": 0.5930729900268805, + "grad_norm": 20145.14453125, + "learning_rate": 5.186333653388539e-05, + "loss": 0.418, + "step": 114950 + }, + { + "epoch": 0.5933309600094933, + "grad_norm": 22472.322265625, + "learning_rate": 5.182683126820773e-05, + "loss": 0.4209, + "step": 115000 + }, + { + "epoch": 0.5933309600094933, + "eval_loss": 0.4084908068180084, + "eval_runtime": 3582.6916, + "eval_samples_per_second": 865.584, + "eval_steps_per_second": 1.691, + "step": 115000 + }, + { + "epoch": 0.5935889299921061, + "grad_norm": 22404.052734375, + "learning_rate": 5.179032502740291e-05, + "loss": 0.4147, + "step": 115050 + }, + { + "epoch": 0.593846899974719, + "grad_norm": 21242.705078125, + "learning_rate": 5.175381783095725e-05, + "loss": 0.4237, + "step": 115100 + }, + { + "epoch": 0.5941048699573318, + "grad_norm": 22416.06640625, + "learning_rate": 5.171730969835758e-05, + "loss": 0.4185, + "step": 115150 + }, + { + "epoch": 0.5943628399399445, + "grad_norm": 22231.525390625, + "learning_rate": 5.1680800649091276e-05, + "loss": 0.4227, + "step": 115200 + }, + { + "epoch": 0.5946208099225574, + "grad_norm": 22431.462890625, + "learning_rate": 5.164429070264613e-05, + "loss": 0.4225, + "step": 115250 + }, + { + "epoch": 0.5948787799051702, + "grad_norm": 26008.57421875, + "learning_rate": 5.160777987851044e-05, + "loss": 0.4253, + "step": 115300 + }, + { + "epoch": 0.5951367498877831, + "grad_norm": 22555.501953125, + "learning_rate": 5.157126819617297e-05, + "loss": 0.4181, + "step": 115350 + }, + { + "epoch": 0.5953947198703958, + "grad_norm": 25113.587890625, + "learning_rate": 5.153475567512298e-05, + "loss": 0.4261, + "step": 115400 + }, + { + "epoch": 0.5956526898530087, + "grad_norm": 22877.908203125, + "learning_rate": 5.149824233485013e-05, + "loss": 0.4177, + "step": 115450 + }, + { + "epoch": 0.5959106598356215, + "grad_norm": 22468.34375, + "learning_rate": 5.1461728194844526e-05, + "loss": 0.4223, + "step": 115500 + }, + { + "epoch": 0.5961686298182344, + "grad_norm": 24525.326171875, + "learning_rate": 5.142521327459672e-05, + "loss": 0.4159, + "step": 115550 + }, + { + "epoch": 0.5964265998008472, + "grad_norm": 23334.296875, + "learning_rate": 5.1388697593597643e-05, + "loss": 0.4206, + "step": 115600 + }, + { + "epoch": 0.59668456978346, + "grad_norm": 21743.333984375, + "learning_rate": 5.1352181171338706e-05, + "loss": 0.4191, + "step": 115650 + }, + { + "epoch": 0.5969425397660728, + "grad_norm": 26287.66796875, + "learning_rate": 5.131566402731165e-05, + "loss": 0.4147, + "step": 115700 + }, + { + "epoch": 0.5972005097486857, + "grad_norm": 20856.890625, + "learning_rate": 5.12791461810086e-05, + "loss": 0.4248, + "step": 115750 + }, + { + "epoch": 0.5974584797312985, + "grad_norm": 22821.73046875, + "learning_rate": 5.124262765192208e-05, + "loss": 0.4239, + "step": 115800 + }, + { + "epoch": 0.5977164497139112, + "grad_norm": 24805.427734375, + "learning_rate": 5.1206108459545e-05, + "loss": 0.4172, + "step": 115850 + }, + { + "epoch": 0.5979744196965241, + "grad_norm": 25195.064453125, + "learning_rate": 5.116958862337057e-05, + "loss": 0.4242, + "step": 115900 + }, + { + "epoch": 0.5982323896791369, + "grad_norm": 22029.236328125, + "learning_rate": 5.1133068162892383e-05, + "loss": 0.4217, + "step": 115950 + }, + { + "epoch": 0.5984903596617498, + "grad_norm": 23296.77734375, + "learning_rate": 5.109654709760434e-05, + "loss": 0.4223, + "step": 116000 + }, + { + "epoch": 0.5987483296443625, + "grad_norm": 23822.447265625, + "learning_rate": 5.106002544700069e-05, + "loss": 0.4235, + "step": 116050 + }, + { + "epoch": 0.5990062996269754, + "grad_norm": 21188.46875, + "learning_rate": 5.1023503230576e-05, + "loss": 0.4275, + "step": 116100 + }, + { + "epoch": 0.5992642696095882, + "grad_norm": 24459.021484375, + "learning_rate": 5.0986980467825096e-05, + "loss": 0.4217, + "step": 116150 + }, + { + "epoch": 0.5995222395922011, + "grad_norm": 22304.396484375, + "learning_rate": 5.095045717824313e-05, + "loss": 0.42, + "step": 116200 + }, + { + "epoch": 0.5997802095748139, + "grad_norm": 20124.943359375, + "learning_rate": 5.0913933381325516e-05, + "loss": 0.4149, + "step": 116250 + }, + { + "epoch": 0.6000381795574267, + "grad_norm": 22610.3046875, + "learning_rate": 5.087740909656798e-05, + "loss": 0.4164, + "step": 116300 + }, + { + "epoch": 0.6002961495400395, + "grad_norm": 22058.974609375, + "learning_rate": 5.084088434346645e-05, + "loss": 0.4211, + "step": 116350 + }, + { + "epoch": 0.6005541195226524, + "grad_norm": 23463.626953125, + "learning_rate": 5.0804359141517134e-05, + "loss": 0.4182, + "step": 116400 + }, + { + "epoch": 0.6008120895052652, + "grad_norm": 25045.67578125, + "learning_rate": 5.076783351021648e-05, + "loss": 0.4202, + "step": 116450 + }, + { + "epoch": 0.601070059487878, + "grad_norm": 22583.076171875, + "learning_rate": 5.0731307469061184e-05, + "loss": 0.4177, + "step": 116500 + }, + { + "epoch": 0.6013280294704908, + "grad_norm": 26350.400390625, + "learning_rate": 5.069478103754811e-05, + "loss": 0.4193, + "step": 116550 + }, + { + "epoch": 0.6015859994531036, + "grad_norm": 21430.255859375, + "learning_rate": 5.0658254235174385e-05, + "loss": 0.422, + "step": 116600 + }, + { + "epoch": 0.6018439694357165, + "grad_norm": 21467.482421875, + "learning_rate": 5.0621727081437275e-05, + "loss": 0.4157, + "step": 116650 + }, + { + "epoch": 0.6021019394183292, + "grad_norm": 25780.095703125, + "learning_rate": 5.05851995958343e-05, + "loss": 0.4243, + "step": 116700 + }, + { + "epoch": 0.6023599094009421, + "grad_norm": 21074.52734375, + "learning_rate": 5.0548671797863125e-05, + "loss": 0.4271, + "step": 116750 + }, + { + "epoch": 0.6026178793835549, + "grad_norm": 25752.841796875, + "learning_rate": 5.051214370702155e-05, + "loss": 0.4209, + "step": 116800 + }, + { + "epoch": 0.6028758493661678, + "grad_norm": 23178.93359375, + "learning_rate": 5.047561534280758e-05, + "loss": 0.4232, + "step": 116850 + }, + { + "epoch": 0.6031338193487806, + "grad_norm": 23263.65625, + "learning_rate": 5.0439086724719355e-05, + "loss": 0.4196, + "step": 116900 + }, + { + "epoch": 0.6033917893313934, + "grad_norm": 20372.861328125, + "learning_rate": 5.040255787225514e-05, + "loss": 0.4194, + "step": 116950 + }, + { + "epoch": 0.6036497593140062, + "grad_norm": 23453.59375, + "learning_rate": 5.036602880491332e-05, + "loss": 0.4156, + "step": 117000 + }, + { + "epoch": 0.6039077292966191, + "grad_norm": 24039.392578125, + "learning_rate": 5.032949954219243e-05, + "loss": 0.4192, + "step": 117050 + }, + { + "epoch": 0.6041656992792319, + "grad_norm": 24246.55859375, + "learning_rate": 5.0292970103591044e-05, + "loss": 0.426, + "step": 117100 + }, + { + "epoch": 0.6044236692618447, + "grad_norm": 23403.130859375, + "learning_rate": 5.0256440508607904e-05, + "loss": 0.4195, + "step": 117150 + }, + { + "epoch": 0.6046816392444575, + "grad_norm": 21872.07421875, + "learning_rate": 5.021991077674179e-05, + "loss": 0.4214, + "step": 117200 + }, + { + "epoch": 0.6049396092270704, + "grad_norm": 22344.455078125, + "learning_rate": 5.018338092749155e-05, + "loss": 0.4205, + "step": 117250 + }, + { + "epoch": 0.6051975792096832, + "grad_norm": 22999.099609375, + "learning_rate": 5.014685098035612e-05, + "loss": 0.4203, + "step": 117300 + }, + { + "epoch": 0.6054555491922959, + "grad_norm": 21572.994140625, + "learning_rate": 5.011032095483448e-05, + "loss": 0.4279, + "step": 117350 + }, + { + "epoch": 0.6057135191749088, + "grad_norm": 21263.11328125, + "learning_rate": 5.007379087042566e-05, + "loss": 0.418, + "step": 117400 + }, + { + "epoch": 0.6059714891575216, + "grad_norm": 22789.671875, + "learning_rate": 5.00372607466287e-05, + "loss": 0.4196, + "step": 117450 + }, + { + "epoch": 0.6062294591401345, + "grad_norm": 21276.09765625, + "learning_rate": 5.000073060294267e-05, + "loss": 0.4125, + "step": 117500 + }, + { + "epoch": 0.6064874291227472, + "grad_norm": 22501.169921875, + "learning_rate": 4.9964200458866654e-05, + "loss": 0.4152, + "step": 117550 + }, + { + "epoch": 0.6067453991053601, + "grad_norm": 21645.912109375, + "learning_rate": 4.992767033389976e-05, + "loss": 0.4253, + "step": 117600 + }, + { + "epoch": 0.6070033690879729, + "grad_norm": 21256.7109375, + "learning_rate": 4.9891140247541025e-05, + "loss": 0.4214, + "step": 117650 + }, + { + "epoch": 0.6072613390705858, + "grad_norm": 22883.98046875, + "learning_rate": 4.985461021928952e-05, + "loss": 0.4238, + "step": 117700 + }, + { + "epoch": 0.6075193090531986, + "grad_norm": 21366.412109375, + "learning_rate": 4.981808026864426e-05, + "loss": 0.4225, + "step": 117750 + }, + { + "epoch": 0.6077772790358114, + "grad_norm": 24185.3515625, + "learning_rate": 4.978155041510425e-05, + "loss": 0.4196, + "step": 117800 + }, + { + "epoch": 0.6080352490184242, + "grad_norm": 21638.009765625, + "learning_rate": 4.974502067816838e-05, + "loss": 0.4221, + "step": 117850 + }, + { + "epoch": 0.608293219001037, + "grad_norm": 20867.111328125, + "learning_rate": 4.970849107733554e-05, + "loss": 0.4225, + "step": 117900 + }, + { + "epoch": 0.6085511889836499, + "grad_norm": 21785.69140625, + "learning_rate": 4.967196163210451e-05, + "loss": 0.4166, + "step": 117950 + }, + { + "epoch": 0.6088091589662626, + "grad_norm": 24691.8515625, + "learning_rate": 4.963543236197401e-05, + "loss": 0.4226, + "step": 118000 + }, + { + "epoch": 0.6090671289488755, + "grad_norm": 21214.1484375, + "learning_rate": 4.9598903286442675e-05, + "loss": 0.418, + "step": 118050 + }, + { + "epoch": 0.6093250989314883, + "grad_norm": 22802.849609375, + "learning_rate": 4.956237442500898e-05, + "loss": 0.4227, + "step": 118100 + }, + { + "epoch": 0.6095830689141012, + "grad_norm": 25204.90625, + "learning_rate": 4.952584579717135e-05, + "loss": 0.4152, + "step": 118150 + }, + { + "epoch": 0.6098410388967139, + "grad_norm": 21970.19140625, + "learning_rate": 4.9489317422428044e-05, + "loss": 0.4197, + "step": 118200 + }, + { + "epoch": 0.6100990088793268, + "grad_norm": 22331.013671875, + "learning_rate": 4.945278932027723e-05, + "loss": 0.4161, + "step": 118250 + }, + { + "epoch": 0.6103569788619396, + "grad_norm": 27234.177734375, + "learning_rate": 4.941626151021686e-05, + "loss": 0.4204, + "step": 118300 + }, + { + "epoch": 0.6106149488445525, + "grad_norm": 22253.0078125, + "learning_rate": 4.937973401174481e-05, + "loss": 0.4202, + "step": 118350 + }, + { + "epoch": 0.6108729188271653, + "grad_norm": 20930.27734375, + "learning_rate": 4.934320684435871e-05, + "loss": 0.4169, + "step": 118400 + }, + { + "epoch": 0.6111308888097781, + "grad_norm": 22569.205078125, + "learning_rate": 4.930668002755609e-05, + "loss": 0.4177, + "step": 118450 + }, + { + "epoch": 0.6113888587923909, + "grad_norm": 23197.943359375, + "learning_rate": 4.9270153580834256e-05, + "loss": 0.414, + "step": 118500 + }, + { + "epoch": 0.6116468287750038, + "grad_norm": 21927.1875, + "learning_rate": 4.923362752369029e-05, + "loss": 0.4203, + "step": 118550 + }, + { + "epoch": 0.6119047987576166, + "grad_norm": 23422.181640625, + "learning_rate": 4.919710187562112e-05, + "loss": 0.4213, + "step": 118600 + }, + { + "epoch": 0.6121627687402293, + "grad_norm": 23351.67578125, + "learning_rate": 4.9160576656123416e-05, + "loss": 0.4213, + "step": 118650 + }, + { + "epoch": 0.6124207387228422, + "grad_norm": 21228.416015625, + "learning_rate": 4.9124051884693664e-05, + "loss": 0.4192, + "step": 118700 + }, + { + "epoch": 0.612678708705455, + "grad_norm": 22555.9609375, + "learning_rate": 4.908752758082802e-05, + "loss": 0.4189, + "step": 118750 + }, + { + "epoch": 0.6129366786880679, + "grad_norm": 21010.859375, + "learning_rate": 4.905100376402251e-05, + "loss": 0.4194, + "step": 118800 + }, + { + "epoch": 0.6131946486706806, + "grad_norm": 23468.78515625, + "learning_rate": 4.901448045377279e-05, + "loss": 0.4151, + "step": 118850 + }, + { + "epoch": 0.6134526186532935, + "grad_norm": 23818.189453125, + "learning_rate": 4.8977957669574334e-05, + "loss": 0.4184, + "step": 118900 + }, + { + "epoch": 0.6137105886359063, + "grad_norm": 22162.76171875, + "learning_rate": 4.8941435430922294e-05, + "loss": 0.4181, + "step": 118950 + }, + { + "epoch": 0.6139685586185192, + "grad_norm": 22983.45703125, + "learning_rate": 4.8904913757311506e-05, + "loss": 0.4196, + "step": 119000 + }, + { + "epoch": 0.614226528601132, + "grad_norm": 22748.150390625, + "learning_rate": 4.886839266823656e-05, + "loss": 0.4195, + "step": 119050 + }, + { + "epoch": 0.6144844985837448, + "grad_norm": 23146.306640625, + "learning_rate": 4.8831872183191684e-05, + "loss": 0.4219, + "step": 119100 + }, + { + "epoch": 0.6147424685663576, + "grad_norm": 24951.591796875, + "learning_rate": 4.879535232167084e-05, + "loss": 0.4165, + "step": 119150 + }, + { + "epoch": 0.6150004385489705, + "grad_norm": 24381.689453125, + "learning_rate": 4.875883310316758e-05, + "loss": 0.4179, + "step": 119200 + }, + { + "epoch": 0.6152584085315833, + "grad_norm": 21191.4609375, + "learning_rate": 4.872231454717518e-05, + "loss": 0.4155, + "step": 119250 + }, + { + "epoch": 0.615516378514196, + "grad_norm": 21586.84375, + "learning_rate": 4.8685796673186526e-05, + "loss": 0.412, + "step": 119300 + }, + { + "epoch": 0.6157743484968089, + "grad_norm": 20381.505859375, + "learning_rate": 4.864927950069416e-05, + "loss": 0.4171, + "step": 119350 + }, + { + "epoch": 0.6160323184794217, + "grad_norm": 23258.296875, + "learning_rate": 4.861276304919026e-05, + "loss": 0.418, + "step": 119400 + }, + { + "epoch": 0.6162902884620346, + "grad_norm": 23629.14453125, + "learning_rate": 4.857624733816657e-05, + "loss": 0.4221, + "step": 119450 + }, + { + "epoch": 0.6165482584446473, + "grad_norm": 22892.7734375, + "learning_rate": 4.853973238711449e-05, + "loss": 0.4278, + "step": 119500 + }, + { + "epoch": 0.6168062284272602, + "grad_norm": 21639.669921875, + "learning_rate": 4.850321821552497e-05, + "loss": 0.4224, + "step": 119550 + }, + { + "epoch": 0.617064198409873, + "grad_norm": 21392.951171875, + "learning_rate": 4.84667048428886e-05, + "loss": 0.4192, + "step": 119600 + }, + { + "epoch": 0.6173221683924859, + "grad_norm": 22603.51953125, + "learning_rate": 4.843019228869548e-05, + "loss": 0.4169, + "step": 119650 + }, + { + "epoch": 0.6175801383750986, + "grad_norm": 22470.62109375, + "learning_rate": 4.8393680572435324e-05, + "loss": 0.4175, + "step": 119700 + }, + { + "epoch": 0.6178381083577115, + "grad_norm": 26185.634765625, + "learning_rate": 4.835716971359737e-05, + "loss": 0.4208, + "step": 119750 + }, + { + "epoch": 0.6180960783403243, + "grad_norm": 21508.12109375, + "learning_rate": 4.832065973167041e-05, + "loss": 0.4194, + "step": 119800 + }, + { + "epoch": 0.6183540483229372, + "grad_norm": 20717.205078125, + "learning_rate": 4.8284150646142784e-05, + "loss": 0.424, + "step": 119850 + }, + { + "epoch": 0.61861201830555, + "grad_norm": 20015.30078125, + "learning_rate": 4.8247642476502284e-05, + "loss": 0.4189, + "step": 119900 + }, + { + "epoch": 0.6188699882881628, + "grad_norm": 21596.349609375, + "learning_rate": 4.821113524223634e-05, + "loss": 0.4218, + "step": 119950 + }, + { + "epoch": 0.6191279582707756, + "grad_norm": 22051.921875, + "learning_rate": 4.817462896283173e-05, + "loss": 0.4184, + "step": 120000 + }, + { + "epoch": 0.6191279582707756, + "eval_loss": 0.40621376037597656, + "eval_runtime": 3588.5932, + "eval_samples_per_second": 864.16, + "eval_steps_per_second": 1.688, + "step": 120000 + }, + { + "epoch": 0.6193859282533885, + "grad_norm": 22562.478515625, + "learning_rate": 4.813812365777486e-05, + "loss": 0.4171, + "step": 120050 + }, + { + "epoch": 0.6196438982360013, + "grad_norm": 22531.505859375, + "learning_rate": 4.81016193465515e-05, + "loss": 0.4171, + "step": 120100 + }, + { + "epoch": 0.619901868218614, + "grad_norm": 21869.177734375, + "learning_rate": 4.8065116048647005e-05, + "loss": 0.4184, + "step": 120150 + }, + { + "epoch": 0.6201598382012269, + "grad_norm": 23087.56640625, + "learning_rate": 4.802861378354607e-05, + "loss": 0.4177, + "step": 120200 + }, + { + "epoch": 0.6204178081838397, + "grad_norm": 22546.060546875, + "learning_rate": 4.7992112570732925e-05, + "loss": 0.4213, + "step": 120250 + }, + { + "epoch": 0.6206757781664526, + "grad_norm": 22802.8984375, + "learning_rate": 4.795561242969122e-05, + "loss": 0.4218, + "step": 120300 + }, + { + "epoch": 0.6209337481490653, + "grad_norm": 19467.32421875, + "learning_rate": 4.791911337990401e-05, + "loss": 0.4141, + "step": 120350 + }, + { + "epoch": 0.6211917181316782, + "grad_norm": 25076.169921875, + "learning_rate": 4.78826154408538e-05, + "loss": 0.4178, + "step": 120400 + }, + { + "epoch": 0.621449688114291, + "grad_norm": 20815.935546875, + "learning_rate": 4.784611863202244e-05, + "loss": 0.4217, + "step": 120450 + }, + { + "epoch": 0.6217076580969039, + "grad_norm": 21686.271484375, + "learning_rate": 4.780962297289126e-05, + "loss": 0.4124, + "step": 120500 + }, + { + "epoch": 0.6219656280795167, + "grad_norm": 22759.310546875, + "learning_rate": 4.777312848294092e-05, + "loss": 0.4159, + "step": 120550 + }, + { + "epoch": 0.6222235980621295, + "grad_norm": 25325.75390625, + "learning_rate": 4.773663518165148e-05, + "loss": 0.4176, + "step": 120600 + }, + { + "epoch": 0.6224815680447423, + "grad_norm": 23474.958984375, + "learning_rate": 4.7700143088502344e-05, + "loss": 0.4143, + "step": 120650 + }, + { + "epoch": 0.6227395380273552, + "grad_norm": 25355.40625, + "learning_rate": 4.766365222297229e-05, + "loss": 0.4262, + "step": 120700 + }, + { + "epoch": 0.622997508009968, + "grad_norm": 22215.14453125, + "learning_rate": 4.762716260453945e-05, + "loss": 0.4149, + "step": 120750 + }, + { + "epoch": 0.6232554779925807, + "grad_norm": 23521.607421875, + "learning_rate": 4.759067425268126e-05, + "loss": 0.4223, + "step": 120800 + }, + { + "epoch": 0.6235134479751936, + "grad_norm": 24524.02734375, + "learning_rate": 4.7554187186874513e-05, + "loss": 0.4256, + "step": 120850 + }, + { + "epoch": 0.6237714179578064, + "grad_norm": 19954.322265625, + "learning_rate": 4.7517701426595266e-05, + "loss": 0.4119, + "step": 120900 + }, + { + "epoch": 0.6240293879404193, + "grad_norm": 21612.1953125, + "learning_rate": 4.748121699131893e-05, + "loss": 0.4196, + "step": 120950 + }, + { + "epoch": 0.624287357923032, + "grad_norm": 20466.0, + "learning_rate": 4.744473390052019e-05, + "loss": 0.4181, + "step": 121000 + }, + { + "epoch": 0.6245453279056449, + "grad_norm": 19992.173828125, + "learning_rate": 4.740825217367304e-05, + "loss": 0.4159, + "step": 121050 + }, + { + "epoch": 0.6248032978882577, + "grad_norm": 21553.1328125, + "learning_rate": 4.737177183025067e-05, + "loss": 0.4157, + "step": 121100 + }, + { + "epoch": 0.6250612678708706, + "grad_norm": 22242.568359375, + "learning_rate": 4.73352928897256e-05, + "loss": 0.4153, + "step": 121150 + }, + { + "epoch": 0.6253192378534834, + "grad_norm": 23883.212890625, + "learning_rate": 4.7298815371569606e-05, + "loss": 0.4173, + "step": 121200 + }, + { + "epoch": 0.6255772078360962, + "grad_norm": 22386.505859375, + "learning_rate": 4.7262339295253645e-05, + "loss": 0.4178, + "step": 121250 + }, + { + "epoch": 0.625835177818709, + "grad_norm": 22051.859375, + "learning_rate": 4.722586468024797e-05, + "loss": 0.4111, + "step": 121300 + }, + { + "epoch": 0.6260931478013219, + "grad_norm": 21374.4765625, + "learning_rate": 4.7189391546021996e-05, + "loss": 0.418, + "step": 121350 + }, + { + "epoch": 0.6263511177839347, + "grad_norm": 22240.453125, + "learning_rate": 4.7152919912044406e-05, + "loss": 0.4196, + "step": 121400 + }, + { + "epoch": 0.6266090877665474, + "grad_norm": 26278.798828125, + "learning_rate": 4.711644979778302e-05, + "loss": 0.4165, + "step": 121450 + }, + { + "epoch": 0.6268670577491603, + "grad_norm": 22151.77734375, + "learning_rate": 4.707998122270492e-05, + "loss": 0.422, + "step": 121500 + }, + { + "epoch": 0.6271250277317731, + "grad_norm": 21278.99609375, + "learning_rate": 4.7043514206276276e-05, + "loss": 0.4202, + "step": 121550 + }, + { + "epoch": 0.627382997714386, + "grad_norm": 24062.6015625, + "learning_rate": 4.70070487679625e-05, + "loss": 0.4174, + "step": 121600 + }, + { + "epoch": 0.6276409676969987, + "grad_norm": 21124.400390625, + "learning_rate": 4.697058492722815e-05, + "loss": 0.4156, + "step": 121650 + }, + { + "epoch": 0.6278989376796116, + "grad_norm": 22513.48046875, + "learning_rate": 4.6934122703536894e-05, + "loss": 0.4198, + "step": 121700 + }, + { + "epoch": 0.6281569076622244, + "grad_norm": 24250.720703125, + "learning_rate": 4.689766211635159e-05, + "loss": 0.4197, + "step": 121750 + }, + { + "epoch": 0.6284148776448373, + "grad_norm": 23831.220703125, + "learning_rate": 4.686120318513415e-05, + "loss": 0.415, + "step": 121800 + }, + { + "epoch": 0.62867284762745, + "grad_norm": 24005.458984375, + "learning_rate": 4.682474592934569e-05, + "loss": 0.4154, + "step": 121850 + }, + { + "epoch": 0.6289308176100629, + "grad_norm": 21365.09375, + "learning_rate": 4.6788290368446355e-05, + "loss": 0.4164, + "step": 121900 + }, + { + "epoch": 0.6291887875926757, + "grad_norm": 23601.689453125, + "learning_rate": 4.675183652189545e-05, + "loss": 0.418, + "step": 121950 + }, + { + "epoch": 0.6294467575752886, + "grad_norm": 21023.33203125, + "learning_rate": 4.671538440915129e-05, + "loss": 0.4181, + "step": 122000 + }, + { + "epoch": 0.6297047275579014, + "grad_norm": 22292.671875, + "learning_rate": 4.667893404967133e-05, + "loss": 0.4203, + "step": 122050 + }, + { + "epoch": 0.6299626975405141, + "grad_norm": 21975.3671875, + "learning_rate": 4.664248546291207e-05, + "loss": 0.4162, + "step": 122100 + }, + { + "epoch": 0.630220667523127, + "grad_norm": 22591.34765625, + "learning_rate": 4.660603866832906e-05, + "loss": 0.4146, + "step": 122150 + }, + { + "epoch": 0.6304786375057398, + "grad_norm": 23449.529296875, + "learning_rate": 4.6569593685376895e-05, + "loss": 0.4205, + "step": 122200 + }, + { + "epoch": 0.6307366074883527, + "grad_norm": 21614.046875, + "learning_rate": 4.653315053350918e-05, + "loss": 0.4173, + "step": 122250 + }, + { + "epoch": 0.6309945774709654, + "grad_norm": 26004.5859375, + "learning_rate": 4.649670923217859e-05, + "loss": 0.4137, + "step": 122300 + }, + { + "epoch": 0.6312525474535783, + "grad_norm": 23640.357421875, + "learning_rate": 4.646026980083676e-05, + "loss": 0.4165, + "step": 122350 + }, + { + "epoch": 0.6315105174361911, + "grad_norm": 23575.3984375, + "learning_rate": 4.6423832258934396e-05, + "loss": 0.4179, + "step": 122400 + }, + { + "epoch": 0.631768487418804, + "grad_norm": 26795.05078125, + "learning_rate": 4.63873966259211e-05, + "loss": 0.4171, + "step": 122450 + }, + { + "epoch": 0.6320264574014167, + "grad_norm": 22246.931640625, + "learning_rate": 4.6350962921245515e-05, + "loss": 0.4188, + "step": 122500 + }, + { + "epoch": 0.6322844273840296, + "grad_norm": 22268.3359375, + "learning_rate": 4.63145311643553e-05, + "loss": 0.4141, + "step": 122550 + }, + { + "epoch": 0.6325423973666424, + "grad_norm": 23749.38671875, + "learning_rate": 4.627810137469696e-05, + "loss": 0.4132, + "step": 122600 + }, + { + "epoch": 0.6328003673492553, + "grad_norm": 22449.15625, + "learning_rate": 4.624167357171606e-05, + "loss": 0.4177, + "step": 122650 + }, + { + "epoch": 0.6330583373318681, + "grad_norm": 22132.927734375, + "learning_rate": 4.6205247774857e-05, + "loss": 0.4211, + "step": 122700 + }, + { + "epoch": 0.6333163073144809, + "grad_norm": 20199.654296875, + "learning_rate": 4.616882400356323e-05, + "loss": 0.4127, + "step": 122750 + }, + { + "epoch": 0.6335742772970937, + "grad_norm": 23172.29296875, + "learning_rate": 4.613240227727699e-05, + "loss": 0.4173, + "step": 122800 + }, + { + "epoch": 0.6338322472797066, + "grad_norm": 23373.6640625, + "learning_rate": 4.609598261543954e-05, + "loss": 0.4139, + "step": 122850 + }, + { + "epoch": 0.6340902172623194, + "grad_norm": 22187.794921875, + "learning_rate": 4.6059565037490965e-05, + "loss": 0.4233, + "step": 122900 + }, + { + "epoch": 0.6343481872449321, + "grad_norm": 21762.28515625, + "learning_rate": 4.602314956287027e-05, + "loss": 0.4195, + "step": 122950 + }, + { + "epoch": 0.634606157227545, + "grad_norm": 24228.3125, + "learning_rate": 4.598673621101535e-05, + "loss": 0.4218, + "step": 123000 + }, + { + "epoch": 0.6348641272101578, + "grad_norm": 20360.208984375, + "learning_rate": 4.595032500136291e-05, + "loss": 0.4266, + "step": 123050 + }, + { + "epoch": 0.6351220971927707, + "grad_norm": 22763.712890625, + "learning_rate": 4.5913915953348574e-05, + "loss": 0.4153, + "step": 123100 + }, + { + "epoch": 0.6353800671753834, + "grad_norm": 25601.05078125, + "learning_rate": 4.5877509086406766e-05, + "loss": 0.4201, + "step": 123150 + }, + { + "epoch": 0.6356380371579963, + "grad_norm": 22695.91015625, + "learning_rate": 4.584110441997081e-05, + "loss": 0.4174, + "step": 123200 + }, + { + "epoch": 0.6358960071406091, + "grad_norm": 24915.857421875, + "learning_rate": 4.5804701973472755e-05, + "loss": 0.416, + "step": 123250 + }, + { + "epoch": 0.636153977123222, + "grad_norm": 24066.427734375, + "learning_rate": 4.576830176634356e-05, + "loss": 0.415, + "step": 123300 + }, + { + "epoch": 0.6364119471058348, + "grad_norm": 25726.71484375, + "learning_rate": 4.573190381801293e-05, + "loss": 0.4204, + "step": 123350 + }, + { + "epoch": 0.6366699170884476, + "grad_norm": 24271.998046875, + "learning_rate": 4.56955081479094e-05, + "loss": 0.4166, + "step": 123400 + }, + { + "epoch": 0.6369278870710604, + "grad_norm": 20897.818359375, + "learning_rate": 4.5659114775460286e-05, + "loss": 0.4156, + "step": 123450 + }, + { + "epoch": 0.6371858570536733, + "grad_norm": 24409.841796875, + "learning_rate": 4.562272372009163e-05, + "loss": 0.4208, + "step": 123500 + }, + { + "epoch": 0.6374438270362861, + "grad_norm": 24757.927734375, + "learning_rate": 4.5586335001228296e-05, + "loss": 0.4167, + "step": 123550 + }, + { + "epoch": 0.6377017970188988, + "grad_norm": 22433.091796875, + "learning_rate": 4.554994863829387e-05, + "loss": 0.4206, + "step": 123600 + }, + { + "epoch": 0.6379597670015117, + "grad_norm": 22757.798828125, + "learning_rate": 4.5513564650710706e-05, + "loss": 0.4113, + "step": 123650 + }, + { + "epoch": 0.6382177369841245, + "grad_norm": 22652.9140625, + "learning_rate": 4.547718305789984e-05, + "loss": 0.4224, + "step": 123700 + }, + { + "epoch": 0.6384757069667374, + "grad_norm": 25416.0390625, + "learning_rate": 4.5440803879281086e-05, + "loss": 0.4129, + "step": 123750 + }, + { + "epoch": 0.6387336769493501, + "grad_norm": 22621.40625, + "learning_rate": 4.5404427134272926e-05, + "loss": 0.4204, + "step": 123800 + }, + { + "epoch": 0.638991646931963, + "grad_norm": 24213.93359375, + "learning_rate": 4.536805284229258e-05, + "loss": 0.4109, + "step": 123850 + }, + { + "epoch": 0.6392496169145758, + "grad_norm": 20231.091796875, + "learning_rate": 4.5331681022755946e-05, + "loss": 0.4221, + "step": 123900 + }, + { + "epoch": 0.6395075868971887, + "grad_norm": 22513.21875, + "learning_rate": 4.529531169507757e-05, + "loss": 0.4189, + "step": 123950 + }, + { + "epoch": 0.6397655568798014, + "grad_norm": 19454.783203125, + "learning_rate": 4.5258944878670714e-05, + "loss": 0.4138, + "step": 124000 + }, + { + "epoch": 0.6400235268624143, + "grad_norm": 23547.423828125, + "learning_rate": 4.522258059294727e-05, + "loss": 0.4206, + "step": 124050 + }, + { + "epoch": 0.6402814968450271, + "grad_norm": 23985.0703125, + "learning_rate": 4.5186218857317825e-05, + "loss": 0.4186, + "step": 124100 + }, + { + "epoch": 0.64053946682764, + "grad_norm": 22254.078125, + "learning_rate": 4.5149859691191517e-05, + "loss": 0.4076, + "step": 124150 + }, + { + "epoch": 0.6407974368102528, + "grad_norm": 24060.70703125, + "learning_rate": 4.5113503113976194e-05, + "loss": 0.4207, + "step": 124200 + }, + { + "epoch": 0.6410554067928655, + "grad_norm": 21521.923828125, + "learning_rate": 4.5077149145078275e-05, + "loss": 0.4134, + "step": 124250 + }, + { + "epoch": 0.6413133767754784, + "grad_norm": 22107.48828125, + "learning_rate": 4.504079780390282e-05, + "loss": 0.4095, + "step": 124300 + }, + { + "epoch": 0.6415713467580912, + "grad_norm": 22610.880859375, + "learning_rate": 4.5004449109853485e-05, + "loss": 0.4216, + "step": 124350 + }, + { + "epoch": 0.6418293167407041, + "grad_norm": 22752.83984375, + "learning_rate": 4.496810308233247e-05, + "loss": 0.4225, + "step": 124400 + }, + { + "epoch": 0.6420872867233168, + "grad_norm": 22029.88671875, + "learning_rate": 4.4931759740740596e-05, + "loss": 0.4138, + "step": 124450 + }, + { + "epoch": 0.6423452567059297, + "grad_norm": 24989.2421875, + "learning_rate": 4.489541910447722e-05, + "loss": 0.4166, + "step": 124500 + }, + { + "epoch": 0.6426032266885425, + "grad_norm": 25843.16796875, + "learning_rate": 4.485908119294031e-05, + "loss": 0.4132, + "step": 124550 + }, + { + "epoch": 0.6428611966711554, + "grad_norm": 23847.01171875, + "learning_rate": 4.4822746025526286e-05, + "loss": 0.4256, + "step": 124600 + }, + { + "epoch": 0.6431191666537681, + "grad_norm": 21634.71484375, + "learning_rate": 4.478641362163019e-05, + "loss": 0.4182, + "step": 124650 + }, + { + "epoch": 0.643377136636381, + "grad_norm": 22252.021484375, + "learning_rate": 4.475008400064554e-05, + "loss": 0.419, + "step": 124700 + }, + { + "epoch": 0.6436351066189938, + "grad_norm": 24151.951171875, + "learning_rate": 4.471375718196439e-05, + "loss": 0.4201, + "step": 124750 + }, + { + "epoch": 0.6438930766016067, + "grad_norm": 23570.310546875, + "learning_rate": 4.4677433184977315e-05, + "loss": 0.4131, + "step": 124800 + }, + { + "epoch": 0.6441510465842195, + "grad_norm": 23886.896484375, + "learning_rate": 4.464111202907332e-05, + "loss": 0.4172, + "step": 124850 + }, + { + "epoch": 0.6444090165668322, + "grad_norm": 23476.888671875, + "learning_rate": 4.4604793733639973e-05, + "loss": 0.419, + "step": 124900 + }, + { + "epoch": 0.6446669865494451, + "grad_norm": 22735.759765625, + "learning_rate": 4.456847831806324e-05, + "loss": 0.4214, + "step": 124950 + }, + { + "epoch": 0.644924956532058, + "grad_norm": 25508.525390625, + "learning_rate": 4.4532165801727626e-05, + "loss": 0.4184, + "step": 125000 + }, + { + "epoch": 0.644924956532058, + "eval_loss": 0.40382638573646545, + "eval_runtime": 3215.6548, + "eval_samples_per_second": 964.382, + "eval_steps_per_second": 1.884, + "step": 125000 + }, + { + "epoch": 0.6451829265146708, + "grad_norm": 23686.8671875, + "learning_rate": 4.449585620401601e-05, + "loss": 0.4115, + "step": 125050 + }, + { + "epoch": 0.6454408964972835, + "grad_norm": 22472.7421875, + "learning_rate": 4.445954954430976e-05, + "loss": 0.4187, + "step": 125100 + }, + { + "epoch": 0.6456988664798964, + "grad_norm": 25044.5859375, + "learning_rate": 4.442324584198871e-05, + "loss": 0.4188, + "step": 125150 + }, + { + "epoch": 0.6459568364625092, + "grad_norm": 23489.119140625, + "learning_rate": 4.4386945116431025e-05, + "loss": 0.4212, + "step": 125200 + }, + { + "epoch": 0.6462148064451221, + "grad_norm": 23150.12109375, + "learning_rate": 4.435064738701335e-05, + "loss": 0.4155, + "step": 125250 + }, + { + "epoch": 0.6464727764277348, + "grad_norm": 22082.09765625, + "learning_rate": 4.4314352673110696e-05, + "loss": 0.4208, + "step": 125300 + }, + { + "epoch": 0.6467307464103477, + "grad_norm": 23107.71484375, + "learning_rate": 4.427806099409652e-05, + "loss": 0.4172, + "step": 125350 + }, + { + "epoch": 0.6469887163929605, + "grad_norm": 23660.607421875, + "learning_rate": 4.4241772369342554e-05, + "loss": 0.4156, + "step": 125400 + }, + { + "epoch": 0.6472466863755734, + "grad_norm": 22054.47265625, + "learning_rate": 4.420548681821901e-05, + "loss": 0.4174, + "step": 125450 + }, + { + "epoch": 0.6475046563581862, + "grad_norm": 22386.654296875, + "learning_rate": 4.416920436009439e-05, + "loss": 0.4164, + "step": 125500 + }, + { + "epoch": 0.647762626340799, + "grad_norm": 22394.78125, + "learning_rate": 4.413292501433557e-05, + "loss": 0.4128, + "step": 125550 + }, + { + "epoch": 0.6480205963234118, + "grad_norm": 21871.1953125, + "learning_rate": 4.4096648800307796e-05, + "loss": 0.4174, + "step": 125600 + }, + { + "epoch": 0.6482785663060247, + "grad_norm": 21630.826171875, + "learning_rate": 4.406037573737456e-05, + "loss": 0.4146, + "step": 125650 + }, + { + "epoch": 0.6485365362886375, + "grad_norm": 20917.244140625, + "learning_rate": 4.4024105844897744e-05, + "loss": 0.4172, + "step": 125700 + }, + { + "epoch": 0.6487945062712502, + "grad_norm": 21545.53515625, + "learning_rate": 4.3987839142237505e-05, + "loss": 0.4189, + "step": 125750 + }, + { + "epoch": 0.6490524762538631, + "grad_norm": 27708.19140625, + "learning_rate": 4.395157564875234e-05, + "loss": 0.4127, + "step": 125800 + }, + { + "epoch": 0.6493104462364759, + "grad_norm": 23791.052734375, + "learning_rate": 4.391531538379895e-05, + "loss": 0.4146, + "step": 125850 + }, + { + "epoch": 0.6495684162190888, + "grad_norm": 23441.0078125, + "learning_rate": 4.387905836673239e-05, + "loss": 0.4191, + "step": 125900 + }, + { + "epoch": 0.6498263862017015, + "grad_norm": 21998.982421875, + "learning_rate": 4.3842804616905944e-05, + "loss": 0.4165, + "step": 125950 + }, + { + "epoch": 0.6500843561843144, + "grad_norm": 26170.572265625, + "learning_rate": 4.380655415367116e-05, + "loss": 0.4106, + "step": 126000 + }, + { + "epoch": 0.6503423261669272, + "grad_norm": 23915.345703125, + "learning_rate": 4.3770306996377866e-05, + "loss": 0.417, + "step": 126050 + }, + { + "epoch": 0.6506002961495401, + "grad_norm": 22807.23828125, + "learning_rate": 4.373406316437404e-05, + "loss": 0.4138, + "step": 126100 + }, + { + "epoch": 0.6508582661321528, + "grad_norm": 22825.060546875, + "learning_rate": 4.369782267700598e-05, + "loss": 0.4159, + "step": 126150 + }, + { + "epoch": 0.6511162361147657, + "grad_norm": 21670.83984375, + "learning_rate": 4.366158555361812e-05, + "loss": 0.4131, + "step": 126200 + }, + { + "epoch": 0.6513742060973785, + "grad_norm": 24840.630859375, + "learning_rate": 4.362535181355319e-05, + "loss": 0.4072, + "step": 126250 + }, + { + "epoch": 0.6516321760799914, + "grad_norm": 24121.158203125, + "learning_rate": 4.358912147615199e-05, + "loss": 0.4085, + "step": 126300 + }, + { + "epoch": 0.6518901460626042, + "grad_norm": 21738.236328125, + "learning_rate": 4.355289456075363e-05, + "loss": 0.4154, + "step": 126350 + }, + { + "epoch": 0.6521481160452169, + "grad_norm": 24880.833984375, + "learning_rate": 4.3516671086695296e-05, + "loss": 0.4154, + "step": 126400 + }, + { + "epoch": 0.6524060860278298, + "grad_norm": 21572.140625, + "learning_rate": 4.348045107331239e-05, + "loss": 0.4185, + "step": 126450 + }, + { + "epoch": 0.6526640560104426, + "grad_norm": 24076.17578125, + "learning_rate": 4.344423453993849e-05, + "loss": 0.4132, + "step": 126500 + }, + { + "epoch": 0.6529220259930555, + "grad_norm": 23531.365234375, + "learning_rate": 4.340802150590522e-05, + "loss": 0.4179, + "step": 126550 + }, + { + "epoch": 0.6531799959756682, + "grad_norm": 24287.568359375, + "learning_rate": 4.337181199054243e-05, + "loss": 0.4136, + "step": 126600 + }, + { + "epoch": 0.6534379659582811, + "grad_norm": 23352.52734375, + "learning_rate": 4.3335606013178046e-05, + "loss": 0.4177, + "step": 126650 + }, + { + "epoch": 0.6536959359408939, + "grad_norm": 22291.494140625, + "learning_rate": 4.3299403593138144e-05, + "loss": 0.4155, + "step": 126700 + }, + { + "epoch": 0.6539539059235068, + "grad_norm": 20745.798828125, + "learning_rate": 4.3263204749746836e-05, + "loss": 0.4139, + "step": 126750 + }, + { + "epoch": 0.6542118759061195, + "grad_norm": 24670.357421875, + "learning_rate": 4.322700950232639e-05, + "loss": 0.423, + "step": 126800 + }, + { + "epoch": 0.6544698458887324, + "grad_norm": 23067.81640625, + "learning_rate": 4.31908178701971e-05, + "loss": 0.4174, + "step": 126850 + }, + { + "epoch": 0.6547278158713452, + "grad_norm": 25275.47265625, + "learning_rate": 4.315462987267739e-05, + "loss": 0.4181, + "step": 126900 + }, + { + "epoch": 0.6549857858539581, + "grad_norm": 21032.4375, + "learning_rate": 4.311844552908372e-05, + "loss": 0.4111, + "step": 126950 + }, + { + "epoch": 0.6552437558365709, + "grad_norm": 21629.0625, + "learning_rate": 4.308226485873056e-05, + "loss": 0.4129, + "step": 127000 + }, + { + "epoch": 0.6555017258191836, + "grad_norm": 24375.935546875, + "learning_rate": 4.3046087880930466e-05, + "loss": 0.4129, + "step": 127050 + }, + { + "epoch": 0.6557596958017965, + "grad_norm": 21224.63671875, + "learning_rate": 4.3009914614994e-05, + "loss": 0.4156, + "step": 127100 + }, + { + "epoch": 0.6560176657844093, + "grad_norm": 24836.560546875, + "learning_rate": 4.297374508022977e-05, + "loss": 0.4133, + "step": 127150 + }, + { + "epoch": 0.6562756357670222, + "grad_norm": 22769.599609375, + "learning_rate": 4.293757929594435e-05, + "loss": 0.4151, + "step": 127200 + }, + { + "epoch": 0.6565336057496349, + "grad_norm": 22936.603515625, + "learning_rate": 4.2901417281442345e-05, + "loss": 0.4173, + "step": 127250 + }, + { + "epoch": 0.6567915757322478, + "grad_norm": 21296.39453125, + "learning_rate": 4.286525905602634e-05, + "loss": 0.4121, + "step": 127300 + }, + { + "epoch": 0.6570495457148606, + "grad_norm": 24282.591796875, + "learning_rate": 4.282910463899689e-05, + "loss": 0.4086, + "step": 127350 + }, + { + "epoch": 0.6573075156974735, + "grad_norm": 22443.6015625, + "learning_rate": 4.2792954049652545e-05, + "loss": 0.4183, + "step": 127400 + }, + { + "epoch": 0.6575654856800862, + "grad_norm": 21437.98046875, + "learning_rate": 4.275680730728976e-05, + "loss": 0.4172, + "step": 127450 + }, + { + "epoch": 0.6578234556626991, + "grad_norm": 24970.3125, + "learning_rate": 4.2720664431202987e-05, + "loss": 0.4187, + "step": 127500 + }, + { + "epoch": 0.6580814256453119, + "grad_norm": 21128.349609375, + "learning_rate": 4.268452544068457e-05, + "loss": 0.4142, + "step": 127550 + }, + { + "epoch": 0.6583393956279248, + "grad_norm": 26429.14453125, + "learning_rate": 4.2648390355024836e-05, + "loss": 0.4115, + "step": 127600 + }, + { + "epoch": 0.6585973656105376, + "grad_norm": 22542.380859375, + "learning_rate": 4.261225919351195e-05, + "loss": 0.4144, + "step": 127650 + }, + { + "epoch": 0.6588553355931503, + "grad_norm": 23179.853515625, + "learning_rate": 4.257613197543207e-05, + "loss": 0.4164, + "step": 127700 + }, + { + "epoch": 0.6591133055757632, + "grad_norm": 24641.048828125, + "learning_rate": 4.254000872006918e-05, + "loss": 0.4175, + "step": 127750 + }, + { + "epoch": 0.659371275558376, + "grad_norm": 23836.771484375, + "learning_rate": 4.250388944670517e-05, + "loss": 0.4201, + "step": 127800 + }, + { + "epoch": 0.6596292455409889, + "grad_norm": 23714.7578125, + "learning_rate": 4.2467774174619836e-05, + "loss": 0.4102, + "step": 127850 + }, + { + "epoch": 0.6598872155236016, + "grad_norm": 23630.2890625, + "learning_rate": 4.2431662923090785e-05, + "loss": 0.411, + "step": 127900 + }, + { + "epoch": 0.6601451855062145, + "grad_norm": 23018.384765625, + "learning_rate": 4.239555571139353e-05, + "loss": 0.4113, + "step": 127950 + }, + { + "epoch": 0.6604031554888273, + "grad_norm": 23594.041015625, + "learning_rate": 4.235945255880137e-05, + "loss": 0.4153, + "step": 128000 + }, + { + "epoch": 0.6606611254714402, + "grad_norm": 24231.07421875, + "learning_rate": 4.232335348458549e-05, + "loss": 0.4159, + "step": 128050 + }, + { + "epoch": 0.6609190954540529, + "grad_norm": 22362.98828125, + "learning_rate": 4.228725850801486e-05, + "loss": 0.4218, + "step": 128100 + }, + { + "epoch": 0.6611770654366658, + "grad_norm": 23008.44140625, + "learning_rate": 4.225116764835631e-05, + "loss": 0.416, + "step": 128150 + }, + { + "epoch": 0.6614350354192786, + "grad_norm": 23027.1875, + "learning_rate": 4.221508092487441e-05, + "loss": 0.4163, + "step": 128200 + }, + { + "epoch": 0.6616930054018915, + "grad_norm": 25121.61328125, + "learning_rate": 4.2178998356831553e-05, + "loss": 0.4167, + "step": 128250 + }, + { + "epoch": 0.6619509753845043, + "grad_norm": 24767.4140625, + "learning_rate": 4.214291996348794e-05, + "loss": 0.4176, + "step": 128300 + }, + { + "epoch": 0.662208945367117, + "grad_norm": 24596.533203125, + "learning_rate": 4.210684576410151e-05, + "loss": 0.4183, + "step": 128350 + }, + { + "epoch": 0.6624669153497299, + "grad_norm": 21095.8671875, + "learning_rate": 4.2070775777927976e-05, + "loss": 0.4151, + "step": 128400 + }, + { + "epoch": 0.6627248853323428, + "grad_norm": 25389.1640625, + "learning_rate": 4.203471002422077e-05, + "loss": 0.4226, + "step": 128450 + }, + { + "epoch": 0.6629828553149556, + "grad_norm": 24613.94921875, + "learning_rate": 4.199864852223113e-05, + "loss": 0.4093, + "step": 128500 + }, + { + "epoch": 0.6632408252975683, + "grad_norm": 23665.59765625, + "learning_rate": 4.196259129120796e-05, + "loss": 0.4135, + "step": 128550 + }, + { + "epoch": 0.6634987952801812, + "grad_norm": 22946.5234375, + "learning_rate": 4.192653835039795e-05, + "loss": 0.4151, + "step": 128600 + }, + { + "epoch": 0.663756765262794, + "grad_norm": 22438.23046875, + "learning_rate": 4.189048971904541e-05, + "loss": 0.4064, + "step": 128650 + }, + { + "epoch": 0.6640147352454069, + "grad_norm": 22760.623046875, + "learning_rate": 4.185444541639243e-05, + "loss": 0.4084, + "step": 128700 + }, + { + "epoch": 0.6642727052280196, + "grad_norm": 25223.484375, + "learning_rate": 4.1818405461678763e-05, + "loss": 0.4151, + "step": 128750 + }, + { + "epoch": 0.6645306752106325, + "grad_norm": 31547.962890625, + "learning_rate": 4.178236987414182e-05, + "loss": 0.4115, + "step": 128800 + }, + { + "epoch": 0.6647886451932453, + "grad_norm": 19114.953125, + "learning_rate": 4.174633867301674e-05, + "loss": 0.4109, + "step": 128850 + }, + { + "epoch": 0.6650466151758582, + "grad_norm": 22819.888671875, + "learning_rate": 4.1710311877536226e-05, + "loss": 0.4123, + "step": 128900 + }, + { + "epoch": 0.6653045851584709, + "grad_norm": 22868.62890625, + "learning_rate": 4.167428950693073e-05, + "loss": 0.413, + "step": 128950 + }, + { + "epoch": 0.6655625551410838, + "grad_norm": 23062.359375, + "learning_rate": 4.163827158042826e-05, + "loss": 0.4152, + "step": 129000 + }, + { + "epoch": 0.6658205251236966, + "grad_norm": 25990.505859375, + "learning_rate": 4.160225811725453e-05, + "loss": 0.4176, + "step": 129050 + }, + { + "epoch": 0.6660784951063095, + "grad_norm": 21594.1953125, + "learning_rate": 4.156624913663279e-05, + "loss": 0.4136, + "step": 129100 + }, + { + "epoch": 0.6663364650889223, + "grad_norm": 21145.869140625, + "learning_rate": 4.153024465778393e-05, + "loss": 0.4216, + "step": 129150 + }, + { + "epoch": 0.666594435071535, + "grad_norm": 22634.7734375, + "learning_rate": 4.149424469992649e-05, + "loss": 0.4114, + "step": 129200 + }, + { + "epoch": 0.6668524050541479, + "grad_norm": 23526.46875, + "learning_rate": 4.145824928227652e-05, + "loss": 0.4217, + "step": 129250 + }, + { + "epoch": 0.6671103750367607, + "grad_norm": 22295.880859375, + "learning_rate": 4.142225842404769e-05, + "loss": 0.4169, + "step": 129300 + }, + { + "epoch": 0.6673683450193736, + "grad_norm": 22282.421875, + "learning_rate": 4.13862721444512e-05, + "loss": 0.4195, + "step": 129350 + }, + { + "epoch": 0.6676263150019863, + "grad_norm": 21856.337890625, + "learning_rate": 4.135029046269585e-05, + "loss": 0.4229, + "step": 129400 + }, + { + "epoch": 0.6678842849845992, + "grad_norm": 20999.04296875, + "learning_rate": 4.131431339798796e-05, + "loss": 0.4168, + "step": 129450 + }, + { + "epoch": 0.668142254967212, + "grad_norm": 24684.484375, + "learning_rate": 4.12783409695314e-05, + "loss": 0.4117, + "step": 129500 + }, + { + "epoch": 0.6684002249498249, + "grad_norm": 24120.349609375, + "learning_rate": 4.124237319652753e-05, + "loss": 0.4186, + "step": 129550 + }, + { + "epoch": 0.6686581949324376, + "grad_norm": 23283.736328125, + "learning_rate": 4.1206410098175265e-05, + "loss": 0.4176, + "step": 129600 + }, + { + "epoch": 0.6689161649150505, + "grad_norm": 21902.6875, + "learning_rate": 4.117045169367102e-05, + "loss": 0.4153, + "step": 129650 + }, + { + "epoch": 0.6691741348976633, + "grad_norm": 22762.6015625, + "learning_rate": 4.1134498002208674e-05, + "loss": 0.414, + "step": 129700 + }, + { + "epoch": 0.6694321048802762, + "grad_norm": 20947.083984375, + "learning_rate": 4.109854904297965e-05, + "loss": 0.4113, + "step": 129750 + }, + { + "epoch": 0.669690074862889, + "grad_norm": 24687.189453125, + "learning_rate": 4.106260483517276e-05, + "loss": 0.4207, + "step": 129800 + }, + { + "epoch": 0.6699480448455017, + "grad_norm": 24164.724609375, + "learning_rate": 4.102666539797435e-05, + "loss": 0.4116, + "step": 129850 + }, + { + "epoch": 0.6702060148281146, + "grad_norm": 23408.68359375, + "learning_rate": 4.099073075056818e-05, + "loss": 0.4181, + "step": 129900 + }, + { + "epoch": 0.6704639848107274, + "grad_norm": 22822.3515625, + "learning_rate": 4.0954800912135516e-05, + "loss": 0.4176, + "step": 129950 + }, + { + "epoch": 0.6707219547933403, + "grad_norm": 21576.173828125, + "learning_rate": 4.091887590185494e-05, + "loss": 0.4165, + "step": 130000 + }, + { + "epoch": 0.6707219547933403, + "eval_loss": 0.40186887979507446, + "eval_runtime": 3150.7117, + "eval_samples_per_second": 984.26, + "eval_steps_per_second": 1.922, + "step": 130000 + }, + { + "epoch": 0.670979924775953, + "grad_norm": 21987.3671875, + "learning_rate": 4.0882955738902576e-05, + "loss": 0.4176, + "step": 130050 + }, + { + "epoch": 0.6712378947585659, + "grad_norm": 23900.74609375, + "learning_rate": 4.0847040442451895e-05, + "loss": 0.4183, + "step": 130100 + }, + { + "epoch": 0.6714958647411787, + "grad_norm": 22624.236328125, + "learning_rate": 4.081113003167378e-05, + "loss": 0.4146, + "step": 130150 + }, + { + "epoch": 0.6717538347237916, + "grad_norm": 22636.490234375, + "learning_rate": 4.0775224525736546e-05, + "loss": 0.4107, + "step": 130200 + }, + { + "epoch": 0.6720118047064043, + "grad_norm": 22667.66796875, + "learning_rate": 4.07393239438058e-05, + "loss": 0.4151, + "step": 130250 + }, + { + "epoch": 0.6722697746890172, + "grad_norm": 20381.720703125, + "learning_rate": 4.070342830504465e-05, + "loss": 0.4167, + "step": 130300 + }, + { + "epoch": 0.67252774467163, + "grad_norm": 22913.248046875, + "learning_rate": 4.0667537628613424e-05, + "loss": 0.4116, + "step": 130350 + }, + { + "epoch": 0.6727857146542429, + "grad_norm": 23168.865234375, + "learning_rate": 4.063165193366992e-05, + "loss": 0.413, + "step": 130400 + }, + { + "epoch": 0.6730436846368557, + "grad_norm": 21597.861328125, + "learning_rate": 4.059577123936918e-05, + "loss": 0.4179, + "step": 130450 + }, + { + "epoch": 0.6733016546194684, + "grad_norm": 20305.806640625, + "learning_rate": 4.055989556486365e-05, + "loss": 0.4199, + "step": 130500 + }, + { + "epoch": 0.6735596246020813, + "grad_norm": 23520.173828125, + "learning_rate": 4.052402492930311e-05, + "loss": 0.4154, + "step": 130550 + }, + { + "epoch": 0.6738175945846941, + "grad_norm": 23356.85546875, + "learning_rate": 4.048815935183453e-05, + "loss": 0.4154, + "step": 130600 + }, + { + "epoch": 0.674075564567307, + "grad_norm": 22958.611328125, + "learning_rate": 4.0452298851602324e-05, + "loss": 0.4149, + "step": 130650 + }, + { + "epoch": 0.6743335345499197, + "grad_norm": 24888.25390625, + "learning_rate": 4.04164434477481e-05, + "loss": 0.4166, + "step": 130700 + }, + { + "epoch": 0.6745915045325326, + "grad_norm": 22958.189453125, + "learning_rate": 4.0380593159410806e-05, + "loss": 0.4159, + "step": 130750 + }, + { + "epoch": 0.6748494745151454, + "grad_norm": 21863.55859375, + "learning_rate": 4.03447480057266e-05, + "loss": 0.4142, + "step": 130800 + }, + { + "epoch": 0.6751074444977583, + "grad_norm": 23096.375, + "learning_rate": 4.030890800582895e-05, + "loss": 0.4108, + "step": 130850 + }, + { + "epoch": 0.675365414480371, + "grad_norm": 23506.576171875, + "learning_rate": 4.027307317884854e-05, + "loss": 0.4111, + "step": 130900 + }, + { + "epoch": 0.6756233844629839, + "grad_norm": 26913.11328125, + "learning_rate": 4.023724354391331e-05, + "loss": 0.4145, + "step": 130950 + }, + { + "epoch": 0.6758813544455967, + "grad_norm": 22008.958984375, + "learning_rate": 4.020141912014846e-05, + "loss": 0.4118, + "step": 131000 + }, + { + "epoch": 0.6761393244282096, + "grad_norm": 21431.857421875, + "learning_rate": 4.016559992667632e-05, + "loss": 0.417, + "step": 131050 + }, + { + "epoch": 0.6763972944108223, + "grad_norm": 24077.453125, + "learning_rate": 4.0129785982616524e-05, + "loss": 0.4121, + "step": 131100 + }, + { + "epoch": 0.6766552643934352, + "grad_norm": 22978.5390625, + "learning_rate": 4.009397730708583e-05, + "loss": 0.4074, + "step": 131150 + }, + { + "epoch": 0.676913234376048, + "grad_norm": 25474.740234375, + "learning_rate": 4.005817391919826e-05, + "loss": 0.4159, + "step": 131200 + }, + { + "epoch": 0.6771712043586608, + "grad_norm": 23532.416015625, + "learning_rate": 4.0022375838064904e-05, + "loss": 0.4202, + "step": 131250 + }, + { + "epoch": 0.6774291743412737, + "grad_norm": 23746.072265625, + "learning_rate": 3.998658308279414e-05, + "loss": 0.4157, + "step": 131300 + }, + { + "epoch": 0.6776871443238864, + "grad_norm": 21691.6875, + "learning_rate": 3.995079567249142e-05, + "loss": 0.4158, + "step": 131350 + }, + { + "epoch": 0.6779451143064993, + "grad_norm": 24167.923828125, + "learning_rate": 3.991501362625937e-05, + "loss": 0.4165, + "step": 131400 + }, + { + "epoch": 0.6782030842891121, + "grad_norm": 22420.27734375, + "learning_rate": 3.9879236963197784e-05, + "loss": 0.418, + "step": 131450 + }, + { + "epoch": 0.678461054271725, + "grad_norm": 22116.75, + "learning_rate": 3.984346570240352e-05, + "loss": 0.4152, + "step": 131500 + }, + { + "epoch": 0.6787190242543377, + "grad_norm": 23841.001953125, + "learning_rate": 3.9807699862970596e-05, + "loss": 0.4179, + "step": 131550 + }, + { + "epoch": 0.6789769942369506, + "grad_norm": 22931.126953125, + "learning_rate": 3.977193946399011e-05, + "loss": 0.4171, + "step": 131600 + }, + { + "epoch": 0.6792349642195634, + "grad_norm": 24939.294921875, + "learning_rate": 3.973618452455031e-05, + "loss": 0.4147, + "step": 131650 + }, + { + "epoch": 0.6794929342021763, + "grad_norm": 22026.615234375, + "learning_rate": 3.970043506373644e-05, + "loss": 0.4084, + "step": 131700 + }, + { + "epoch": 0.679750904184789, + "grad_norm": 24636.595703125, + "learning_rate": 3.9664691100630904e-05, + "loss": 0.4137, + "step": 131750 + }, + { + "epoch": 0.6800088741674019, + "grad_norm": 25599.443359375, + "learning_rate": 3.962895265431311e-05, + "loss": 0.4167, + "step": 131800 + }, + { + "epoch": 0.6802668441500147, + "grad_norm": 23514.0078125, + "learning_rate": 3.9593219743859575e-05, + "loss": 0.408, + "step": 131850 + }, + { + "epoch": 0.6805248141326276, + "grad_norm": 21798.9609375, + "learning_rate": 3.9557492388343844e-05, + "loss": 0.4129, + "step": 131900 + }, + { + "epoch": 0.6807827841152404, + "grad_norm": 24803.248046875, + "learning_rate": 3.952177060683644e-05, + "loss": 0.4126, + "step": 131950 + }, + { + "epoch": 0.6810407540978531, + "grad_norm": 23215.529296875, + "learning_rate": 3.948605441840501e-05, + "loss": 0.4114, + "step": 132000 + }, + { + "epoch": 0.681298724080466, + "grad_norm": 21179.626953125, + "learning_rate": 3.945034384211412e-05, + "loss": 0.4139, + "step": 132050 + }, + { + "epoch": 0.6815566940630788, + "grad_norm": 22894.04296875, + "learning_rate": 3.941463889702543e-05, + "loss": 0.4144, + "step": 132100 + }, + { + "epoch": 0.6818146640456917, + "grad_norm": 22581.392578125, + "learning_rate": 3.937893960219751e-05, + "loss": 0.4163, + "step": 132150 + }, + { + "epoch": 0.6820726340283044, + "grad_norm": 27557.634765625, + "learning_rate": 3.9343245976685966e-05, + "loss": 0.4194, + "step": 132200 + }, + { + "epoch": 0.6823306040109173, + "grad_norm": 24157.97265625, + "learning_rate": 3.9307558039543355e-05, + "loss": 0.4089, + "step": 132250 + }, + { + "epoch": 0.6825885739935301, + "grad_norm": 23363.904296875, + "learning_rate": 3.927187580981922e-05, + "loss": 0.4108, + "step": 132300 + }, + { + "epoch": 0.682846543976143, + "grad_norm": 24005.15625, + "learning_rate": 3.9236199306560054e-05, + "loss": 0.4103, + "step": 132350 + }, + { + "epoch": 0.6831045139587557, + "grad_norm": 23476.4609375, + "learning_rate": 3.920052854880925e-05, + "loss": 0.4189, + "step": 132400 + }, + { + "epoch": 0.6833624839413686, + "grad_norm": 23734.173828125, + "learning_rate": 3.91648635556072e-05, + "loss": 0.4183, + "step": 132450 + }, + { + "epoch": 0.6836204539239814, + "grad_norm": 22112.642578125, + "learning_rate": 3.912920434599117e-05, + "loss": 0.4139, + "step": 132500 + }, + { + "epoch": 0.6838784239065943, + "grad_norm": 23442.96484375, + "learning_rate": 3.909355093899537e-05, + "loss": 0.4137, + "step": 132550 + }, + { + "epoch": 0.6841363938892071, + "grad_norm": 22873.734375, + "learning_rate": 3.905790335365087e-05, + "loss": 0.4097, + "step": 132600 + }, + { + "epoch": 0.6843943638718198, + "grad_norm": 24382.9140625, + "learning_rate": 3.902226160898567e-05, + "loss": 0.4134, + "step": 132650 + }, + { + "epoch": 0.6846523338544327, + "grad_norm": 23238.1953125, + "learning_rate": 3.898662572402468e-05, + "loss": 0.4137, + "step": 132700 + }, + { + "epoch": 0.6849103038370455, + "grad_norm": 21690.37890625, + "learning_rate": 3.89509957177896e-05, + "loss": 0.4114, + "step": 132750 + }, + { + "epoch": 0.6851682738196584, + "grad_norm": 25762.189453125, + "learning_rate": 3.891537160929907e-05, + "loss": 0.4134, + "step": 132800 + }, + { + "epoch": 0.6854262438022711, + "grad_norm": 22006.044921875, + "learning_rate": 3.88797534175685e-05, + "loss": 0.4132, + "step": 132850 + }, + { + "epoch": 0.685684213784884, + "grad_norm": 22149.5546875, + "learning_rate": 3.8844141161610256e-05, + "loss": 0.4154, + "step": 132900 + }, + { + "epoch": 0.6859421837674968, + "grad_norm": 23865.419921875, + "learning_rate": 3.880853486043343e-05, + "loss": 0.4135, + "step": 132950 + }, + { + "epoch": 0.6862001537501097, + "grad_norm": 22708.126953125, + "learning_rate": 3.877293453304399e-05, + "loss": 0.4143, + "step": 133000 + }, + { + "epoch": 0.6864581237327224, + "grad_norm": 19948.517578125, + "learning_rate": 3.8737340198444683e-05, + "loss": 0.4181, + "step": 133050 + }, + { + "epoch": 0.6867160937153353, + "grad_norm": 22594.826171875, + "learning_rate": 3.870175187563509e-05, + "loss": 0.4108, + "step": 133100 + }, + { + "epoch": 0.6869740636979481, + "grad_norm": 24876.56640625, + "learning_rate": 3.866616958361159e-05, + "loss": 0.4136, + "step": 133150 + }, + { + "epoch": 0.687232033680561, + "grad_norm": 20055.0859375, + "learning_rate": 3.8630593341367285e-05, + "loss": 0.4176, + "step": 133200 + }, + { + "epoch": 0.6874900036631737, + "grad_norm": 24807.9140625, + "learning_rate": 3.8595023167892096e-05, + "loss": 0.4084, + "step": 133250 + }, + { + "epoch": 0.6877479736457865, + "grad_norm": 21060.78125, + "learning_rate": 3.8559459082172696e-05, + "loss": 0.4086, + "step": 133300 + }, + { + "epoch": 0.6880059436283994, + "grad_norm": 22740.255859375, + "learning_rate": 3.852390110319252e-05, + "loss": 0.4109, + "step": 133350 + }, + { + "epoch": 0.6882639136110122, + "grad_norm": 24095.68359375, + "learning_rate": 3.848834924993169e-05, + "loss": 0.4118, + "step": 133400 + }, + { + "epoch": 0.6885218835936251, + "grad_norm": 20011.78125, + "learning_rate": 3.8452803541367136e-05, + "loss": 0.4133, + "step": 133450 + }, + { + "epoch": 0.6887798535762378, + "grad_norm": 21369.7265625, + "learning_rate": 3.8417263996472444e-05, + "loss": 0.4104, + "step": 133500 + }, + { + "epoch": 0.6890378235588507, + "grad_norm": 22532.251953125, + "learning_rate": 3.8381730634217946e-05, + "loss": 0.415, + "step": 133550 + }, + { + "epoch": 0.6892957935414635, + "grad_norm": 21174.34765625, + "learning_rate": 3.8346203473570677e-05, + "loss": 0.4121, + "step": 133600 + }, + { + "epoch": 0.6895537635240764, + "grad_norm": 21758.87109375, + "learning_rate": 3.831068253349431e-05, + "loss": 0.4181, + "step": 133650 + }, + { + "epoch": 0.6898117335066891, + "grad_norm": 21809.083984375, + "learning_rate": 3.827516783294927e-05, + "loss": 0.41, + "step": 133700 + }, + { + "epoch": 0.690069703489302, + "grad_norm": 21419.69921875, + "learning_rate": 3.8239659390892593e-05, + "loss": 0.4166, + "step": 133750 + }, + { + "epoch": 0.6903276734719148, + "grad_norm": 20746.517578125, + "learning_rate": 3.820415722627802e-05, + "loss": 0.4168, + "step": 133800 + }, + { + "epoch": 0.6905856434545277, + "grad_norm": 22737.89453125, + "learning_rate": 3.816866135805589e-05, + "loss": 0.4119, + "step": 133850 + }, + { + "epoch": 0.6908436134371404, + "grad_norm": 23691.408203125, + "learning_rate": 3.813317180517324e-05, + "loss": 0.4105, + "step": 133900 + }, + { + "epoch": 0.6911015834197533, + "grad_norm": 22899.70703125, + "learning_rate": 3.8097688586573684e-05, + "loss": 0.412, + "step": 133950 + }, + { + "epoch": 0.6913595534023661, + "grad_norm": 25553.763671875, + "learning_rate": 3.8062211721197475e-05, + "loss": 0.4158, + "step": 134000 + }, + { + "epoch": 0.691617523384979, + "grad_norm": 22099.93359375, + "learning_rate": 3.802674122798152e-05, + "loss": 0.4149, + "step": 134050 + }, + { + "epoch": 0.6918754933675918, + "grad_norm": 25735.91015625, + "learning_rate": 3.799127712585922e-05, + "loss": 0.4058, + "step": 134100 + }, + { + "epoch": 0.6921334633502045, + "grad_norm": 21259.95703125, + "learning_rate": 3.795581943376067e-05, + "loss": 0.4192, + "step": 134150 + }, + { + "epoch": 0.6923914333328174, + "grad_norm": 22438.23046875, + "learning_rate": 3.7920368170612476e-05, + "loss": 0.414, + "step": 134200 + }, + { + "epoch": 0.6926494033154302, + "grad_norm": 24721.974609375, + "learning_rate": 3.788492335533786e-05, + "loss": 0.4154, + "step": 134250 + }, + { + "epoch": 0.6929073732980431, + "grad_norm": 24267.611328125, + "learning_rate": 3.7849485006856545e-05, + "loss": 0.4108, + "step": 134300 + }, + { + "epoch": 0.6931653432806558, + "grad_norm": 25588.193359375, + "learning_rate": 3.781405314408486e-05, + "loss": 0.4169, + "step": 134350 + }, + { + "epoch": 0.6934233132632687, + "grad_norm": 22651.216796875, + "learning_rate": 3.7778627785935626e-05, + "loss": 0.4112, + "step": 134400 + }, + { + "epoch": 0.6936812832458815, + "grad_norm": 24765.76953125, + "learning_rate": 3.774320895131823e-05, + "loss": 0.4173, + "step": 134450 + }, + { + "epoch": 0.6939392532284944, + "grad_norm": 25384.44921875, + "learning_rate": 3.7707796659138584e-05, + "loss": 0.4097, + "step": 134500 + }, + { + "epoch": 0.6941972232111071, + "grad_norm": 21145.587890625, + "learning_rate": 3.767239092829903e-05, + "loss": 0.4125, + "step": 134550 + }, + { + "epoch": 0.69445519319372, + "grad_norm": 22693.28515625, + "learning_rate": 3.763699177769849e-05, + "loss": 0.4111, + "step": 134600 + }, + { + "epoch": 0.6947131631763328, + "grad_norm": 20415.33984375, + "learning_rate": 3.760159922623235e-05, + "loss": 0.4178, + "step": 134650 + }, + { + "epoch": 0.6949711331589457, + "grad_norm": 23304.33984375, + "learning_rate": 3.756621329279247e-05, + "loss": 0.4142, + "step": 134700 + }, + { + "epoch": 0.6952291031415585, + "grad_norm": 22485.029296875, + "learning_rate": 3.7530833996267156e-05, + "loss": 0.4129, + "step": 134750 + }, + { + "epoch": 0.6954870731241712, + "grad_norm": 20506.5625, + "learning_rate": 3.7495461355541206e-05, + "loss": 0.4104, + "step": 134800 + }, + { + "epoch": 0.6957450431067841, + "grad_norm": 26106.26953125, + "learning_rate": 3.746009538949584e-05, + "loss": 0.4122, + "step": 134850 + }, + { + "epoch": 0.6960030130893969, + "grad_norm": 25230.55859375, + "learning_rate": 3.742473611700874e-05, + "loss": 0.4173, + "step": 134900 + }, + { + "epoch": 0.6962609830720098, + "grad_norm": 23462.197265625, + "learning_rate": 3.738938355695402e-05, + "loss": 0.4211, + "step": 134950 + }, + { + "epoch": 0.6965189530546225, + "grad_norm": 22550.8359375, + "learning_rate": 3.735403772820213e-05, + "loss": 0.4154, + "step": 135000 + }, + { + "epoch": 0.6965189530546225, + "eval_loss": 0.399837851524353, + "eval_runtime": 3136.0222, + "eval_samples_per_second": 988.871, + "eval_steps_per_second": 1.931, + "step": 135000 + }, + { + "epoch": 0.6967769230372354, + "grad_norm": 22235.15234375, + "learning_rate": 3.731869864962004e-05, + "loss": 0.4183, + "step": 135050 + }, + { + "epoch": 0.6970348930198482, + "grad_norm": 21969.208984375, + "learning_rate": 3.728336634007105e-05, + "loss": 0.41, + "step": 135100 + }, + { + "epoch": 0.6972928630024611, + "grad_norm": 22907.32421875, + "learning_rate": 3.724804081841488e-05, + "loss": 0.4213, + "step": 135150 + }, + { + "epoch": 0.6975508329850738, + "grad_norm": 22994.646484375, + "learning_rate": 3.721272210350757e-05, + "loss": 0.4103, + "step": 135200 + }, + { + "epoch": 0.6978088029676867, + "grad_norm": 22118.224609375, + "learning_rate": 3.717741021420162e-05, + "loss": 0.4195, + "step": 135250 + }, + { + "epoch": 0.6980667729502995, + "grad_norm": 19673.6484375, + "learning_rate": 3.7142105169345764e-05, + "loss": 0.4105, + "step": 135300 + }, + { + "epoch": 0.6983247429329124, + "grad_norm": 23110.041015625, + "learning_rate": 3.71068069877852e-05, + "loss": 0.4132, + "step": 135350 + }, + { + "epoch": 0.6985827129155251, + "grad_norm": 26589.453125, + "learning_rate": 3.707151568836144e-05, + "loss": 0.4171, + "step": 135400 + }, + { + "epoch": 0.6988406828981379, + "grad_norm": 25272.74609375, + "learning_rate": 3.7036231289912206e-05, + "loss": 0.4098, + "step": 135450 + }, + { + "epoch": 0.6990986528807508, + "grad_norm": 23238.626953125, + "learning_rate": 3.700095381127172e-05, + "loss": 0.4102, + "step": 135500 + }, + { + "epoch": 0.6993566228633636, + "grad_norm": 25412.8203125, + "learning_rate": 3.696568327127036e-05, + "loss": 0.4131, + "step": 135550 + }, + { + "epoch": 0.6996145928459765, + "grad_norm": 22329.0703125, + "learning_rate": 3.693041968873488e-05, + "loss": 0.4196, + "step": 135600 + }, + { + "epoch": 0.6998725628285892, + "grad_norm": 23497.068359375, + "learning_rate": 3.6895163082488294e-05, + "loss": 0.4137, + "step": 135650 + }, + { + "epoch": 0.7001305328112021, + "grad_norm": 23415.0859375, + "learning_rate": 3.6859913471349906e-05, + "loss": 0.4088, + "step": 135700 + }, + { + "epoch": 0.7003885027938149, + "grad_norm": 24474.064453125, + "learning_rate": 3.682467087413525e-05, + "loss": 0.4122, + "step": 135750 + }, + { + "epoch": 0.7006464727764278, + "grad_norm": 24427.3359375, + "learning_rate": 3.678943530965615e-05, + "loss": 0.4133, + "step": 135800 + }, + { + "epoch": 0.7009044427590405, + "grad_norm": 24399.58203125, + "learning_rate": 3.675420679672068e-05, + "loss": 0.4113, + "step": 135850 + }, + { + "epoch": 0.7011624127416534, + "grad_norm": 22070.033203125, + "learning_rate": 3.671898535413313e-05, + "loss": 0.4099, + "step": 135900 + }, + { + "epoch": 0.7014203827242662, + "grad_norm": 21846.20703125, + "learning_rate": 3.668377100069404e-05, + "loss": 0.4164, + "step": 135950 + }, + { + "epoch": 0.7016783527068791, + "grad_norm": 21927.2265625, + "learning_rate": 3.664856375520012e-05, + "loss": 0.4124, + "step": 136000 + }, + { + "epoch": 0.7019363226894918, + "grad_norm": 22155.341796875, + "learning_rate": 3.6613363636444344e-05, + "loss": 0.416, + "step": 136050 + }, + { + "epoch": 0.7021942926721046, + "grad_norm": 23344.486328125, + "learning_rate": 3.6578170663215826e-05, + "loss": 0.4162, + "step": 136100 + }, + { + "epoch": 0.7024522626547175, + "grad_norm": 23390.642578125, + "learning_rate": 3.6542984854299936e-05, + "loss": 0.4082, + "step": 136150 + }, + { + "epoch": 0.7027102326373303, + "grad_norm": 22980.90625, + "learning_rate": 3.6507806228478125e-05, + "loss": 0.4067, + "step": 136200 + }, + { + "epoch": 0.7029682026199432, + "grad_norm": 22321.662109375, + "learning_rate": 3.6472634804528095e-05, + "loss": 0.4129, + "step": 136250 + }, + { + "epoch": 0.7032261726025559, + "grad_norm": 22719.455078125, + "learning_rate": 3.643747060122366e-05, + "loss": 0.4169, + "step": 136300 + }, + { + "epoch": 0.7034841425851688, + "grad_norm": 25283.494140625, + "learning_rate": 3.640231363733481e-05, + "loss": 0.4081, + "step": 136350 + }, + { + "epoch": 0.7037421125677816, + "grad_norm": 24430.919921875, + "learning_rate": 3.636716393162764e-05, + "loss": 0.4187, + "step": 136400 + }, + { + "epoch": 0.7040000825503945, + "grad_norm": 23372.662109375, + "learning_rate": 3.633202150286435e-05, + "loss": 0.4117, + "step": 136450 + }, + { + "epoch": 0.7042580525330072, + "grad_norm": 23912.595703125, + "learning_rate": 3.6296886369803346e-05, + "loss": 0.4126, + "step": 136500 + }, + { + "epoch": 0.7045160225156201, + "grad_norm": 24092.0390625, + "learning_rate": 3.626175855119903e-05, + "loss": 0.4163, + "step": 136550 + }, + { + "epoch": 0.7047739924982329, + "grad_norm": 23452.2421875, + "learning_rate": 3.6226638065802e-05, + "loss": 0.4088, + "step": 136600 + }, + { + "epoch": 0.7050319624808458, + "grad_norm": 24399.787109375, + "learning_rate": 3.6191524932358845e-05, + "loss": 0.4139, + "step": 136650 + }, + { + "epoch": 0.7052899324634585, + "grad_norm": 23295.599609375, + "learning_rate": 3.6156419169612287e-05, + "loss": 0.4112, + "step": 136700 + }, + { + "epoch": 0.7055479024460714, + "grad_norm": 25809.876953125, + "learning_rate": 3.6121320796301126e-05, + "loss": 0.4141, + "step": 136750 + }, + { + "epoch": 0.7058058724286842, + "grad_norm": 21679.818359375, + "learning_rate": 3.608622983116018e-05, + "loss": 0.4183, + "step": 136800 + }, + { + "epoch": 0.706063842411297, + "grad_norm": 24492.578125, + "learning_rate": 3.6051146292920334e-05, + "loss": 0.4103, + "step": 136850 + }, + { + "epoch": 0.7063218123939099, + "grad_norm": 24805.59375, + "learning_rate": 3.601607020030847e-05, + "loss": 0.4129, + "step": 136900 + }, + { + "epoch": 0.7065797823765226, + "grad_norm": 23000.9765625, + "learning_rate": 3.5981001572047566e-05, + "loss": 0.4091, + "step": 136950 + }, + { + "epoch": 0.7068377523591355, + "grad_norm": 24590.6875, + "learning_rate": 3.594594042685655e-05, + "loss": 0.4061, + "step": 137000 + }, + { + "epoch": 0.7070957223417483, + "grad_norm": 22223.16015625, + "learning_rate": 3.5910886783450416e-05, + "loss": 0.4174, + "step": 137050 + }, + { + "epoch": 0.7073536923243612, + "grad_norm": 28207.7578125, + "learning_rate": 3.587584066054007e-05, + "loss": 0.4119, + "step": 137100 + }, + { + "epoch": 0.7076116623069739, + "grad_norm": 23703.271484375, + "learning_rate": 3.584080207683249e-05, + "loss": 0.4104, + "step": 137150 + }, + { + "epoch": 0.7078696322895868, + "grad_norm": 24903.92578125, + "learning_rate": 3.580577105103059e-05, + "loss": 0.4139, + "step": 137200 + }, + { + "epoch": 0.7081276022721996, + "grad_norm": 21130.029296875, + "learning_rate": 3.5770747601833235e-05, + "loss": 0.4208, + "step": 137250 + }, + { + "epoch": 0.7083855722548125, + "grad_norm": 22223.611328125, + "learning_rate": 3.5735731747935306e-05, + "loss": 0.4118, + "step": 137300 + }, + { + "epoch": 0.7086435422374252, + "grad_norm": 21862.12109375, + "learning_rate": 3.570072350802753e-05, + "loss": 0.4101, + "step": 137350 + }, + { + "epoch": 0.708901512220038, + "grad_norm": 22504.25390625, + "learning_rate": 3.566572290079667e-05, + "loss": 0.4187, + "step": 137400 + }, + { + "epoch": 0.7091594822026509, + "grad_norm": 21898.53125, + "learning_rate": 3.563072994492535e-05, + "loss": 0.4068, + "step": 137450 + }, + { + "epoch": 0.7094174521852638, + "grad_norm": 21629.5859375, + "learning_rate": 3.559574465909215e-05, + "loss": 0.4107, + "step": 137500 + }, + { + "epoch": 0.7096754221678765, + "grad_norm": 23078.080078125, + "learning_rate": 3.5560767061971515e-05, + "loss": 0.4093, + "step": 137550 + }, + { + "epoch": 0.7099333921504893, + "grad_norm": 21831.11328125, + "learning_rate": 3.5525797172233826e-05, + "loss": 0.4083, + "step": 137600 + }, + { + "epoch": 0.7101913621331022, + "grad_norm": 20934.220703125, + "learning_rate": 3.5490835008545334e-05, + "loss": 0.4143, + "step": 137650 + }, + { + "epoch": 0.710449332115715, + "grad_norm": 21335.0, + "learning_rate": 3.545588058956816e-05, + "loss": 0.4104, + "step": 137700 + }, + { + "epoch": 0.7107073020983279, + "grad_norm": 20424.279296875, + "learning_rate": 3.542093393396031e-05, + "loss": 0.4117, + "step": 137750 + }, + { + "epoch": 0.7109652720809406, + "grad_norm": 24527.76171875, + "learning_rate": 3.5385995060375596e-05, + "loss": 0.4128, + "step": 137800 + }, + { + "epoch": 0.7112232420635535, + "grad_norm": 23370.17578125, + "learning_rate": 3.535106398746376e-05, + "loss": 0.4149, + "step": 137850 + }, + { + "epoch": 0.7114812120461663, + "grad_norm": 22996.2890625, + "learning_rate": 3.531614073387028e-05, + "loss": 0.412, + "step": 137900 + }, + { + "epoch": 0.7117391820287792, + "grad_norm": 26592.931640625, + "learning_rate": 3.528122531823657e-05, + "loss": 0.4111, + "step": 137950 + }, + { + "epoch": 0.7119971520113919, + "grad_norm": 22353.35546875, + "learning_rate": 3.5246317759199745e-05, + "loss": 0.412, + "step": 138000 + }, + { + "epoch": 0.7122551219940048, + "grad_norm": 22266.91796875, + "learning_rate": 3.521141807539281e-05, + "loss": 0.4113, + "step": 138050 + }, + { + "epoch": 0.7125130919766176, + "grad_norm": 21723.318359375, + "learning_rate": 3.517652628544457e-05, + "loss": 0.4058, + "step": 138100 + }, + { + "epoch": 0.7127710619592305, + "grad_norm": 23738.322265625, + "learning_rate": 3.5141642407979535e-05, + "loss": 0.4072, + "step": 138150 + }, + { + "epoch": 0.7130290319418432, + "grad_norm": 25993.587890625, + "learning_rate": 3.5106766461618083e-05, + "loss": 0.4066, + "step": 138200 + }, + { + "epoch": 0.713287001924456, + "grad_norm": 23321.55859375, + "learning_rate": 3.50718984649763e-05, + "loss": 0.4104, + "step": 138250 + }, + { + "epoch": 0.7135449719070689, + "grad_norm": 22022.267578125, + "learning_rate": 3.503703843666605e-05, + "loss": 0.4096, + "step": 138300 + }, + { + "epoch": 0.7138029418896817, + "grad_norm": 22249.640625, + "learning_rate": 3.500218639529493e-05, + "loss": 0.4121, + "step": 138350 + }, + { + "epoch": 0.7140609118722946, + "grad_norm": 21145.283203125, + "learning_rate": 3.496734235946632e-05, + "loss": 0.4126, + "step": 138400 + }, + { + "epoch": 0.7143188818549073, + "grad_norm": 22439.38671875, + "learning_rate": 3.493250634777924e-05, + "loss": 0.4076, + "step": 138450 + }, + { + "epoch": 0.7145768518375202, + "grad_norm": 25641.93359375, + "learning_rate": 3.4897678378828516e-05, + "loss": 0.4105, + "step": 138500 + }, + { + "epoch": 0.714834821820133, + "grad_norm": 22200.46875, + "learning_rate": 3.486285847120465e-05, + "loss": 0.4097, + "step": 138550 + }, + { + "epoch": 0.7150927918027459, + "grad_norm": 22691.666015625, + "learning_rate": 3.482804664349381e-05, + "loss": 0.4154, + "step": 138600 + }, + { + "epoch": 0.7153507617853586, + "grad_norm": 22139.16796875, + "learning_rate": 3.479324291427788e-05, + "loss": 0.4124, + "step": 138650 + }, + { + "epoch": 0.7156087317679715, + "grad_norm": 23695.7578125, + "learning_rate": 3.4758447302134414e-05, + "loss": 0.4174, + "step": 138700 + }, + { + "epoch": 0.7158667017505843, + "grad_norm": 24720.06640625, + "learning_rate": 3.472365982563666e-05, + "loss": 0.4095, + "step": 138750 + }, + { + "epoch": 0.7161246717331972, + "grad_norm": 22861.171875, + "learning_rate": 3.4688880503353474e-05, + "loss": 0.4039, + "step": 138800 + }, + { + "epoch": 0.7163826417158099, + "grad_norm": 22751.833984375, + "learning_rate": 3.465410935384939e-05, + "loss": 0.4175, + "step": 138850 + }, + { + "epoch": 0.7166406116984227, + "grad_norm": 22689.5, + "learning_rate": 3.461934639568457e-05, + "loss": 0.4133, + "step": 138900 + }, + { + "epoch": 0.7168985816810356, + "grad_norm": 23292.1328125, + "learning_rate": 3.458459164741482e-05, + "loss": 0.4062, + "step": 138950 + }, + { + "epoch": 0.7171565516636484, + "grad_norm": 22390.515625, + "learning_rate": 3.4549845127591563e-05, + "loss": 0.4169, + "step": 139000 + }, + { + "epoch": 0.7174145216462613, + "grad_norm": 23531.9921875, + "learning_rate": 3.451510685476178e-05, + "loss": 0.4084, + "step": 139050 + }, + { + "epoch": 0.717672491628874, + "grad_norm": 23847.154296875, + "learning_rate": 3.448037684746812e-05, + "loss": 0.4134, + "step": 139100 + }, + { + "epoch": 0.7179304616114869, + "grad_norm": 22651.15234375, + "learning_rate": 3.4445655124248774e-05, + "loss": 0.4118, + "step": 139150 + }, + { + "epoch": 0.7181884315940997, + "grad_norm": 21893.123046875, + "learning_rate": 3.441094170363755e-05, + "loss": 0.4065, + "step": 139200 + }, + { + "epoch": 0.7184464015767126, + "grad_norm": 22238.685546875, + "learning_rate": 3.4376236604163756e-05, + "loss": 0.4164, + "step": 139250 + }, + { + "epoch": 0.7187043715593253, + "grad_norm": 25605.083984375, + "learning_rate": 3.434153984435234e-05, + "loss": 0.4105, + "step": 139300 + }, + { + "epoch": 0.7189623415419382, + "grad_norm": 22414.0703125, + "learning_rate": 3.430685144272374e-05, + "loss": 0.4095, + "step": 139350 + }, + { + "epoch": 0.719220311524551, + "grad_norm": 22067.443359375, + "learning_rate": 3.4272171417793954e-05, + "loss": 0.4105, + "step": 139400 + }, + { + "epoch": 0.7194782815071639, + "grad_norm": 22398.36328125, + "learning_rate": 3.423749978807454e-05, + "loss": 0.4065, + "step": 139450 + }, + { + "epoch": 0.7197362514897766, + "grad_norm": 25660.017578125, + "learning_rate": 3.420283657207248e-05, + "loss": 0.4139, + "step": 139500 + }, + { + "epoch": 0.7199942214723895, + "grad_norm": 27245.4609375, + "learning_rate": 3.416818178829039e-05, + "loss": 0.4106, + "step": 139550 + }, + { + "epoch": 0.7202521914550023, + "grad_norm": 22430.6484375, + "learning_rate": 3.413353545522628e-05, + "loss": 0.4103, + "step": 139600 + }, + { + "epoch": 0.7205101614376151, + "grad_norm": 25269.876953125, + "learning_rate": 3.409889759137373e-05, + "loss": 0.4073, + "step": 139650 + }, + { + "epoch": 0.720768131420228, + "grad_norm": 22811.275390625, + "learning_rate": 3.406426821522172e-05, + "loss": 0.4156, + "step": 139700 + }, + { + "epoch": 0.7210261014028407, + "grad_norm": 21838.966796875, + "learning_rate": 3.402964734525477e-05, + "loss": 0.4132, + "step": 139750 + }, + { + "epoch": 0.7212840713854536, + "grad_norm": 22130.935546875, + "learning_rate": 3.39950349999528e-05, + "loss": 0.418, + "step": 139800 + }, + { + "epoch": 0.7215420413680664, + "grad_norm": 22744.779296875, + "learning_rate": 3.396043119779123e-05, + "loss": 0.4098, + "step": 139850 + }, + { + "epoch": 0.7218000113506793, + "grad_norm": 22559.07421875, + "learning_rate": 3.392583595724093e-05, + "loss": 0.4159, + "step": 139900 + }, + { + "epoch": 0.722057981333292, + "grad_norm": 20920.349609375, + "learning_rate": 3.3891249296768116e-05, + "loss": 0.406, + "step": 139950 + }, + { + "epoch": 0.7223159513159049, + "grad_norm": 20708.716796875, + "learning_rate": 3.38566712348345e-05, + "loss": 0.4102, + "step": 140000 + }, + { + "epoch": 0.7223159513159049, + "eval_loss": 0.39852654933929443, + "eval_runtime": 3128.1309, + "eval_samples_per_second": 991.365, + "eval_steps_per_second": 1.936, + "step": 140000 + }, + { + "epoch": 0.7225739212985177, + "grad_norm": 24440.734375, + "learning_rate": 3.382210178989718e-05, + "loss": 0.4144, + "step": 140050 + }, + { + "epoch": 0.7228318912811306, + "grad_norm": 22715.88671875, + "learning_rate": 3.378754098040867e-05, + "loss": 0.4146, + "step": 140100 + }, + { + "epoch": 0.7230898612637433, + "grad_norm": 23713.474609375, + "learning_rate": 3.375298882481683e-05, + "loss": 0.4089, + "step": 140150 + }, + { + "epoch": 0.7233478312463562, + "grad_norm": 24705.048828125, + "learning_rate": 3.371844534156497e-05, + "loss": 0.4052, + "step": 140200 + }, + { + "epoch": 0.723605801228969, + "grad_norm": 22624.98046875, + "learning_rate": 3.368391054909169e-05, + "loss": 0.4155, + "step": 140250 + }, + { + "epoch": 0.7238637712115819, + "grad_norm": 24774.72265625, + "learning_rate": 3.364938446583103e-05, + "loss": 0.4058, + "step": 140300 + }, + { + "epoch": 0.7241217411941946, + "grad_norm": 24109.02734375, + "learning_rate": 3.361486711021235e-05, + "loss": 0.4169, + "step": 140350 + }, + { + "epoch": 0.7243797111768074, + "grad_norm": 20315.724609375, + "learning_rate": 3.3580358500660284e-05, + "loss": 0.4135, + "step": 140400 + }, + { + "epoch": 0.7246376811594203, + "grad_norm": 26642.84765625, + "learning_rate": 3.3545858655594935e-05, + "loss": 0.4182, + "step": 140450 + }, + { + "epoch": 0.7248956511420331, + "grad_norm": 23466.93359375, + "learning_rate": 3.351136759343161e-05, + "loss": 0.4098, + "step": 140500 + }, + { + "epoch": 0.725153621124646, + "grad_norm": 25247.11328125, + "learning_rate": 3.3476885332580985e-05, + "loss": 0.4085, + "step": 140550 + }, + { + "epoch": 0.7254115911072587, + "grad_norm": 25220.11328125, + "learning_rate": 3.3442411891449e-05, + "loss": 0.4139, + "step": 140600 + }, + { + "epoch": 0.7256695610898716, + "grad_norm": 21836.095703125, + "learning_rate": 3.3407947288436936e-05, + "loss": 0.4127, + "step": 140650 + }, + { + "epoch": 0.7259275310724844, + "grad_norm": 22301.443359375, + "learning_rate": 3.3373491541941346e-05, + "loss": 0.4127, + "step": 140700 + }, + { + "epoch": 0.7261855010550973, + "grad_norm": 21902.615234375, + "learning_rate": 3.333904467035399e-05, + "loss": 0.4111, + "step": 140750 + }, + { + "epoch": 0.72644347103771, + "grad_norm": 21408.71484375, + "learning_rate": 3.3304606692061984e-05, + "loss": 0.4095, + "step": 140800 + }, + { + "epoch": 0.7267014410203229, + "grad_norm": 26146.03515625, + "learning_rate": 3.3270177625447626e-05, + "loss": 0.4096, + "step": 140850 + }, + { + "epoch": 0.7269594110029357, + "grad_norm": 22772.9921875, + "learning_rate": 3.323575748888852e-05, + "loss": 0.4109, + "step": 140900 + }, + { + "epoch": 0.7272173809855486, + "grad_norm": 24654.810546875, + "learning_rate": 3.320134630075742e-05, + "loss": 0.4135, + "step": 140950 + }, + { + "epoch": 0.7274753509681613, + "grad_norm": 23458.103515625, + "learning_rate": 3.31669440794224e-05, + "loss": 0.4128, + "step": 141000 + }, + { + "epoch": 0.7277333209507741, + "grad_norm": 22455.630859375, + "learning_rate": 3.3132550843246654e-05, + "loss": 0.411, + "step": 141050 + }, + { + "epoch": 0.727991290933387, + "grad_norm": 22372.08203125, + "learning_rate": 3.3098166610588655e-05, + "loss": 0.413, + "step": 141100 + }, + { + "epoch": 0.7282492609159998, + "grad_norm": 22878.216796875, + "learning_rate": 3.306379139980206e-05, + "loss": 0.4054, + "step": 141150 + }, + { + "epoch": 0.7285072308986127, + "grad_norm": 22959.708984375, + "learning_rate": 3.302942522923563e-05, + "loss": 0.4114, + "step": 141200 + }, + { + "epoch": 0.7287652008812254, + "grad_norm": 22574.986328125, + "learning_rate": 3.2995068117233417e-05, + "loss": 0.4105, + "step": 141250 + }, + { + "epoch": 0.7290231708638383, + "grad_norm": 23770.279296875, + "learning_rate": 3.2960720082134555e-05, + "loss": 0.4091, + "step": 141300 + }, + { + "epoch": 0.7292811408464511, + "grad_norm": 23017.416015625, + "learning_rate": 3.292638114227338e-05, + "loss": 0.411, + "step": 141350 + }, + { + "epoch": 0.729539110829064, + "grad_norm": 23605.982421875, + "learning_rate": 3.289205131597932e-05, + "loss": 0.4097, + "step": 141400 + }, + { + "epoch": 0.7297970808116767, + "grad_norm": 22409.12890625, + "learning_rate": 3.2857730621577006e-05, + "loss": 0.4096, + "step": 141450 + }, + { + "epoch": 0.7300550507942896, + "grad_norm": 22681.11328125, + "learning_rate": 3.282341907738613e-05, + "loss": 0.4066, + "step": 141500 + }, + { + "epoch": 0.7303130207769024, + "grad_norm": 27188.859375, + "learning_rate": 3.278911670172154e-05, + "loss": 0.4104, + "step": 141550 + }, + { + "epoch": 0.7305709907595153, + "grad_norm": 25134.85546875, + "learning_rate": 3.2754823512893225e-05, + "loss": 0.4105, + "step": 141600 + }, + { + "epoch": 0.730828960742128, + "grad_norm": 21408.478515625, + "learning_rate": 3.2720539529206154e-05, + "loss": 0.412, + "step": 141650 + }, + { + "epoch": 0.7310869307247408, + "grad_norm": 21062.59375, + "learning_rate": 3.26862647689605e-05, + "loss": 0.411, + "step": 141700 + }, + { + "epoch": 0.7313449007073537, + "grad_norm": 21591.23828125, + "learning_rate": 3.265199925045143e-05, + "loss": 0.4171, + "step": 141750 + }, + { + "epoch": 0.7316028706899665, + "grad_norm": 23328.751953125, + "learning_rate": 3.261774299196926e-05, + "loss": 0.4127, + "step": 141800 + }, + { + "epoch": 0.7318608406725794, + "grad_norm": 27247.59375, + "learning_rate": 3.258349601179928e-05, + "loss": 0.4087, + "step": 141850 + }, + { + "epoch": 0.7321188106551921, + "grad_norm": 24500.822265625, + "learning_rate": 3.254925832822188e-05, + "loss": 0.4015, + "step": 141900 + }, + { + "epoch": 0.732376780637805, + "grad_norm": 25855.849609375, + "learning_rate": 3.251502995951247e-05, + "loss": 0.4125, + "step": 141950 + }, + { + "epoch": 0.7326347506204178, + "grad_norm": 23075.234375, + "learning_rate": 3.248081092394148e-05, + "loss": 0.4112, + "step": 142000 + }, + { + "epoch": 0.7328927206030307, + "grad_norm": 25166.712890625, + "learning_rate": 3.2446601239774405e-05, + "loss": 0.4121, + "step": 142050 + }, + { + "epoch": 0.7331506905856434, + "grad_norm": 23327.337890625, + "learning_rate": 3.241240092527167e-05, + "loss": 0.41, + "step": 142100 + }, + { + "epoch": 0.7334086605682563, + "grad_norm": 34138.34375, + "learning_rate": 3.237820999868876e-05, + "loss": 0.413, + "step": 142150 + }, + { + "epoch": 0.7336666305508691, + "grad_norm": 23031.2109375, + "learning_rate": 3.234402847827612e-05, + "loss": 0.414, + "step": 142200 + }, + { + "epoch": 0.733924600533482, + "grad_norm": 23237.44921875, + "learning_rate": 3.230985638227921e-05, + "loss": 0.4159, + "step": 142250 + }, + { + "epoch": 0.7341825705160947, + "grad_norm": 21437.705078125, + "learning_rate": 3.2275693728938395e-05, + "loss": 0.4078, + "step": 142300 + }, + { + "epoch": 0.7344405404987076, + "grad_norm": 23815.9140625, + "learning_rate": 3.224154053648906e-05, + "loss": 0.4135, + "step": 142350 + }, + { + "epoch": 0.7346985104813204, + "grad_norm": 26809.724609375, + "learning_rate": 3.2207396823161514e-05, + "loss": 0.409, + "step": 142400 + }, + { + "epoch": 0.7349564804639332, + "grad_norm": 21905.6484375, + "learning_rate": 3.2173262607181e-05, + "loss": 0.41, + "step": 142450 + }, + { + "epoch": 0.735214450446546, + "grad_norm": 23628.076171875, + "learning_rate": 3.2139137906767743e-05, + "loss": 0.4175, + "step": 142500 + }, + { + "epoch": 0.7354724204291588, + "grad_norm": 24156.837890625, + "learning_rate": 3.210502274013679e-05, + "loss": 0.4114, + "step": 142550 + }, + { + "epoch": 0.7357303904117717, + "grad_norm": 21517.404296875, + "learning_rate": 3.207091712549819e-05, + "loss": 0.4112, + "step": 142600 + }, + { + "epoch": 0.7359883603943845, + "grad_norm": 22684.734375, + "learning_rate": 3.203682108105685e-05, + "loss": 0.417, + "step": 142650 + }, + { + "epoch": 0.7362463303769974, + "grad_norm": 22205.361328125, + "learning_rate": 3.2002734625012585e-05, + "loss": 0.4111, + "step": 142700 + }, + { + "epoch": 0.7365043003596101, + "grad_norm": 21131.41796875, + "learning_rate": 3.196865777556008e-05, + "loss": 0.4114, + "step": 142750 + }, + { + "epoch": 0.736762270342223, + "grad_norm": 23506.66796875, + "learning_rate": 3.1934590550888894e-05, + "loss": 0.4183, + "step": 142800 + }, + { + "epoch": 0.7370202403248358, + "grad_norm": 24435.4609375, + "learning_rate": 3.190053296918345e-05, + "loss": 0.4166, + "step": 142850 + }, + { + "epoch": 0.7372782103074487, + "grad_norm": 22610.4296875, + "learning_rate": 3.186648504862303e-05, + "loss": 0.4109, + "step": 142900 + }, + { + "epoch": 0.7375361802900614, + "grad_norm": 24722.974609375, + "learning_rate": 3.183244680738178e-05, + "loss": 0.4093, + "step": 142950 + }, + { + "epoch": 0.7377941502726743, + "grad_norm": 23927.6640625, + "learning_rate": 3.1798418263628596e-05, + "loss": 0.4106, + "step": 143000 + }, + { + "epoch": 0.7380521202552871, + "grad_norm": 23958.216796875, + "learning_rate": 3.176439943552732e-05, + "loss": 0.4067, + "step": 143050 + }, + { + "epoch": 0.7383100902379, + "grad_norm": 23272.818359375, + "learning_rate": 3.1730390341236496e-05, + "loss": 0.4086, + "step": 143100 + }, + { + "epoch": 0.7385680602205127, + "grad_norm": 20998.751953125, + "learning_rate": 3.1696390998909556e-05, + "loss": 0.4099, + "step": 143150 + }, + { + "epoch": 0.7388260302031255, + "grad_norm": 24493.677734375, + "learning_rate": 3.166240142669464e-05, + "loss": 0.413, + "step": 143200 + }, + { + "epoch": 0.7390840001857384, + "grad_norm": 22639.927734375, + "learning_rate": 3.162842164273479e-05, + "loss": 0.4105, + "step": 143250 + }, + { + "epoch": 0.7393419701683512, + "grad_norm": 24407.029296875, + "learning_rate": 3.15944516651677e-05, + "loss": 0.4188, + "step": 143300 + }, + { + "epoch": 0.7395999401509641, + "grad_norm": 26538.68359375, + "learning_rate": 3.156049151212591e-05, + "loss": 0.4135, + "step": 143350 + }, + { + "epoch": 0.7398579101335768, + "grad_norm": 24519.060546875, + "learning_rate": 3.1526541201736695e-05, + "loss": 0.4141, + "step": 143400 + }, + { + "epoch": 0.7401158801161897, + "grad_norm": 21236.681640625, + "learning_rate": 3.149260075212206e-05, + "loss": 0.4096, + "step": 143450 + }, + { + "epoch": 0.7403738500988025, + "grad_norm": 24463.015625, + "learning_rate": 3.1458670181398796e-05, + "loss": 0.4035, + "step": 143500 + }, + { + "epoch": 0.7406318200814154, + "grad_norm": 26984.408203125, + "learning_rate": 3.1424749507678336e-05, + "loss": 0.4115, + "step": 143550 + }, + { + "epoch": 0.7408897900640281, + "grad_norm": 25619.35546875, + "learning_rate": 3.139083874906691e-05, + "loss": 0.4131, + "step": 143600 + }, + { + "epoch": 0.741147760046641, + "grad_norm": 24277.7890625, + "learning_rate": 3.13569379236654e-05, + "loss": 0.3994, + "step": 143650 + }, + { + "epoch": 0.7414057300292538, + "grad_norm": 24007.654296875, + "learning_rate": 3.1323047049569446e-05, + "loss": 0.4091, + "step": 143700 + }, + { + "epoch": 0.7416637000118667, + "grad_norm": 21688.703125, + "learning_rate": 3.12891661448693e-05, + "loss": 0.4176, + "step": 143750 + }, + { + "epoch": 0.7419216699944794, + "grad_norm": 22735.900390625, + "learning_rate": 3.125529522764995e-05, + "loss": 0.4091, + "step": 143800 + }, + { + "epoch": 0.7421796399770922, + "grad_norm": 23359.259765625, + "learning_rate": 3.122143431599105e-05, + "loss": 0.4125, + "step": 143850 + }, + { + "epoch": 0.7424376099597051, + "grad_norm": 26637.263671875, + "learning_rate": 3.118758342796687e-05, + "loss": 0.4137, + "step": 143900 + }, + { + "epoch": 0.7426955799423179, + "grad_norm": 24977.3984375, + "learning_rate": 3.1153742581646406e-05, + "loss": 0.4094, + "step": 143950 + }, + { + "epoch": 0.7429535499249308, + "grad_norm": 25850.91796875, + "learning_rate": 3.111991179509318e-05, + "loss": 0.4092, + "step": 144000 + }, + { + "epoch": 0.7432115199075435, + "grad_norm": 22823.0625, + "learning_rate": 3.1086091086365474e-05, + "loss": 0.4111, + "step": 144050 + }, + { + "epoch": 0.7434694898901564, + "grad_norm": 24187.640625, + "learning_rate": 3.1052280473516076e-05, + "loss": 0.414, + "step": 144100 + }, + { + "epoch": 0.7437274598727692, + "grad_norm": 21726.537109375, + "learning_rate": 3.101847997459249e-05, + "loss": 0.4098, + "step": 144150 + }, + { + "epoch": 0.7439854298553821, + "grad_norm": 23075.27734375, + "learning_rate": 3.098468960763671e-05, + "loss": 0.4084, + "step": 144200 + }, + { + "epoch": 0.7442433998379948, + "grad_norm": 24207.529296875, + "learning_rate": 3.095090939068541e-05, + "loss": 0.4156, + "step": 144250 + }, + { + "epoch": 0.7445013698206077, + "grad_norm": 25209.39453125, + "learning_rate": 3.091713934176982e-05, + "loss": 0.4122, + "step": 144300 + }, + { + "epoch": 0.7447593398032205, + "grad_norm": 24308.361328125, + "learning_rate": 3.088337947891573e-05, + "loss": 0.408, + "step": 144350 + }, + { + "epoch": 0.7450173097858334, + "grad_norm": 22416.990234375, + "learning_rate": 3.0849629820143517e-05, + "loss": 0.4136, + "step": 144400 + }, + { + "epoch": 0.7452752797684461, + "grad_norm": 24417.943359375, + "learning_rate": 3.081589038346806e-05, + "loss": 0.4079, + "step": 144450 + }, + { + "epoch": 0.745533249751059, + "grad_norm": 21519.650390625, + "learning_rate": 3.078216118689885e-05, + "loss": 0.4073, + "step": 144500 + }, + { + "epoch": 0.7457912197336718, + "grad_norm": 22821.1796875, + "learning_rate": 3.074844224843986e-05, + "loss": 0.4058, + "step": 144550 + }, + { + "epoch": 0.7460491897162846, + "grad_norm": 22559.86328125, + "learning_rate": 3.071473358608963e-05, + "loss": 0.413, + "step": 144600 + }, + { + "epoch": 0.7463071596988974, + "grad_norm": 25803.658203125, + "learning_rate": 3.068103521784115e-05, + "loss": 0.4077, + "step": 144650 + }, + { + "epoch": 0.7465651296815102, + "grad_norm": 27601.787109375, + "learning_rate": 3.0647347161681983e-05, + "loss": 0.4057, + "step": 144700 + }, + { + "epoch": 0.7468230996641231, + "grad_norm": 21363.67578125, + "learning_rate": 3.061366943559417e-05, + "loss": 0.4082, + "step": 144750 + }, + { + "epoch": 0.7470810696467359, + "grad_norm": 24007.3046875, + "learning_rate": 3.058000205755421e-05, + "loss": 0.4121, + "step": 144800 + }, + { + "epoch": 0.7473390396293488, + "grad_norm": 29940.8125, + "learning_rate": 3.054634504553312e-05, + "loss": 0.4046, + "step": 144850 + }, + { + "epoch": 0.7475970096119615, + "grad_norm": 24161.90234375, + "learning_rate": 3.0512698417496334e-05, + "loss": 0.4108, + "step": 144900 + }, + { + "epoch": 0.7478549795945744, + "grad_norm": 22984.619140625, + "learning_rate": 3.0479062191403785e-05, + "loss": 0.4158, + "step": 144950 + }, + { + "epoch": 0.7481129495771872, + "grad_norm": 26418.95703125, + "learning_rate": 3.0445436385209836e-05, + "loss": 0.4059, + "step": 145000 + }, + { + "epoch": 0.7481129495771872, + "eval_loss": 0.3971329629421234, + "eval_runtime": 3201.285, + "eval_samples_per_second": 968.711, + "eval_steps_per_second": 1.892, + "step": 145000 + }, + { + "epoch": 0.7483709195598001, + "grad_norm": 22503.525390625, + "learning_rate": 3.0411821016863308e-05, + "loss": 0.4048, + "step": 145050 + }, + { + "epoch": 0.7486288895424128, + "grad_norm": 23114.525390625, + "learning_rate": 3.03782161043074e-05, + "loss": 0.4111, + "step": 145100 + }, + { + "epoch": 0.7488868595250256, + "grad_norm": 23249.794921875, + "learning_rate": 3.0344621665479778e-05, + "loss": 0.4093, + "step": 145150 + }, + { + "epoch": 0.7491448295076385, + "grad_norm": 23568.833984375, + "learning_rate": 3.0311037718312518e-05, + "loss": 0.4166, + "step": 145200 + }, + { + "epoch": 0.7494027994902513, + "grad_norm": 21794.6015625, + "learning_rate": 3.027746428073206e-05, + "loss": 0.4122, + "step": 145250 + }, + { + "epoch": 0.7496607694728641, + "grad_norm": 23710.212890625, + "learning_rate": 3.024390137065929e-05, + "loss": 0.4066, + "step": 145300 + }, + { + "epoch": 0.7499187394554769, + "grad_norm": 23179.240234375, + "learning_rate": 3.0210349006009385e-05, + "loss": 0.4127, + "step": 145350 + }, + { + "epoch": 0.7501767094380898, + "grad_norm": 25111.078125, + "learning_rate": 3.017680720469199e-05, + "loss": 0.4128, + "step": 145400 + }, + { + "epoch": 0.7504346794207026, + "grad_norm": 24289.095703125, + "learning_rate": 3.0143275984611042e-05, + "loss": 0.4167, + "step": 145450 + }, + { + "epoch": 0.7506926494033155, + "grad_norm": 22695.802734375, + "learning_rate": 3.0109755363664893e-05, + "loss": 0.4135, + "step": 145500 + }, + { + "epoch": 0.7509506193859282, + "grad_norm": 26995.833984375, + "learning_rate": 3.0076245359746163e-05, + "loss": 0.4057, + "step": 145550 + }, + { + "epoch": 0.7512085893685411, + "grad_norm": 21887.568359375, + "learning_rate": 3.004274599074185e-05, + "loss": 0.4089, + "step": 145600 + }, + { + "epoch": 0.7514665593511539, + "grad_norm": 27344.78125, + "learning_rate": 3.00092572745333e-05, + "loss": 0.4156, + "step": 145650 + }, + { + "epoch": 0.7517245293337668, + "grad_norm": 25476.15234375, + "learning_rate": 2.9975779228996104e-05, + "loss": 0.4113, + "step": 145700 + }, + { + "epoch": 0.7519824993163795, + "grad_norm": 24602.64453125, + "learning_rate": 2.9942311872000215e-05, + "loss": 0.4077, + "step": 145750 + }, + { + "epoch": 0.7522404692989924, + "grad_norm": 24124.984375, + "learning_rate": 2.990885522140985e-05, + "loss": 0.4122, + "step": 145800 + }, + { + "epoch": 0.7524984392816052, + "grad_norm": 24945.2109375, + "learning_rate": 2.987540929508354e-05, + "loss": 0.409, + "step": 145850 + }, + { + "epoch": 0.752756409264218, + "grad_norm": 26535.109375, + "learning_rate": 2.9841974110874037e-05, + "loss": 0.4132, + "step": 145900 + }, + { + "epoch": 0.7530143792468308, + "grad_norm": 21566.904296875, + "learning_rate": 2.980854968662843e-05, + "loss": 0.4073, + "step": 145950 + }, + { + "epoch": 0.7532723492294436, + "grad_norm": 22965.73828125, + "learning_rate": 2.9775136040188007e-05, + "loss": 0.4124, + "step": 146000 + }, + { + "epoch": 0.7535303192120565, + "grad_norm": 25307.123046875, + "learning_rate": 2.974173318938833e-05, + "loss": 0.4134, + "step": 146050 + }, + { + "epoch": 0.7537882891946693, + "grad_norm": 22280.431640625, + "learning_rate": 2.9708341152059226e-05, + "loss": 0.4085, + "step": 146100 + }, + { + "epoch": 0.7540462591772822, + "grad_norm": 25268.705078125, + "learning_rate": 2.9674959946024662e-05, + "loss": 0.4031, + "step": 146150 + }, + { + "epoch": 0.7543042291598949, + "grad_norm": 20014.28125, + "learning_rate": 2.9641589589102918e-05, + "loss": 0.4093, + "step": 146200 + }, + { + "epoch": 0.7545621991425078, + "grad_norm": 28430.544921875, + "learning_rate": 2.9608230099106427e-05, + "loss": 0.4112, + "step": 146250 + }, + { + "epoch": 0.7548201691251206, + "grad_norm": 21031.328125, + "learning_rate": 2.9574881493841867e-05, + "loss": 0.4084, + "step": 146300 + }, + { + "epoch": 0.7550781391077335, + "grad_norm": 24118.916015625, + "learning_rate": 2.9541543791110032e-05, + "loss": 0.4152, + "step": 146350 + }, + { + "epoch": 0.7553361090903462, + "grad_norm": 20663.740234375, + "learning_rate": 2.950821700870598e-05, + "loss": 0.409, + "step": 146400 + }, + { + "epoch": 0.7555940790729591, + "grad_norm": 23081.328125, + "learning_rate": 2.9474901164418877e-05, + "loss": 0.4089, + "step": 146450 + }, + { + "epoch": 0.7558520490555719, + "grad_norm": 24167.80859375, + "learning_rate": 2.9441596276032085e-05, + "loss": 0.4096, + "step": 146500 + }, + { + "epoch": 0.7561100190381848, + "grad_norm": 24959.595703125, + "learning_rate": 2.940830236132313e-05, + "loss": 0.4109, + "step": 146550 + }, + { + "epoch": 0.7563679890207975, + "grad_norm": 22731.36328125, + "learning_rate": 2.9375019438063622e-05, + "loss": 0.41, + "step": 146600 + }, + { + "epoch": 0.7566259590034103, + "grad_norm": 24127.41015625, + "learning_rate": 2.9341747524019368e-05, + "loss": 0.4078, + "step": 146650 + }, + { + "epoch": 0.7568839289860232, + "grad_norm": 27476.791015625, + "learning_rate": 2.9308486636950254e-05, + "loss": 0.4063, + "step": 146700 + }, + { + "epoch": 0.757141898968636, + "grad_norm": 24664.61328125, + "learning_rate": 2.9275236794610328e-05, + "loss": 0.4086, + "step": 146750 + }, + { + "epoch": 0.7573998689512488, + "grad_norm": 23883.0625, + "learning_rate": 2.9241998014747664e-05, + "loss": 0.4046, + "step": 146800 + }, + { + "epoch": 0.7576578389338616, + "grad_norm": 23431.509765625, + "learning_rate": 2.9208770315104515e-05, + "loss": 0.4054, + "step": 146850 + }, + { + "epoch": 0.7579158089164745, + "grad_norm": 25177.9453125, + "learning_rate": 2.9175553713417176e-05, + "loss": 0.4094, + "step": 146900 + }, + { + "epoch": 0.7581737788990873, + "grad_norm": 22862.201171875, + "learning_rate": 2.9142348227416e-05, + "loss": 0.4073, + "step": 146950 + }, + { + "epoch": 0.7584317488817002, + "grad_norm": 21731.240234375, + "learning_rate": 2.9109153874825478e-05, + "loss": 0.4081, + "step": 147000 + }, + { + "epoch": 0.7586897188643129, + "grad_norm": 24952.87109375, + "learning_rate": 2.9075970673364083e-05, + "loss": 0.4092, + "step": 147050 + }, + { + "epoch": 0.7589476888469258, + "grad_norm": 23138.029296875, + "learning_rate": 2.9042798640744385e-05, + "loss": 0.4051, + "step": 147100 + }, + { + "epoch": 0.7592056588295386, + "grad_norm": 21496.501953125, + "learning_rate": 2.900963779467295e-05, + "loss": 0.4096, + "step": 147150 + }, + { + "epoch": 0.7594636288121515, + "grad_norm": 22243.36328125, + "learning_rate": 2.8976488152850406e-05, + "loss": 0.3985, + "step": 147200 + }, + { + "epoch": 0.7597215987947642, + "grad_norm": 24515.029296875, + "learning_rate": 2.894334973297137e-05, + "loss": 0.4043, + "step": 147250 + }, + { + "epoch": 0.759979568777377, + "grad_norm": 23431.802734375, + "learning_rate": 2.8910222552724553e-05, + "loss": 0.4092, + "step": 147300 + }, + { + "epoch": 0.7602375387599899, + "grad_norm": 24192.44140625, + "learning_rate": 2.8877106629792515e-05, + "loss": 0.413, + "step": 147350 + }, + { + "epoch": 0.7604955087426027, + "grad_norm": 24239.015625, + "learning_rate": 2.884400198185196e-05, + "loss": 0.4064, + "step": 147400 + }, + { + "epoch": 0.7607534787252155, + "grad_norm": 22407.27734375, + "learning_rate": 2.881090862657348e-05, + "loss": 0.4086, + "step": 147450 + }, + { + "epoch": 0.7610114487078283, + "grad_norm": 24915.517578125, + "learning_rate": 2.877782658162166e-05, + "loss": 0.4067, + "step": 147500 + }, + { + "epoch": 0.7612694186904412, + "grad_norm": 23721.33984375, + "learning_rate": 2.8744755864655098e-05, + "loss": 0.4078, + "step": 147550 + }, + { + "epoch": 0.761527388673054, + "grad_norm": 23041.625, + "learning_rate": 2.8711696493326233e-05, + "loss": 0.4092, + "step": 147600 + }, + { + "epoch": 0.7617853586556669, + "grad_norm": 24021.81640625, + "learning_rate": 2.867864848528158e-05, + "loss": 0.4116, + "step": 147650 + }, + { + "epoch": 0.7620433286382796, + "grad_norm": 21309.7890625, + "learning_rate": 2.8645611858161502e-05, + "loss": 0.414, + "step": 147700 + }, + { + "epoch": 0.7623012986208925, + "grad_norm": 21959.544921875, + "learning_rate": 2.8612586629600307e-05, + "loss": 0.4113, + "step": 147750 + }, + { + "epoch": 0.7625592686035053, + "grad_norm": 22090.75, + "learning_rate": 2.857957281722623e-05, + "loss": 0.41, + "step": 147800 + }, + { + "epoch": 0.7628172385861182, + "grad_norm": 21273.6640625, + "learning_rate": 2.854657043866138e-05, + "loss": 0.4043, + "step": 147850 + }, + { + "epoch": 0.7630752085687309, + "grad_norm": 22781.33984375, + "learning_rate": 2.8513579511521825e-05, + "loss": 0.4009, + "step": 147900 + }, + { + "epoch": 0.7633331785513437, + "grad_norm": 24383.95703125, + "learning_rate": 2.8480600053417472e-05, + "loss": 0.4077, + "step": 147950 + }, + { + "epoch": 0.7635911485339566, + "grad_norm": 23988.673828125, + "learning_rate": 2.8447632081952104e-05, + "loss": 0.4048, + "step": 148000 + }, + { + "epoch": 0.7638491185165694, + "grad_norm": 24593.1484375, + "learning_rate": 2.8414675614723397e-05, + "loss": 0.4145, + "step": 148050 + }, + { + "epoch": 0.7641070884991822, + "grad_norm": 25818.216796875, + "learning_rate": 2.838173066932287e-05, + "loss": 0.408, + "step": 148100 + }, + { + "epoch": 0.764365058481795, + "grad_norm": 25780.1796875, + "learning_rate": 2.8348797263335886e-05, + "loss": 0.4109, + "step": 148150 + }, + { + "epoch": 0.7646230284644079, + "grad_norm": 22835.51171875, + "learning_rate": 2.8315875414341687e-05, + "loss": 0.4037, + "step": 148200 + }, + { + "epoch": 0.7648809984470207, + "grad_norm": 22711.501953125, + "learning_rate": 2.8282965139913296e-05, + "loss": 0.4117, + "step": 148250 + }, + { + "epoch": 0.7651389684296336, + "grad_norm": 22654.080078125, + "learning_rate": 2.825006645761758e-05, + "loss": 0.4094, + "step": 148300 + }, + { + "epoch": 0.7653969384122463, + "grad_norm": 23406.8671875, + "learning_rate": 2.821717938501526e-05, + "loss": 0.4096, + "step": 148350 + }, + { + "epoch": 0.7656549083948592, + "grad_norm": 23591.68359375, + "learning_rate": 2.8184303939660745e-05, + "loss": 0.4087, + "step": 148400 + }, + { + "epoch": 0.765912878377472, + "grad_norm": 21550.94140625, + "learning_rate": 2.815144013910237e-05, + "loss": 0.4046, + "step": 148450 + }, + { + "epoch": 0.7661708483600849, + "grad_norm": 23503.48046875, + "learning_rate": 2.8118588000882177e-05, + "loss": 0.4116, + "step": 148500 + }, + { + "epoch": 0.7664288183426976, + "grad_norm": 25247.244140625, + "learning_rate": 2.8085747542536e-05, + "loss": 0.4023, + "step": 148550 + }, + { + "epoch": 0.7666867883253105, + "grad_norm": 23665.91796875, + "learning_rate": 2.805291878159344e-05, + "loss": 0.4117, + "step": 148600 + }, + { + "epoch": 0.7669447583079233, + "grad_norm": 22785.59765625, + "learning_rate": 2.8020101735577837e-05, + "loss": 0.4084, + "step": 148650 + }, + { + "epoch": 0.7672027282905362, + "grad_norm": 20447.72265625, + "learning_rate": 2.7987296422006327e-05, + "loss": 0.4091, + "step": 148700 + }, + { + "epoch": 0.7674606982731489, + "grad_norm": 24965.869140625, + "learning_rate": 2.795450285838974e-05, + "loss": 0.4067, + "step": 148750 + }, + { + "epoch": 0.7677186682557617, + "grad_norm": 24323.09765625, + "learning_rate": 2.7921721062232637e-05, + "loss": 0.4037, + "step": 148800 + }, + { + "epoch": 0.7679766382383746, + "grad_norm": 23956.177734375, + "learning_rate": 2.7888951051033314e-05, + "loss": 0.4079, + "step": 148850 + }, + { + "epoch": 0.7682346082209874, + "grad_norm": 24222.4140625, + "learning_rate": 2.7856192842283756e-05, + "loss": 0.4112, + "step": 148900 + }, + { + "epoch": 0.7684925782036002, + "grad_norm": 24444.046875, + "learning_rate": 2.782344645346966e-05, + "loss": 0.4148, + "step": 148950 + }, + { + "epoch": 0.768750548186213, + "grad_norm": 23160.578125, + "learning_rate": 2.779071190207046e-05, + "loss": 0.4063, + "step": 149000 + }, + { + "epoch": 0.7690085181688259, + "grad_norm": 25806.732421875, + "learning_rate": 2.7757989205559142e-05, + "loss": 0.4112, + "step": 149050 + }, + { + "epoch": 0.7692664881514387, + "grad_norm": 21389.734375, + "learning_rate": 2.7725278381402524e-05, + "loss": 0.4104, + "step": 149100 + }, + { + "epoch": 0.7695244581340516, + "grad_norm": 23550.23828125, + "learning_rate": 2.769257944706098e-05, + "loss": 0.4121, + "step": 149150 + }, + { + "epoch": 0.7697824281166643, + "grad_norm": 21442.373046875, + "learning_rate": 2.765989241998854e-05, + "loss": 0.4087, + "step": 149200 + }, + { + "epoch": 0.7700403980992772, + "grad_norm": 23958.978515625, + "learning_rate": 2.7627217317632993e-05, + "loss": 0.4136, + "step": 149250 + }, + { + "epoch": 0.77029836808189, + "grad_norm": 22143.07421875, + "learning_rate": 2.759455415743556e-05, + "loss": 0.41, + "step": 149300 + }, + { + "epoch": 0.7705563380645029, + "grad_norm": 22873.86328125, + "learning_rate": 2.7561902956831294e-05, + "loss": 0.4094, + "step": 149350 + }, + { + "epoch": 0.7708143080471156, + "grad_norm": 22419.3046875, + "learning_rate": 2.7529263733248734e-05, + "loss": 0.4133, + "step": 149400 + }, + { + "epoch": 0.7710722780297284, + "grad_norm": 22167.474609375, + "learning_rate": 2.7496636504110075e-05, + "loss": 0.4181, + "step": 149450 + }, + { + "epoch": 0.7713302480123413, + "grad_norm": 25449.96875, + "learning_rate": 2.74640212868311e-05, + "loss": 0.412, + "step": 149500 + }, + { + "epoch": 0.7715882179949541, + "grad_norm": 22876.767578125, + "learning_rate": 2.7431418098821154e-05, + "loss": 0.4087, + "step": 149550 + }, + { + "epoch": 0.7718461879775669, + "grad_norm": 25600.65625, + "learning_rate": 2.7398826957483235e-05, + "loss": 0.4133, + "step": 149600 + }, + { + "epoch": 0.7721041579601797, + "grad_norm": 21764.0, + "learning_rate": 2.7366247880213834e-05, + "loss": 0.4073, + "step": 149650 + }, + { + "epoch": 0.7723621279427926, + "grad_norm": 21836.0625, + "learning_rate": 2.7333680884403046e-05, + "loss": 0.4165, + "step": 149700 + }, + { + "epoch": 0.7726200979254054, + "grad_norm": 22049.466796875, + "learning_rate": 2.7301125987434496e-05, + "loss": 0.4104, + "step": 149750 + }, + { + "epoch": 0.7728780679080183, + "grad_norm": 25398.28515625, + "learning_rate": 2.7268583206685348e-05, + "loss": 0.4036, + "step": 149800 + }, + { + "epoch": 0.773136037890631, + "grad_norm": 22303.654296875, + "learning_rate": 2.72360525595263e-05, + "loss": 0.4077, + "step": 149850 + }, + { + "epoch": 0.7733940078732439, + "grad_norm": 24734.65234375, + "learning_rate": 2.7203534063321633e-05, + "loss": 0.409, + "step": 149900 + }, + { + "epoch": 0.7736519778558567, + "grad_norm": 22068.283203125, + "learning_rate": 2.7171027735429023e-05, + "loss": 0.4148, + "step": 149950 + }, + { + "epoch": 0.7739099478384696, + "grad_norm": 23250.4921875, + "learning_rate": 2.7138533593199766e-05, + "loss": 0.4062, + "step": 150000 + }, + { + "epoch": 0.7739099478384696, + "eval_loss": 0.3953176736831665, + "eval_runtime": 3196.6561, + "eval_samples_per_second": 970.114, + "eval_steps_per_second": 1.895, + "step": 150000 + }, + { + "epoch": 0.7741679178210823, + "grad_norm": 26452.75390625, + "learning_rate": 2.710605165397859e-05, + "loss": 0.4098, + "step": 150050 + }, + { + "epoch": 0.7744258878036951, + "grad_norm": 23934.783203125, + "learning_rate": 2.707358193510371e-05, + "loss": 0.4113, + "step": 150100 + }, + { + "epoch": 0.774683857786308, + "grad_norm": 22443.591796875, + "learning_rate": 2.7041124453906884e-05, + "loss": 0.4119, + "step": 150150 + }, + { + "epoch": 0.7749418277689208, + "grad_norm": 23333.529296875, + "learning_rate": 2.7008679227713214e-05, + "loss": 0.4029, + "step": 150200 + }, + { + "epoch": 0.7751997977515336, + "grad_norm": 22431.576171875, + "learning_rate": 2.6976246273841388e-05, + "loss": 0.4045, + "step": 150250 + }, + { + "epoch": 0.7754577677341464, + "grad_norm": 26959.68359375, + "learning_rate": 2.694382560960348e-05, + "loss": 0.4072, + "step": 150300 + }, + { + "epoch": 0.7757157377167593, + "grad_norm": 21064.66015625, + "learning_rate": 2.6911417252304994e-05, + "loss": 0.411, + "step": 150350 + }, + { + "epoch": 0.7759737076993721, + "grad_norm": 23242.583984375, + "learning_rate": 2.6879021219244906e-05, + "loss": 0.4075, + "step": 150400 + }, + { + "epoch": 0.776231677681985, + "grad_norm": 24738.037109375, + "learning_rate": 2.6846637527715546e-05, + "loss": 0.4069, + "step": 150450 + }, + { + "epoch": 0.7764896476645977, + "grad_norm": 23944.759765625, + "learning_rate": 2.681426619500277e-05, + "loss": 0.403, + "step": 150500 + }, + { + "epoch": 0.7767476176472106, + "grad_norm": 22064.611328125, + "learning_rate": 2.678190723838572e-05, + "loss": 0.4045, + "step": 150550 + }, + { + "epoch": 0.7770055876298234, + "grad_norm": 24025.298828125, + "learning_rate": 2.6749560675137002e-05, + "loss": 0.4087, + "step": 150600 + }, + { + "epoch": 0.7772635576124363, + "grad_norm": 20863.119140625, + "learning_rate": 2.6717226522522553e-05, + "loss": 0.4087, + "step": 150650 + }, + { + "epoch": 0.777521527595049, + "grad_norm": 24537.642578125, + "learning_rate": 2.668490479780179e-05, + "loss": 0.4127, + "step": 150700 + }, + { + "epoch": 0.7777794975776618, + "grad_norm": 24400.193359375, + "learning_rate": 2.665259551822733e-05, + "loss": 0.4066, + "step": 150750 + }, + { + "epoch": 0.7780374675602747, + "grad_norm": 25251.81640625, + "learning_rate": 2.6620298701045322e-05, + "loss": 0.4111, + "step": 150800 + }, + { + "epoch": 0.7782954375428875, + "grad_norm": 23078.0, + "learning_rate": 2.658801436349511e-05, + "loss": 0.4109, + "step": 150850 + }, + { + "epoch": 0.7785534075255003, + "grad_norm": 20437.556640625, + "learning_rate": 2.655574252280949e-05, + "loss": 0.4096, + "step": 150900 + }, + { + "epoch": 0.7788113775081131, + "grad_norm": 24091.796875, + "learning_rate": 2.652348319621457e-05, + "loss": 0.4097, + "step": 150950 + }, + { + "epoch": 0.779069347490726, + "grad_norm": 22893.6640625, + "learning_rate": 2.6491236400929686e-05, + "loss": 0.4093, + "step": 151000 + }, + { + "epoch": 0.7793273174733388, + "grad_norm": 22871.80859375, + "learning_rate": 2.645900215416761e-05, + "loss": 0.407, + "step": 151050 + }, + { + "epoch": 0.7795852874559517, + "grad_norm": 21766.30078125, + "learning_rate": 2.642678047313435e-05, + "loss": 0.4071, + "step": 151100 + }, + { + "epoch": 0.7798432574385644, + "grad_norm": 24945.544921875, + "learning_rate": 2.639457137502919e-05, + "loss": 0.4073, + "step": 151150 + }, + { + "epoch": 0.7801012274211773, + "grad_norm": 22374.009765625, + "learning_rate": 2.636237487704475e-05, + "loss": 0.409, + "step": 151200 + }, + { + "epoch": 0.7803591974037901, + "grad_norm": 23499.08984375, + "learning_rate": 2.6330190996366875e-05, + "loss": 0.4087, + "step": 151250 + }, + { + "epoch": 0.780617167386403, + "grad_norm": 24672.017578125, + "learning_rate": 2.629801975017469e-05, + "loss": 0.4075, + "step": 151300 + }, + { + "epoch": 0.7808751373690157, + "grad_norm": 23105.05078125, + "learning_rate": 2.6265861155640626e-05, + "loss": 0.4031, + "step": 151350 + }, + { + "epoch": 0.7811331073516286, + "grad_norm": 23226.171875, + "learning_rate": 2.6233715229930282e-05, + "loss": 0.4137, + "step": 151400 + }, + { + "epoch": 0.7813910773342414, + "grad_norm": 24494.732421875, + "learning_rate": 2.620158199020255e-05, + "loss": 0.4089, + "step": 151450 + }, + { + "epoch": 0.7816490473168543, + "grad_norm": 24024.236328125, + "learning_rate": 2.616946145360952e-05, + "loss": 0.4084, + "step": 151500 + }, + { + "epoch": 0.781907017299467, + "grad_norm": 21957.2265625, + "learning_rate": 2.613735363729649e-05, + "loss": 0.4079, + "step": 151550 + }, + { + "epoch": 0.7821649872820798, + "grad_norm": 22637.291015625, + "learning_rate": 2.6105258558402056e-05, + "loss": 0.4093, + "step": 151600 + }, + { + "epoch": 0.7824229572646927, + "grad_norm": 27436.56640625, + "learning_rate": 2.607317623405787e-05, + "loss": 0.4054, + "step": 151650 + }, + { + "epoch": 0.7826809272473055, + "grad_norm": 21909.509765625, + "learning_rate": 2.6041106681388922e-05, + "loss": 0.4052, + "step": 151700 + }, + { + "epoch": 0.7829388972299183, + "grad_norm": 22887.494140625, + "learning_rate": 2.6009049917513283e-05, + "loss": 0.408, + "step": 151750 + }, + { + "epoch": 0.7831968672125311, + "grad_norm": 20771.53125, + "learning_rate": 2.5977005959542222e-05, + "loss": 0.4052, + "step": 151800 + }, + { + "epoch": 0.783454837195144, + "grad_norm": 22012.322265625, + "learning_rate": 2.5944974824580244e-05, + "loss": 0.4053, + "step": 151850 + }, + { + "epoch": 0.7837128071777568, + "grad_norm": 25365.822265625, + "learning_rate": 2.5912956529724865e-05, + "loss": 0.4141, + "step": 151900 + }, + { + "epoch": 0.7839707771603697, + "grad_norm": 23211.658203125, + "learning_rate": 2.5880951092066885e-05, + "loss": 0.4094, + "step": 151950 + }, + { + "epoch": 0.7842287471429824, + "grad_norm": 21514.79296875, + "learning_rate": 2.584895852869018e-05, + "loss": 0.4056, + "step": 152000 + }, + { + "epoch": 0.7844867171255953, + "grad_norm": 23275.76953125, + "learning_rate": 2.581697885667176e-05, + "loss": 0.4076, + "step": 152050 + }, + { + "epoch": 0.7847446871082081, + "grad_norm": 24080.478515625, + "learning_rate": 2.578501209308174e-05, + "loss": 0.409, + "step": 152100 + }, + { + "epoch": 0.785002657090821, + "grad_norm": 23384.275390625, + "learning_rate": 2.5753058254983376e-05, + "loss": 0.4063, + "step": 152150 + }, + { + "epoch": 0.7852606270734337, + "grad_norm": 22736.451171875, + "learning_rate": 2.572111735943298e-05, + "loss": 0.4054, + "step": 152200 + }, + { + "epoch": 0.7855185970560465, + "grad_norm": 24730.462890625, + "learning_rate": 2.568918942348002e-05, + "loss": 0.4074, + "step": 152250 + }, + { + "epoch": 0.7857765670386594, + "grad_norm": 23020.759765625, + "learning_rate": 2.5657274464166996e-05, + "loss": 0.4143, + "step": 152300 + }, + { + "epoch": 0.7860345370212722, + "grad_norm": 22263.357421875, + "learning_rate": 2.56253724985295e-05, + "loss": 0.4075, + "step": 152350 + }, + { + "epoch": 0.786292507003885, + "grad_norm": 23515.408203125, + "learning_rate": 2.5593483543596165e-05, + "loss": 0.4055, + "step": 152400 + }, + { + "epoch": 0.7865504769864978, + "grad_norm": 21960.447265625, + "learning_rate": 2.55616076163887e-05, + "loss": 0.407, + "step": 152450 + }, + { + "epoch": 0.7868084469691107, + "grad_norm": 26880.94140625, + "learning_rate": 2.55297447339219e-05, + "loss": 0.4029, + "step": 152500 + }, + { + "epoch": 0.7870664169517235, + "grad_norm": 22276.259765625, + "learning_rate": 2.5497894913203492e-05, + "loss": 0.4038, + "step": 152550 + }, + { + "epoch": 0.7873243869343364, + "grad_norm": 22566.541015625, + "learning_rate": 2.5466058171234336e-05, + "loss": 0.4055, + "step": 152600 + }, + { + "epoch": 0.7875823569169491, + "grad_norm": 24620.486328125, + "learning_rate": 2.543423452500826e-05, + "loss": 0.4031, + "step": 152650 + }, + { + "epoch": 0.787840326899562, + "grad_norm": 24162.99609375, + "learning_rate": 2.540242399151208e-05, + "loss": 0.4075, + "step": 152700 + }, + { + "epoch": 0.7880982968821748, + "grad_norm": 25309.958984375, + "learning_rate": 2.537062658772572e-05, + "loss": 0.4052, + "step": 152750 + }, + { + "epoch": 0.7883562668647877, + "grad_norm": 22024.390625, + "learning_rate": 2.533884233062192e-05, + "loss": 0.4036, + "step": 152800 + }, + { + "epoch": 0.7886142368474004, + "grad_norm": 22356.041015625, + "learning_rate": 2.530707123716657e-05, + "loss": 0.4065, + "step": 152850 + }, + { + "epoch": 0.7888722068300132, + "grad_norm": 22957.642578125, + "learning_rate": 2.527531332431844e-05, + "loss": 0.403, + "step": 152900 + }, + { + "epoch": 0.7891301768126261, + "grad_norm": 22161.298828125, + "learning_rate": 2.52435686090293e-05, + "loss": 0.4046, + "step": 152950 + }, + { + "epoch": 0.7893881467952389, + "grad_norm": 22849.720703125, + "learning_rate": 2.5211837108243847e-05, + "loss": 0.4045, + "step": 153000 + }, + { + "epoch": 0.7896461167778517, + "grad_norm": 25891.248046875, + "learning_rate": 2.5180118838899756e-05, + "loss": 0.4083, + "step": 153050 + }, + { + "epoch": 0.7899040867604645, + "grad_norm": 23150.634765625, + "learning_rate": 2.5148413817927598e-05, + "loss": 0.4104, + "step": 153100 + }, + { + "epoch": 0.7901620567430774, + "grad_norm": 23457.515625, + "learning_rate": 2.511672206225094e-05, + "loss": 0.4101, + "step": 153150 + }, + { + "epoch": 0.7904200267256902, + "grad_norm": 21316.8828125, + "learning_rate": 2.508504358878621e-05, + "loss": 0.4091, + "step": 153200 + }, + { + "epoch": 0.7906779967083031, + "grad_norm": 25747.87109375, + "learning_rate": 2.5053378414442748e-05, + "loss": 0.4131, + "step": 153250 + }, + { + "epoch": 0.7909359666909158, + "grad_norm": 21499.56640625, + "learning_rate": 2.502172655612286e-05, + "loss": 0.4028, + "step": 153300 + }, + { + "epoch": 0.7911939366735287, + "grad_norm": 22949.970703125, + "learning_rate": 2.499008803072162e-05, + "loss": 0.4078, + "step": 153350 + }, + { + "epoch": 0.7914519066561415, + "grad_norm": 26207.181640625, + "learning_rate": 2.495846285512714e-05, + "loss": 0.4064, + "step": 153400 + }, + { + "epoch": 0.7917098766387544, + "grad_norm": 25037.625, + "learning_rate": 2.4926851046220246e-05, + "loss": 0.4067, + "step": 153450 + }, + { + "epoch": 0.7919678466213671, + "grad_norm": 24114.482421875, + "learning_rate": 2.4895252620874775e-05, + "loss": 0.4123, + "step": 153500 + }, + { + "epoch": 0.79222581660398, + "grad_norm": 24953.568359375, + "learning_rate": 2.4863667595957325e-05, + "loss": 0.4083, + "step": 153550 + }, + { + "epoch": 0.7924837865865928, + "grad_norm": 24928.2265625, + "learning_rate": 2.483209598832736e-05, + "loss": 0.4066, + "step": 153600 + }, + { + "epoch": 0.7927417565692056, + "grad_norm": 24045.166015625, + "learning_rate": 2.4800537814837227e-05, + "loss": 0.4056, + "step": 153650 + }, + { + "epoch": 0.7929997265518184, + "grad_norm": 24591.826171875, + "learning_rate": 2.476899309233205e-05, + "loss": 0.4094, + "step": 153700 + }, + { + "epoch": 0.7932576965344312, + "grad_norm": 23336.810546875, + "learning_rate": 2.4737461837649782e-05, + "loss": 0.41, + "step": 153750 + }, + { + "epoch": 0.7935156665170441, + "grad_norm": 23454.171875, + "learning_rate": 2.4705944067621216e-05, + "loss": 0.4068, + "step": 153800 + }, + { + "epoch": 0.7937736364996569, + "grad_norm": 25322.201171875, + "learning_rate": 2.467443979906991e-05, + "loss": 0.4097, + "step": 153850 + }, + { + "epoch": 0.7940316064822697, + "grad_norm": 24731.580078125, + "learning_rate": 2.464294904881222e-05, + "loss": 0.4028, + "step": 153900 + }, + { + "epoch": 0.7942895764648825, + "grad_norm": 21753.568359375, + "learning_rate": 2.4611471833657356e-05, + "loss": 0.4148, + "step": 153950 + }, + { + "epoch": 0.7945475464474954, + "grad_norm": 26548.966796875, + "learning_rate": 2.458000817040717e-05, + "loss": 0.4074, + "step": 154000 + }, + { + "epoch": 0.7948055164301082, + "grad_norm": 21149.470703125, + "learning_rate": 2.4548558075856414e-05, + "loss": 0.408, + "step": 154050 + }, + { + "epoch": 0.7950634864127211, + "grad_norm": 25742.859375, + "learning_rate": 2.4517121566792517e-05, + "loss": 0.405, + "step": 154100 + }, + { + "epoch": 0.7953214563953338, + "grad_norm": 20954.91796875, + "learning_rate": 2.4485698659995658e-05, + "loss": 0.3975, + "step": 154150 + }, + { + "epoch": 0.7955794263779467, + "grad_norm": 23551.646484375, + "learning_rate": 2.445428937223884e-05, + "loss": 0.4059, + "step": 154200 + }, + { + "epoch": 0.7958373963605595, + "grad_norm": 25214.693359375, + "learning_rate": 2.4422893720287654e-05, + "loss": 0.4008, + "step": 154250 + }, + { + "epoch": 0.7960953663431724, + "grad_norm": 25346.916015625, + "learning_rate": 2.4391511720900545e-05, + "loss": 0.4035, + "step": 154300 + }, + { + "epoch": 0.7963533363257851, + "grad_norm": 21641.23828125, + "learning_rate": 2.43601433908286e-05, + "loss": 0.4069, + "step": 154350 + }, + { + "epoch": 0.7966113063083979, + "grad_norm": 22860.998046875, + "learning_rate": 2.4328788746815628e-05, + "loss": 0.4022, + "step": 154400 + }, + { + "epoch": 0.7968692762910108, + "grad_norm": 21989.96484375, + "learning_rate": 2.429744780559813e-05, + "loss": 0.4055, + "step": 154450 + }, + { + "epoch": 0.7971272462736236, + "grad_norm": 24413.74609375, + "learning_rate": 2.4266120583905272e-05, + "loss": 0.412, + "step": 154500 + }, + { + "epoch": 0.7973852162562364, + "grad_norm": 24805.859375, + "learning_rate": 2.4234807098458957e-05, + "loss": 0.41, + "step": 154550 + }, + { + "epoch": 0.7976431862388492, + "grad_norm": 23658.326171875, + "learning_rate": 2.42035073659737e-05, + "loss": 0.41, + "step": 154600 + }, + { + "epoch": 0.7979011562214621, + "grad_norm": 25225.228515625, + "learning_rate": 2.417222140315669e-05, + "loss": 0.4069, + "step": 154650 + }, + { + "epoch": 0.7981591262040749, + "grad_norm": 23417.3828125, + "learning_rate": 2.414094922670777e-05, + "loss": 0.4102, + "step": 154700 + }, + { + "epoch": 0.7984170961866878, + "grad_norm": 25014.5078125, + "learning_rate": 2.4109690853319422e-05, + "loss": 0.412, + "step": 154750 + }, + { + "epoch": 0.7986750661693005, + "grad_norm": 25523.3125, + "learning_rate": 2.407844629967674e-05, + "loss": 0.4102, + "step": 154800 + }, + { + "epoch": 0.7989330361519134, + "grad_norm": 23173.44921875, + "learning_rate": 2.404721558245752e-05, + "loss": 0.407, + "step": 154850 + }, + { + "epoch": 0.7991910061345262, + "grad_norm": 24673.5078125, + "learning_rate": 2.401599871833204e-05, + "loss": 0.4054, + "step": 154900 + }, + { + "epoch": 0.799448976117139, + "grad_norm": 24709.765625, + "learning_rate": 2.398479572396331e-05, + "loss": 0.4097, + "step": 154950 + }, + { + "epoch": 0.7997069460997518, + "grad_norm": 22404.29296875, + "learning_rate": 2.395360661600687e-05, + "loss": 0.4072, + "step": 155000 + }, + { + "epoch": 0.7997069460997518, + "eval_loss": 0.39372530579566956, + "eval_runtime": 3195.8879, + "eval_samples_per_second": 970.347, + "eval_steps_per_second": 1.895, + "step": 155000 + }, + { + "epoch": 0.7999649160823646, + "grad_norm": 24004.09375, + "learning_rate": 2.3922431411110834e-05, + "loss": 0.4016, + "step": 155050 + }, + { + "epoch": 0.8002228860649775, + "grad_norm": 25013.6484375, + "learning_rate": 2.3891270125915992e-05, + "loss": 0.4068, + "step": 155100 + }, + { + "epoch": 0.8004808560475903, + "grad_norm": 23532.982421875, + "learning_rate": 2.3860122777055553e-05, + "loss": 0.4036, + "step": 155150 + }, + { + "epoch": 0.8007388260302031, + "grad_norm": 27413.044921875, + "learning_rate": 2.3828989381155426e-05, + "loss": 0.4098, + "step": 155200 + }, + { + "epoch": 0.8009967960128159, + "grad_norm": 25821.794921875, + "learning_rate": 2.379786995483399e-05, + "loss": 0.4076, + "step": 155250 + }, + { + "epoch": 0.8012547659954288, + "grad_norm": 23864.154296875, + "learning_rate": 2.37667645147022e-05, + "loss": 0.4082, + "step": 155300 + }, + { + "epoch": 0.8015127359780416, + "grad_norm": 22892.451171875, + "learning_rate": 2.3735673077363534e-05, + "loss": 0.4116, + "step": 155350 + }, + { + "epoch": 0.8017707059606545, + "grad_norm": 24638.51953125, + "learning_rate": 2.3704595659413987e-05, + "loss": 0.4015, + "step": 155400 + }, + { + "epoch": 0.8020286759432672, + "grad_norm": 23007.734375, + "learning_rate": 2.3673532277442112e-05, + "loss": 0.4075, + "step": 155450 + }, + { + "epoch": 0.8022866459258801, + "grad_norm": 25629.17578125, + "learning_rate": 2.364248294802892e-05, + "loss": 0.4031, + "step": 155500 + }, + { + "epoch": 0.8025446159084929, + "grad_norm": 23949.939453125, + "learning_rate": 2.3611447687747955e-05, + "loss": 0.4091, + "step": 155550 + }, + { + "epoch": 0.8028025858911058, + "grad_norm": 23120.3515625, + "learning_rate": 2.3580426513165228e-05, + "loss": 0.4106, + "step": 155600 + }, + { + "epoch": 0.8030605558737185, + "grad_norm": 26965.955078125, + "learning_rate": 2.3549419440839236e-05, + "loss": 0.4054, + "step": 155650 + }, + { + "epoch": 0.8033185258563313, + "grad_norm": 23370.33984375, + "learning_rate": 2.3518426487320948e-05, + "loss": 0.407, + "step": 155700 + }, + { + "epoch": 0.8035764958389442, + "grad_norm": 22571.12890625, + "learning_rate": 2.3487447669153833e-05, + "loss": 0.4118, + "step": 155750 + }, + { + "epoch": 0.803834465821557, + "grad_norm": 24092.56640625, + "learning_rate": 2.3456483002873768e-05, + "loss": 0.4053, + "step": 155800 + }, + { + "epoch": 0.8040924358041698, + "grad_norm": 24549.140625, + "learning_rate": 2.3425532505009072e-05, + "loss": 0.405, + "step": 155850 + }, + { + "epoch": 0.8043504057867826, + "grad_norm": 23510.904296875, + "learning_rate": 2.3394596192080574e-05, + "loss": 0.4049, + "step": 155900 + }, + { + "epoch": 0.8046083757693955, + "grad_norm": 23147.369140625, + "learning_rate": 2.3363674080601416e-05, + "loss": 0.4032, + "step": 155950 + }, + { + "epoch": 0.8048663457520083, + "grad_norm": 21877.10546875, + "learning_rate": 2.3332766187077264e-05, + "loss": 0.4006, + "step": 156000 + }, + { + "epoch": 0.8051243157346211, + "grad_norm": 24041.384765625, + "learning_rate": 2.330187252800614e-05, + "loss": 0.4056, + "step": 156050 + }, + { + "epoch": 0.8053822857172339, + "grad_norm": 23452.453125, + "learning_rate": 2.327099311987848e-05, + "loss": 0.4071, + "step": 156100 + }, + { + "epoch": 0.8056402556998468, + "grad_norm": 23023.5859375, + "learning_rate": 2.3240127979177123e-05, + "loss": 0.4095, + "step": 156150 + }, + { + "epoch": 0.8058982256824596, + "grad_norm": 23684.615234375, + "learning_rate": 2.3209277122377255e-05, + "loss": 0.4023, + "step": 156200 + }, + { + "epoch": 0.8061561956650725, + "grad_norm": 22598.732421875, + "learning_rate": 2.31784405659465e-05, + "loss": 0.4013, + "step": 156250 + }, + { + "epoch": 0.8064141656476852, + "grad_norm": 21835.93359375, + "learning_rate": 2.3147618326344804e-05, + "loss": 0.4072, + "step": 156300 + }, + { + "epoch": 0.806672135630298, + "grad_norm": 26343.41015625, + "learning_rate": 2.311681042002448e-05, + "loss": 0.4154, + "step": 156350 + }, + { + "epoch": 0.8069301056129109, + "grad_norm": 24116.162109375, + "learning_rate": 2.3086016863430193e-05, + "loss": 0.4032, + "step": 156400 + }, + { + "epoch": 0.8071880755955237, + "grad_norm": 23874.53515625, + "learning_rate": 2.3055237672998946e-05, + "loss": 0.4063, + "step": 156450 + }, + { + "epoch": 0.8074460455781365, + "grad_norm": 25624.203125, + "learning_rate": 2.302447286516006e-05, + "loss": 0.4034, + "step": 156500 + }, + { + "epoch": 0.8077040155607493, + "grad_norm": 22652.2109375, + "learning_rate": 2.2993722456335236e-05, + "loss": 0.4049, + "step": 156550 + }, + { + "epoch": 0.8079619855433622, + "grad_norm": 26234.255859375, + "learning_rate": 2.2962986462938385e-05, + "loss": 0.4035, + "step": 156600 + }, + { + "epoch": 0.808219955525975, + "grad_norm": 24374.974609375, + "learning_rate": 2.293226490137584e-05, + "loss": 0.4052, + "step": 156650 + }, + { + "epoch": 0.8084779255085878, + "grad_norm": 24195.4296875, + "learning_rate": 2.2901557788046146e-05, + "loss": 0.4072, + "step": 156700 + }, + { + "epoch": 0.8087358954912006, + "grad_norm": 24590.525390625, + "learning_rate": 2.2870865139340165e-05, + "loss": 0.4092, + "step": 156750 + }, + { + "epoch": 0.8089938654738135, + "grad_norm": 20863.509765625, + "learning_rate": 2.2840186971641083e-05, + "loss": 0.4073, + "step": 156800 + }, + { + "epoch": 0.8092518354564263, + "grad_norm": 23662.16015625, + "learning_rate": 2.2809523301324238e-05, + "loss": 0.4101, + "step": 156850 + }, + { + "epoch": 0.8095098054390392, + "grad_norm": 21700.666015625, + "learning_rate": 2.2778874144757357e-05, + "loss": 0.4075, + "step": 156900 + }, + { + "epoch": 0.8097677754216519, + "grad_norm": 29026.71484375, + "learning_rate": 2.274823951830036e-05, + "loss": 0.4005, + "step": 156950 + }, + { + "epoch": 0.8100257454042648, + "grad_norm": 27310.48828125, + "learning_rate": 2.2717619438305397e-05, + "loss": 0.4058, + "step": 157000 + }, + { + "epoch": 0.8102837153868776, + "grad_norm": 25008.673828125, + "learning_rate": 2.2687013921116895e-05, + "loss": 0.404, + "step": 157050 + }, + { + "epoch": 0.8105416853694904, + "grad_norm": 22623.57421875, + "learning_rate": 2.2656422983071452e-05, + "loss": 0.4059, + "step": 157100 + }, + { + "epoch": 0.8107996553521032, + "grad_norm": 23960.427734375, + "learning_rate": 2.2625846640497965e-05, + "loss": 0.4096, + "step": 157150 + }, + { + "epoch": 0.811057625334716, + "grad_norm": 22415.021484375, + "learning_rate": 2.2595284909717475e-05, + "loss": 0.4061, + "step": 157200 + }, + { + "epoch": 0.8113155953173289, + "grad_norm": 23358.822265625, + "learning_rate": 2.2564737807043233e-05, + "loss": 0.4003, + "step": 157250 + }, + { + "epoch": 0.8115735652999417, + "grad_norm": 21686.9765625, + "learning_rate": 2.2534205348780702e-05, + "loss": 0.4063, + "step": 157300 + }, + { + "epoch": 0.8118315352825545, + "grad_norm": 22949.484375, + "learning_rate": 2.2503687551227504e-05, + "loss": 0.407, + "step": 157350 + }, + { + "epoch": 0.8120895052651673, + "grad_norm": 21776.201171875, + "learning_rate": 2.2473184430673444e-05, + "loss": 0.4073, + "step": 157400 + }, + { + "epoch": 0.8123474752477802, + "grad_norm": 25641.17578125, + "learning_rate": 2.244269600340055e-05, + "loss": 0.4074, + "step": 157450 + }, + { + "epoch": 0.812605445230393, + "grad_norm": 22723.42578125, + "learning_rate": 2.2412222285682867e-05, + "loss": 0.4119, + "step": 157500 + }, + { + "epoch": 0.8128634152130059, + "grad_norm": 24244.48046875, + "learning_rate": 2.2381763293786746e-05, + "loss": 0.4157, + "step": 157550 + }, + { + "epoch": 0.8131213851956186, + "grad_norm": 26826.337890625, + "learning_rate": 2.235131904397058e-05, + "loss": 0.4102, + "step": 157600 + }, + { + "epoch": 0.8133793551782315, + "grad_norm": 23157.0546875, + "learning_rate": 2.232088955248491e-05, + "loss": 0.4121, + "step": 157650 + }, + { + "epoch": 0.8136373251608443, + "grad_norm": 23352.009765625, + "learning_rate": 2.229047483557245e-05, + "loss": 0.4054, + "step": 157700 + }, + { + "epoch": 0.8138952951434572, + "grad_norm": 24417.2734375, + "learning_rate": 2.2260074909467925e-05, + "loss": 0.4092, + "step": 157750 + }, + { + "epoch": 0.8141532651260699, + "grad_norm": 22345.669921875, + "learning_rate": 2.2229689790398283e-05, + "loss": 0.402, + "step": 157800 + }, + { + "epoch": 0.8144112351086827, + "grad_norm": 22904.20703125, + "learning_rate": 2.2199319494582492e-05, + "loss": 0.4067, + "step": 157850 + }, + { + "epoch": 0.8146692050912956, + "grad_norm": 24132.306640625, + "learning_rate": 2.216896403823162e-05, + "loss": 0.4094, + "step": 157900 + }, + { + "epoch": 0.8149271750739084, + "grad_norm": 24649.001953125, + "learning_rate": 2.2138623437548833e-05, + "loss": 0.4048, + "step": 157950 + }, + { + "epoch": 0.8151851450565212, + "grad_norm": 24956.458984375, + "learning_rate": 2.210829770872933e-05, + "loss": 0.4038, + "step": 158000 + }, + { + "epoch": 0.815443115039134, + "grad_norm": 24047.3515625, + "learning_rate": 2.2077986867960437e-05, + "loss": 0.407, + "step": 158050 + }, + { + "epoch": 0.8157010850217469, + "grad_norm": 22895.953125, + "learning_rate": 2.2047690931421476e-05, + "loss": 0.4033, + "step": 158100 + }, + { + "epoch": 0.8159590550043597, + "grad_norm": 22524.640625, + "learning_rate": 2.201740991528383e-05, + "loss": 0.4136, + "step": 158150 + }, + { + "epoch": 0.8162170249869725, + "grad_norm": 22507.46875, + "learning_rate": 2.1987143835710928e-05, + "loss": 0.4043, + "step": 158200 + }, + { + "epoch": 0.8164749949695853, + "grad_norm": 24044.5390625, + "learning_rate": 2.1956892708858202e-05, + "loss": 0.4099, + "step": 158250 + }, + { + "epoch": 0.8167329649521982, + "grad_norm": 26112.05859375, + "learning_rate": 2.1926656550873103e-05, + "loss": 0.4087, + "step": 158300 + }, + { + "epoch": 0.816990934934811, + "grad_norm": 25168.59375, + "learning_rate": 2.189643537789517e-05, + "loss": 0.4059, + "step": 158350 + }, + { + "epoch": 0.8172489049174239, + "grad_norm": 31289.392578125, + "learning_rate": 2.1866229206055804e-05, + "loss": 0.4048, + "step": 158400 + }, + { + "epoch": 0.8175068749000366, + "grad_norm": 27301.970703125, + "learning_rate": 2.1836038051478508e-05, + "loss": 0.4111, + "step": 158450 + }, + { + "epoch": 0.8177648448826494, + "grad_norm": 22742.66015625, + "learning_rate": 2.180586193027877e-05, + "loss": 0.3998, + "step": 158500 + }, + { + "epoch": 0.8180228148652623, + "grad_norm": 26745.51171875, + "learning_rate": 2.177570085856395e-05, + "loss": 0.4069, + "step": 158550 + }, + { + "epoch": 0.8182807848478751, + "grad_norm": 24821.93359375, + "learning_rate": 2.1745554852433502e-05, + "loss": 0.4057, + "step": 158600 + }, + { + "epoch": 0.8185387548304879, + "grad_norm": 24082.908203125, + "learning_rate": 2.1715423927978755e-05, + "loss": 0.4042, + "step": 158650 + }, + { + "epoch": 0.8187967248131007, + "grad_norm": 23584.001953125, + "learning_rate": 2.168530810128302e-05, + "loss": 0.4062, + "step": 158700 + }, + { + "epoch": 0.8190546947957136, + "grad_norm": 25795.326171875, + "learning_rate": 2.1655207388421532e-05, + "loss": 0.4101, + "step": 158750 + }, + { + "epoch": 0.8193126647783264, + "grad_norm": 22298.908203125, + "learning_rate": 2.1625121805461483e-05, + "loss": 0.4004, + "step": 158800 + }, + { + "epoch": 0.8195706347609392, + "grad_norm": 24439.970703125, + "learning_rate": 2.1595051368461943e-05, + "loss": 0.4078, + "step": 158850 + }, + { + "epoch": 0.819828604743552, + "grad_norm": 24895.5546875, + "learning_rate": 2.1564996093473975e-05, + "loss": 0.4008, + "step": 158900 + }, + { + "epoch": 0.8200865747261649, + "grad_norm": 27615.1171875, + "learning_rate": 2.153495599654048e-05, + "loss": 0.4051, + "step": 158950 + }, + { + "epoch": 0.8203445447087777, + "grad_norm": 22537.25390625, + "learning_rate": 2.150493109369628e-05, + "loss": 0.4078, + "step": 159000 + }, + { + "epoch": 0.8206025146913906, + "grad_norm": 23422.39453125, + "learning_rate": 2.1474921400968085e-05, + "loss": 0.3999, + "step": 159050 + }, + { + "epoch": 0.8208604846740033, + "grad_norm": 24678.099609375, + "learning_rate": 2.1444926934374475e-05, + "loss": 0.4038, + "step": 159100 + }, + { + "epoch": 0.8211184546566161, + "grad_norm": 25680.623046875, + "learning_rate": 2.1414947709925963e-05, + "loss": 0.4082, + "step": 159150 + }, + { + "epoch": 0.821376424639229, + "grad_norm": 26526.724609375, + "learning_rate": 2.1384983743624813e-05, + "loss": 0.4076, + "step": 159200 + }, + { + "epoch": 0.8216343946218418, + "grad_norm": 21391.701171875, + "learning_rate": 2.1355035051465265e-05, + "loss": 0.4003, + "step": 159250 + }, + { + "epoch": 0.8218923646044546, + "grad_norm": 22676.607421875, + "learning_rate": 2.1325101649433327e-05, + "loss": 0.4087, + "step": 159300 + }, + { + "epoch": 0.8221503345870674, + "grad_norm": 23139.802734375, + "learning_rate": 2.1295183553506855e-05, + "loss": 0.4102, + "step": 159350 + }, + { + "epoch": 0.8224083045696803, + "grad_norm": 23598.369140625, + "learning_rate": 2.1265280779655593e-05, + "loss": 0.4027, + "step": 159400 + }, + { + "epoch": 0.8226662745522931, + "grad_norm": 24068.453125, + "learning_rate": 2.1235393343841008e-05, + "loss": 0.4097, + "step": 159450 + }, + { + "epoch": 0.8229242445349059, + "grad_norm": 26833.779296875, + "learning_rate": 2.1205521262016476e-05, + "loss": 0.4094, + "step": 159500 + }, + { + "epoch": 0.8231822145175187, + "grad_norm": 21122.98046875, + "learning_rate": 2.1175664550127123e-05, + "loss": 0.4074, + "step": 159550 + }, + { + "epoch": 0.8234401845001316, + "grad_norm": 24398.310546875, + "learning_rate": 2.1145823224109884e-05, + "loss": 0.4081, + "step": 159600 + }, + { + "epoch": 0.8236981544827444, + "grad_norm": 20830.05078125, + "learning_rate": 2.111599729989348e-05, + "loss": 0.4031, + "step": 159650 + }, + { + "epoch": 0.8239561244653573, + "grad_norm": 24353.29296875, + "learning_rate": 2.108618679339841e-05, + "loss": 0.4037, + "step": 159700 + }, + { + "epoch": 0.82421409444797, + "grad_norm": 22828.130859375, + "learning_rate": 2.1056391720536928e-05, + "loss": 0.4021, + "step": 159750 + }, + { + "epoch": 0.8244720644305829, + "grad_norm": 21661.53515625, + "learning_rate": 2.1026612097213106e-05, + "loss": 0.4117, + "step": 159800 + }, + { + "epoch": 0.8247300344131957, + "grad_norm": 20191.279296875, + "learning_rate": 2.0996847939322707e-05, + "loss": 0.4088, + "step": 159850 + }, + { + "epoch": 0.8249880043958085, + "grad_norm": 23767.8125, + "learning_rate": 2.0967099262753258e-05, + "loss": 0.4035, + "step": 159900 + }, + { + "epoch": 0.8252459743784213, + "grad_norm": 24693.4609375, + "learning_rate": 2.093736608338405e-05, + "loss": 0.4135, + "step": 159950 + }, + { + "epoch": 0.8255039443610341, + "grad_norm": 22759.341796875, + "learning_rate": 2.0907648417086027e-05, + "loss": 0.4048, + "step": 160000 + }, + { + "epoch": 0.8255039443610341, + "eval_loss": 0.3925068974494934, + "eval_runtime": 3187.046, + "eval_samples_per_second": 973.039, + "eval_steps_per_second": 1.901, + "step": 160000 + }, + { + "epoch": 0.825761914343647, + "grad_norm": 25066.45703125, + "learning_rate": 2.0877946279721983e-05, + "loss": 0.4017, + "step": 160050 + }, + { + "epoch": 0.8260198843262598, + "grad_norm": 24734.384765625, + "learning_rate": 2.084825968714626e-05, + "loss": 0.4091, + "step": 160100 + }, + { + "epoch": 0.8262778543088726, + "grad_norm": 26498.201171875, + "learning_rate": 2.0818588655205045e-05, + "loss": 0.4028, + "step": 160150 + }, + { + "epoch": 0.8265358242914854, + "grad_norm": 23436.36328125, + "learning_rate": 2.0788933199736143e-05, + "loss": 0.4019, + "step": 160200 + }, + { + "epoch": 0.8267937942740983, + "grad_norm": 23851.89453125, + "learning_rate": 2.075929333656904e-05, + "loss": 0.4055, + "step": 160250 + }, + { + "epoch": 0.8270517642567111, + "grad_norm": 23416.0625, + "learning_rate": 2.0729669081524977e-05, + "loss": 0.4075, + "step": 160300 + }, + { + "epoch": 0.8273097342393239, + "grad_norm": 22208.994140625, + "learning_rate": 2.070006045041673e-05, + "loss": 0.4047, + "step": 160350 + }, + { + "epoch": 0.8275677042219367, + "grad_norm": 21291.3515625, + "learning_rate": 2.067046745904888e-05, + "loss": 0.405, + "step": 160400 + }, + { + "epoch": 0.8278256742045496, + "grad_norm": 24646.279296875, + "learning_rate": 2.0640890123217565e-05, + "loss": 0.4076, + "step": 160450 + }, + { + "epoch": 0.8280836441871624, + "grad_norm": 22018.609375, + "learning_rate": 2.0611328458710595e-05, + "loss": 0.406, + "step": 160500 + }, + { + "epoch": 0.8283416141697753, + "grad_norm": 30070.40234375, + "learning_rate": 2.0581782481307415e-05, + "loss": 0.4099, + "step": 160550 + }, + { + "epoch": 0.828599584152388, + "grad_norm": 24574.34375, + "learning_rate": 2.0552252206779098e-05, + "loss": 0.4035, + "step": 160600 + }, + { + "epoch": 0.8288575541350008, + "grad_norm": 23137.224609375, + "learning_rate": 2.0522737650888313e-05, + "loss": 0.4006, + "step": 160650 + }, + { + "epoch": 0.8291155241176137, + "grad_norm": 22633.23828125, + "learning_rate": 2.0493238829389393e-05, + "loss": 0.4064, + "step": 160700 + }, + { + "epoch": 0.8293734941002265, + "grad_norm": 23670.525390625, + "learning_rate": 2.046375575802822e-05, + "loss": 0.4084, + "step": 160750 + }, + { + "epoch": 0.8296314640828393, + "grad_norm": 24236.7890625, + "learning_rate": 2.043428845254229e-05, + "loss": 0.413, + "step": 160800 + }, + { + "epoch": 0.8298894340654521, + "grad_norm": 25734.12890625, + "learning_rate": 2.0404836928660676e-05, + "loss": 0.3992, + "step": 160850 + }, + { + "epoch": 0.830147404048065, + "grad_norm": 23417.83203125, + "learning_rate": 2.037540120210401e-05, + "loss": 0.4069, + "step": 160900 + }, + { + "epoch": 0.8304053740306778, + "grad_norm": 24619.853515625, + "learning_rate": 2.0345981288584575e-05, + "loss": 0.4002, + "step": 160950 + }, + { + "epoch": 0.8306633440132906, + "grad_norm": 21862.111328125, + "learning_rate": 2.031657720380608e-05, + "loss": 0.4012, + "step": 161000 + }, + { + "epoch": 0.8309213139959034, + "grad_norm": 23347.91015625, + "learning_rate": 2.0287188963463906e-05, + "loss": 0.4061, + "step": 161050 + }, + { + "epoch": 0.8311792839785163, + "grad_norm": 25119.107421875, + "learning_rate": 2.02578165832449e-05, + "loss": 0.4061, + "step": 161100 + }, + { + "epoch": 0.8314372539611291, + "grad_norm": 22684.50390625, + "learning_rate": 2.0228460078827466e-05, + "loss": 0.4062, + "step": 161150 + }, + { + "epoch": 0.831695223943742, + "grad_norm": 39309.30859375, + "learning_rate": 2.0199119465881565e-05, + "loss": 0.4091, + "step": 161200 + }, + { + "epoch": 0.8319531939263547, + "grad_norm": 22076.8125, + "learning_rate": 2.0169794760068632e-05, + "loss": 0.4052, + "step": 161250 + }, + { + "epoch": 0.8322111639089675, + "grad_norm": 26682.44140625, + "learning_rate": 2.0140485977041636e-05, + "loss": 0.405, + "step": 161300 + }, + { + "epoch": 0.8324691338915804, + "grad_norm": 24586.09375, + "learning_rate": 2.011119313244502e-05, + "loss": 0.4066, + "step": 161350 + }, + { + "epoch": 0.8327271038741932, + "grad_norm": 26363.5703125, + "learning_rate": 2.008191624191475e-05, + "loss": 0.4027, + "step": 161400 + }, + { + "epoch": 0.832985073856806, + "grad_norm": 24361.9921875, + "learning_rate": 2.0052655321078246e-05, + "loss": 0.4041, + "step": 161450 + }, + { + "epoch": 0.8332430438394188, + "grad_norm": 22026.951171875, + "learning_rate": 2.0023410385554466e-05, + "loss": 0.4068, + "step": 161500 + }, + { + "epoch": 0.8335010138220317, + "grad_norm": 24540.068359375, + "learning_rate": 1.9994181450953725e-05, + "loss": 0.4036, + "step": 161550 + }, + { + "epoch": 0.8337589838046445, + "grad_norm": 25837.857421875, + "learning_rate": 1.9964968532877916e-05, + "loss": 0.4052, + "step": 161600 + }, + { + "epoch": 0.8340169537872573, + "grad_norm": 23252.900390625, + "learning_rate": 1.993577164692031e-05, + "loss": 0.4021, + "step": 161650 + }, + { + "epoch": 0.8342749237698701, + "grad_norm": 25305.177734375, + "learning_rate": 1.990659080866562e-05, + "loss": 0.4089, + "step": 161700 + }, + { + "epoch": 0.834532893752483, + "grad_norm": 25317.89453125, + "learning_rate": 1.9877426033690066e-05, + "loss": 0.4082, + "step": 161750 + }, + { + "epoch": 0.8347908637350958, + "grad_norm": 25872.2109375, + "learning_rate": 1.984827733756117e-05, + "loss": 0.4021, + "step": 161800 + }, + { + "epoch": 0.8350488337177087, + "grad_norm": 23915.955078125, + "learning_rate": 1.9819144735837998e-05, + "loss": 0.4054, + "step": 161850 + }, + { + "epoch": 0.8353068037003214, + "grad_norm": 25145.380859375, + "learning_rate": 1.9790028244070946e-05, + "loss": 0.4119, + "step": 161900 + }, + { + "epoch": 0.8355647736829342, + "grad_norm": 24318.28125, + "learning_rate": 1.976092787780184e-05, + "loss": 0.4015, + "step": 161950 + }, + { + "epoch": 0.8358227436655471, + "grad_norm": 22675.845703125, + "learning_rate": 1.973184365256388e-05, + "loss": 0.4107, + "step": 162000 + }, + { + "epoch": 0.83608071364816, + "grad_norm": 23785.451171875, + "learning_rate": 1.9702775583881656e-05, + "loss": 0.408, + "step": 162050 + }, + { + "epoch": 0.8363386836307727, + "grad_norm": 22790.47265625, + "learning_rate": 1.9673723687271174e-05, + "loss": 0.406, + "step": 162100 + }, + { + "epoch": 0.8365966536133855, + "grad_norm": 24380.498046875, + "learning_rate": 1.9644687978239746e-05, + "loss": 0.4105, + "step": 162150 + }, + { + "epoch": 0.8368546235959984, + "grad_norm": 23812.814453125, + "learning_rate": 1.9615668472286085e-05, + "loss": 0.4032, + "step": 162200 + }, + { + "epoch": 0.8371125935786112, + "grad_norm": 22820.734375, + "learning_rate": 1.9586665184900232e-05, + "loss": 0.4072, + "step": 162250 + }, + { + "epoch": 0.837370563561224, + "grad_norm": 22347.779296875, + "learning_rate": 1.955767813156359e-05, + "loss": 0.4045, + "step": 162300 + }, + { + "epoch": 0.8376285335438368, + "grad_norm": 24328.546875, + "learning_rate": 1.9528707327748852e-05, + "loss": 0.4097, + "step": 162350 + }, + { + "epoch": 0.8378865035264497, + "grad_norm": 23850.13671875, + "learning_rate": 1.9499752788920146e-05, + "loss": 0.4085, + "step": 162400 + }, + { + "epoch": 0.8381444735090625, + "grad_norm": 24967.3203125, + "learning_rate": 1.9470814530532756e-05, + "loss": 0.4056, + "step": 162450 + }, + { + "epoch": 0.8384024434916753, + "grad_norm": 23740.197265625, + "learning_rate": 1.9441892568033426e-05, + "loss": 0.4112, + "step": 162500 + }, + { + "epoch": 0.8386604134742881, + "grad_norm": 26039.447265625, + "learning_rate": 1.941298691686012e-05, + "loss": 0.405, + "step": 162550 + }, + { + "epoch": 0.838918383456901, + "grad_norm": 22781.23828125, + "learning_rate": 1.9384097592442102e-05, + "loss": 0.4043, + "step": 162600 + }, + { + "epoch": 0.8391763534395138, + "grad_norm": 25735.17578125, + "learning_rate": 1.935522461019998e-05, + "loss": 0.4021, + "step": 162650 + }, + { + "epoch": 0.8394343234221266, + "grad_norm": 26452.810546875, + "learning_rate": 1.932636798554552e-05, + "loss": 0.4093, + "step": 162700 + }, + { + "epoch": 0.8396922934047394, + "grad_norm": 24199.3515625, + "learning_rate": 1.929752773388189e-05, + "loss": 0.4003, + "step": 162750 + }, + { + "epoch": 0.8399502633873522, + "grad_norm": 27610.30859375, + "learning_rate": 1.9268703870603434e-05, + "loss": 0.4035, + "step": 162800 + }, + { + "epoch": 0.8402082333699651, + "grad_norm": 23799.3359375, + "learning_rate": 1.9239896411095777e-05, + "loss": 0.4072, + "step": 162850 + }, + { + "epoch": 0.8404662033525779, + "grad_norm": 24182.162109375, + "learning_rate": 1.9211105370735784e-05, + "loss": 0.4056, + "step": 162900 + }, + { + "epoch": 0.8407241733351907, + "grad_norm": 21251.0625, + "learning_rate": 1.918233076489153e-05, + "loss": 0.4073, + "step": 162950 + }, + { + "epoch": 0.8409821433178035, + "grad_norm": 22723.09765625, + "learning_rate": 1.9153572608922383e-05, + "loss": 0.4041, + "step": 163000 + }, + { + "epoch": 0.8412401133004164, + "grad_norm": 23557.125, + "learning_rate": 1.9124830918178876e-05, + "loss": 0.4064, + "step": 163050 + }, + { + "epoch": 0.8414980832830292, + "grad_norm": 24273.71484375, + "learning_rate": 1.9096105708002754e-05, + "loss": 0.4072, + "step": 163100 + }, + { + "epoch": 0.841756053265642, + "grad_norm": 24078.10546875, + "learning_rate": 1.9067396993726994e-05, + "loss": 0.409, + "step": 163150 + }, + { + "epoch": 0.8420140232482548, + "grad_norm": 23370.31640625, + "learning_rate": 1.9038704790675738e-05, + "loss": 0.4082, + "step": 163200 + }, + { + "epoch": 0.8422719932308677, + "grad_norm": 23478.564453125, + "learning_rate": 1.901002911416432e-05, + "loss": 0.4082, + "step": 163250 + }, + { + "epoch": 0.8425299632134805, + "grad_norm": 22697.802734375, + "learning_rate": 1.898136997949929e-05, + "loss": 0.4107, + "step": 163300 + }, + { + "epoch": 0.8427879331960934, + "grad_norm": 25571.9765625, + "learning_rate": 1.8952727401978326e-05, + "loss": 0.3996, + "step": 163350 + }, + { + "epoch": 0.8430459031787061, + "grad_norm": 24950.283203125, + "learning_rate": 1.8924101396890264e-05, + "loss": 0.403, + "step": 163400 + }, + { + "epoch": 0.8433038731613189, + "grad_norm": 22436.380859375, + "learning_rate": 1.8895491979515162e-05, + "loss": 0.4041, + "step": 163450 + }, + { + "epoch": 0.8435618431439318, + "grad_norm": 25954.529296875, + "learning_rate": 1.8866899165124097e-05, + "loss": 0.4003, + "step": 163500 + }, + { + "epoch": 0.8438198131265446, + "grad_norm": 21477.8828125, + "learning_rate": 1.883832296897944e-05, + "loss": 0.4063, + "step": 163550 + }, + { + "epoch": 0.8440777831091574, + "grad_norm": 24669.7890625, + "learning_rate": 1.8809763406334535e-05, + "loss": 0.4049, + "step": 163600 + }, + { + "epoch": 0.8443357530917702, + "grad_norm": 27181.50390625, + "learning_rate": 1.878122049243398e-05, + "loss": 0.4007, + "step": 163650 + }, + { + "epoch": 0.8445937230743831, + "grad_norm": 25191.591796875, + "learning_rate": 1.8752694242513408e-05, + "loss": 0.4072, + "step": 163700 + }, + { + "epoch": 0.8448516930569959, + "grad_norm": 24557.42578125, + "learning_rate": 1.872418467179956e-05, + "loss": 0.4043, + "step": 163750 + }, + { + "epoch": 0.8451096630396087, + "grad_norm": 25135.6328125, + "learning_rate": 1.8695691795510335e-05, + "loss": 0.4008, + "step": 163800 + }, + { + "epoch": 0.8453676330222215, + "grad_norm": 23372.181640625, + "learning_rate": 1.8667215628854656e-05, + "loss": 0.4073, + "step": 163850 + }, + { + "epoch": 0.8456256030048344, + "grad_norm": 23332.65625, + "learning_rate": 1.8638756187032554e-05, + "loss": 0.3987, + "step": 163900 + }, + { + "epoch": 0.8458835729874472, + "grad_norm": 23423.669921875, + "learning_rate": 1.861031348523512e-05, + "loss": 0.4066, + "step": 163950 + }, + { + "epoch": 0.8461415429700601, + "grad_norm": 25873.208984375, + "learning_rate": 1.858188753864452e-05, + "loss": 0.4015, + "step": 164000 + }, + { + "epoch": 0.8463995129526728, + "grad_norm": 24766.4140625, + "learning_rate": 1.8553478362433964e-05, + "loss": 0.4076, + "step": 164050 + }, + { + "epoch": 0.8466574829352856, + "grad_norm": 25044.45703125, + "learning_rate": 1.852508597176776e-05, + "loss": 0.3972, + "step": 164100 + }, + { + "epoch": 0.8469154529178985, + "grad_norm": 23699.478515625, + "learning_rate": 1.8496710381801157e-05, + "loss": 0.3953, + "step": 164150 + }, + { + "epoch": 0.8471734229005113, + "grad_norm": 22853.53125, + "learning_rate": 1.8468351607680546e-05, + "loss": 0.4095, + "step": 164200 + }, + { + "epoch": 0.8474313928831241, + "grad_norm": 21374.96875, + "learning_rate": 1.8440009664543267e-05, + "loss": 0.4092, + "step": 164250 + }, + { + "epoch": 0.8476893628657369, + "grad_norm": 22454.515625, + "learning_rate": 1.8411684567517694e-05, + "loss": 0.4005, + "step": 164300 + }, + { + "epoch": 0.8479473328483498, + "grad_norm": 23134.24609375, + "learning_rate": 1.8383376331723258e-05, + "loss": 0.4041, + "step": 164350 + }, + { + "epoch": 0.8482053028309626, + "grad_norm": 23000.69921875, + "learning_rate": 1.835508497227028e-05, + "loss": 0.4056, + "step": 164400 + }, + { + "epoch": 0.8484632728135754, + "grad_norm": 23213.333984375, + "learning_rate": 1.8326810504260194e-05, + "loss": 0.4076, + "step": 164450 + }, + { + "epoch": 0.8487212427961882, + "grad_norm": 24883.953125, + "learning_rate": 1.8298552942785353e-05, + "loss": 0.4023, + "step": 164500 + }, + { + "epoch": 0.8489792127788011, + "grad_norm": 23075.015625, + "learning_rate": 1.827031230292908e-05, + "loss": 0.4095, + "step": 164550 + }, + { + "epoch": 0.8492371827614139, + "grad_norm": 24055.23828125, + "learning_rate": 1.824208859976569e-05, + "loss": 0.4034, + "step": 164600 + }, + { + "epoch": 0.8494951527440268, + "grad_norm": 24572.919921875, + "learning_rate": 1.8213881848360438e-05, + "loss": 0.4106, + "step": 164650 + }, + { + "epoch": 0.8497531227266395, + "grad_norm": 26111.40234375, + "learning_rate": 1.8185692063769566e-05, + "loss": 0.4051, + "step": 164700 + }, + { + "epoch": 0.8500110927092523, + "grad_norm": 22763.25, + "learning_rate": 1.8157519261040222e-05, + "loss": 0.4019, + "step": 164750 + }, + { + "epoch": 0.8502690626918652, + "grad_norm": 22230.16796875, + "learning_rate": 1.8129363455210503e-05, + "loss": 0.4085, + "step": 164800 + }, + { + "epoch": 0.850527032674478, + "grad_norm": 24729.40234375, + "learning_rate": 1.8101224661309435e-05, + "loss": 0.4042, + "step": 164850 + }, + { + "epoch": 0.8507850026570908, + "grad_norm": 23329.431640625, + "learning_rate": 1.807310289435696e-05, + "loss": 0.405, + "step": 164900 + }, + { + "epoch": 0.8510429726397036, + "grad_norm": 24267.970703125, + "learning_rate": 1.8044998169363908e-05, + "loss": 0.406, + "step": 164950 + }, + { + "epoch": 0.8513009426223165, + "grad_norm": 23587.689453125, + "learning_rate": 1.80169105013321e-05, + "loss": 0.4069, + "step": 165000 + }, + { + "epoch": 0.8513009426223165, + "eval_loss": 0.3912332057952881, + "eval_runtime": 3189.1337, + "eval_samples_per_second": 972.402, + "eval_steps_per_second": 1.899, + "step": 165000 + }, + { + "epoch": 0.8515589126049293, + "grad_norm": 23356.634765625, + "learning_rate": 1.798883990525412e-05, + "loss": 0.4022, + "step": 165050 + }, + { + "epoch": 0.8518168825875421, + "grad_norm": 23850.75, + "learning_rate": 1.7960786396113542e-05, + "loss": 0.3984, + "step": 165100 + }, + { + "epoch": 0.8520748525701549, + "grad_norm": 23898.03125, + "learning_rate": 1.7932749988884795e-05, + "loss": 0.4035, + "step": 165150 + }, + { + "epoch": 0.8523328225527678, + "grad_norm": 23517.4453125, + "learning_rate": 1.790473069853314e-05, + "loss": 0.4061, + "step": 165200 + }, + { + "epoch": 0.8525907925353806, + "grad_norm": 24264.568359375, + "learning_rate": 1.787672854001478e-05, + "loss": 0.4076, + "step": 165250 + }, + { + "epoch": 0.8528487625179934, + "grad_norm": 23741.220703125, + "learning_rate": 1.7848743528276663e-05, + "loss": 0.4063, + "step": 165300 + }, + { + "epoch": 0.8531067325006062, + "grad_norm": 25368.697265625, + "learning_rate": 1.782077567825669e-05, + "loss": 0.4027, + "step": 165350 + }, + { + "epoch": 0.853364702483219, + "grad_norm": 21610.12890625, + "learning_rate": 1.779282500488355e-05, + "loss": 0.4067, + "step": 165400 + }, + { + "epoch": 0.8536226724658319, + "grad_norm": 26066.560546875, + "learning_rate": 1.7764891523076766e-05, + "loss": 0.4091, + "step": 165450 + }, + { + "epoch": 0.8538806424484447, + "grad_norm": 22909.5234375, + "learning_rate": 1.773697524774669e-05, + "loss": 0.4035, + "step": 165500 + }, + { + "epoch": 0.8541386124310575, + "grad_norm": 23672.54296875, + "learning_rate": 1.7709076193794478e-05, + "loss": 0.407, + "step": 165550 + }, + { + "epoch": 0.8543965824136703, + "grad_norm": 22466.203125, + "learning_rate": 1.7681194376112125e-05, + "loss": 0.4057, + "step": 165600 + }, + { + "epoch": 0.8546545523962832, + "grad_norm": 23236.4296875, + "learning_rate": 1.7653329809582404e-05, + "loss": 0.4058, + "step": 165650 + }, + { + "epoch": 0.854912522378896, + "grad_norm": 23181.5, + "learning_rate": 1.7625482509078873e-05, + "loss": 0.4007, + "step": 165700 + }, + { + "epoch": 0.8551704923615088, + "grad_norm": 20621.5, + "learning_rate": 1.7597652489465877e-05, + "loss": 0.4053, + "step": 165750 + }, + { + "epoch": 0.8554284623441216, + "grad_norm": 23911.7734375, + "learning_rate": 1.756983976559855e-05, + "loss": 0.4043, + "step": 165800 + }, + { + "epoch": 0.8556864323267345, + "grad_norm": 21440.978515625, + "learning_rate": 1.7542044352322768e-05, + "loss": 0.4076, + "step": 165850 + }, + { + "epoch": 0.8559444023093473, + "grad_norm": 22439.712890625, + "learning_rate": 1.7514266264475233e-05, + "loss": 0.3999, + "step": 165900 + }, + { + "epoch": 0.8562023722919601, + "grad_norm": 24814.876953125, + "learning_rate": 1.748650551688328e-05, + "loss": 0.405, + "step": 165950 + }, + { + "epoch": 0.8564603422745729, + "grad_norm": 21705.185546875, + "learning_rate": 1.7458762124365096e-05, + "loss": 0.4007, + "step": 166000 + }, + { + "epoch": 0.8567183122571858, + "grad_norm": 25317.05078125, + "learning_rate": 1.7431036101729604e-05, + "loss": 0.4036, + "step": 166050 + }, + { + "epoch": 0.8569762822397986, + "grad_norm": 23984.142578125, + "learning_rate": 1.7403327463776343e-05, + "loss": 0.4027, + "step": 166100 + }, + { + "epoch": 0.8572342522224115, + "grad_norm": 24149.794921875, + "learning_rate": 1.7375636225295716e-05, + "loss": 0.3986, + "step": 166150 + }, + { + "epoch": 0.8574922222050242, + "grad_norm": 20085.748046875, + "learning_rate": 1.73479624010687e-05, + "loss": 0.4032, + "step": 166200 + }, + { + "epoch": 0.857750192187637, + "grad_norm": 25550.01171875, + "learning_rate": 1.732030600586711e-05, + "loss": 0.4067, + "step": 166250 + }, + { + "epoch": 0.8580081621702499, + "grad_norm": 23439.69921875, + "learning_rate": 1.7292667054453364e-05, + "loss": 0.4058, + "step": 166300 + }, + { + "epoch": 0.8582661321528627, + "grad_norm": 24064.46484375, + "learning_rate": 1.7265045561580606e-05, + "loss": 0.406, + "step": 166350 + }, + { + "epoch": 0.8585241021354755, + "grad_norm": 27679.162109375, + "learning_rate": 1.723744154199264e-05, + "loss": 0.403, + "step": 166400 + }, + { + "epoch": 0.8587820721180883, + "grad_norm": 21371.59765625, + "learning_rate": 1.7209855010423977e-05, + "loss": 0.4103, + "step": 166450 + }, + { + "epoch": 0.8590400421007012, + "grad_norm": 24340.283203125, + "learning_rate": 1.7182285981599766e-05, + "loss": 0.4073, + "step": 166500 + }, + { + "epoch": 0.859298012083314, + "grad_norm": 22603.62109375, + "learning_rate": 1.7154734470235823e-05, + "loss": 0.4026, + "step": 166550 + }, + { + "epoch": 0.8595559820659268, + "grad_norm": 21442.248046875, + "learning_rate": 1.7127200491038607e-05, + "loss": 0.4089, + "step": 166600 + }, + { + "epoch": 0.8598139520485396, + "grad_norm": 22127.478515625, + "learning_rate": 1.7099684058705212e-05, + "loss": 0.4073, + "step": 166650 + }, + { + "epoch": 0.8600719220311525, + "grad_norm": 37660.0859375, + "learning_rate": 1.707218518792342e-05, + "loss": 0.404, + "step": 166700 + }, + { + "epoch": 0.8603298920137653, + "grad_norm": 23772.982421875, + "learning_rate": 1.704470389337153e-05, + "loss": 0.4004, + "step": 166750 + }, + { + "epoch": 0.8605878619963782, + "grad_norm": 24957.23828125, + "learning_rate": 1.7017240189718575e-05, + "loss": 0.4025, + "step": 166800 + }, + { + "epoch": 0.8608458319789909, + "grad_norm": 25014.044921875, + "learning_rate": 1.6989794091624138e-05, + "loss": 0.4037, + "step": 166850 + }, + { + "epoch": 0.8611038019616037, + "grad_norm": 23370.162109375, + "learning_rate": 1.696236561373839e-05, + "loss": 0.4043, + "step": 166900 + }, + { + "epoch": 0.8613617719442166, + "grad_norm": 25212.830078125, + "learning_rate": 1.693495477070217e-05, + "loss": 0.3997, + "step": 166950 + }, + { + "epoch": 0.8616197419268294, + "grad_norm": 22828.701171875, + "learning_rate": 1.69075615771468e-05, + "loss": 0.4063, + "step": 167000 + }, + { + "epoch": 0.8618777119094422, + "grad_norm": 23862.4375, + "learning_rate": 1.6880186047694274e-05, + "loss": 0.4044, + "step": 167050 + }, + { + "epoch": 0.862135681892055, + "grad_norm": 25248.44140625, + "learning_rate": 1.685282819695711e-05, + "loss": 0.4072, + "step": 167100 + }, + { + "epoch": 0.8623936518746679, + "grad_norm": 24765.2421875, + "learning_rate": 1.68254880395384e-05, + "loss": 0.4055, + "step": 167150 + }, + { + "epoch": 0.8626516218572807, + "grad_norm": 22687.32421875, + "learning_rate": 1.6798165590031783e-05, + "loss": 0.4076, + "step": 167200 + }, + { + "epoch": 0.8629095918398935, + "grad_norm": 28427.16015625, + "learning_rate": 1.677086086302146e-05, + "loss": 0.3985, + "step": 167250 + }, + { + "epoch": 0.8631675618225063, + "grad_norm": 24114.146484375, + "learning_rate": 1.6743573873082147e-05, + "loss": 0.3993, + "step": 167300 + }, + { + "epoch": 0.8634255318051192, + "grad_norm": 22007.857421875, + "learning_rate": 1.6716304634779144e-05, + "loss": 0.4054, + "step": 167350 + }, + { + "epoch": 0.863683501787732, + "grad_norm": 24888.619140625, + "learning_rate": 1.6689053162668226e-05, + "loss": 0.3983, + "step": 167400 + }, + { + "epoch": 0.8639414717703447, + "grad_norm": 23306.1640625, + "learning_rate": 1.6661819471295704e-05, + "loss": 0.3985, + "step": 167450 + }, + { + "epoch": 0.8641994417529576, + "grad_norm": 25983.62109375, + "learning_rate": 1.6634603575198387e-05, + "loss": 0.4033, + "step": 167500 + }, + { + "epoch": 0.8644574117355704, + "grad_norm": 21851.826171875, + "learning_rate": 1.6607405488903582e-05, + "loss": 0.4067, + "step": 167550 + }, + { + "epoch": 0.8647153817181833, + "grad_norm": 23041.548828125, + "learning_rate": 1.6580225226929152e-05, + "loss": 0.4054, + "step": 167600 + }, + { + "epoch": 0.8649733517007961, + "grad_norm": 24893.72265625, + "learning_rate": 1.655306280378333e-05, + "loss": 0.4081, + "step": 167650 + }, + { + "epoch": 0.8652313216834089, + "grad_norm": 24462.869140625, + "learning_rate": 1.6525918233964933e-05, + "loss": 0.4093, + "step": 167700 + }, + { + "epoch": 0.8654892916660217, + "grad_norm": 20188.037109375, + "learning_rate": 1.6498791531963197e-05, + "loss": 0.3986, + "step": 167750 + }, + { + "epoch": 0.8657472616486346, + "grad_norm": 24806.51171875, + "learning_rate": 1.6471682712257812e-05, + "loss": 0.3988, + "step": 167800 + }, + { + "epoch": 0.8660052316312474, + "grad_norm": 21647.11328125, + "learning_rate": 1.6444591789318992e-05, + "loss": 0.4083, + "step": 167850 + }, + { + "epoch": 0.8662632016138602, + "grad_norm": 22894.3515625, + "learning_rate": 1.6417518777607277e-05, + "loss": 0.4004, + "step": 167900 + }, + { + "epoch": 0.866521171596473, + "grad_norm": 23173.974609375, + "learning_rate": 1.6390463691573765e-05, + "loss": 0.409, + "step": 167950 + }, + { + "epoch": 0.8667791415790859, + "grad_norm": 24268.001953125, + "learning_rate": 1.6363426545659927e-05, + "loss": 0.4021, + "step": 168000 + }, + { + "epoch": 0.8670371115616987, + "grad_norm": 23466.482421875, + "learning_rate": 1.6336407354297667e-05, + "loss": 0.4067, + "step": 168050 + }, + { + "epoch": 0.8672950815443115, + "grad_norm": 22965.560546875, + "learning_rate": 1.6309406131909298e-05, + "loss": 0.4127, + "step": 168100 + }, + { + "epoch": 0.8675530515269243, + "grad_norm": 22818.5859375, + "learning_rate": 1.6282422892907563e-05, + "loss": 0.4107, + "step": 168150 + }, + { + "epoch": 0.8678110215095372, + "grad_norm": 23358.80859375, + "learning_rate": 1.6255457651695565e-05, + "loss": 0.3985, + "step": 168200 + }, + { + "epoch": 0.86806899149215, + "grad_norm": 24952.044921875, + "learning_rate": 1.6228510422666865e-05, + "loss": 0.4021, + "step": 168250 + }, + { + "epoch": 0.8683269614747628, + "grad_norm": 23554.359375, + "learning_rate": 1.6201581220205353e-05, + "loss": 0.4091, + "step": 168300 + }, + { + "epoch": 0.8685849314573756, + "grad_norm": 23862.92578125, + "learning_rate": 1.6174670058685316e-05, + "loss": 0.4009, + "step": 168350 + }, + { + "epoch": 0.8688429014399884, + "grad_norm": 23549.693359375, + "learning_rate": 1.6147776952471415e-05, + "loss": 0.4062, + "step": 168400 + }, + { + "epoch": 0.8691008714226013, + "grad_norm": 25237.26953125, + "learning_rate": 1.612090191591865e-05, + "loss": 0.4009, + "step": 168450 + }, + { + "epoch": 0.8693588414052141, + "grad_norm": 24368.298828125, + "learning_rate": 1.6094044963372444e-05, + "loss": 0.4052, + "step": 168500 + }, + { + "epoch": 0.8696168113878269, + "grad_norm": 24438.0, + "learning_rate": 1.6067206109168453e-05, + "loss": 0.4077, + "step": 168550 + }, + { + "epoch": 0.8698747813704397, + "grad_norm": 30002.744140625, + "learning_rate": 1.6040385367632786e-05, + "loss": 0.4029, + "step": 168600 + }, + { + "epoch": 0.8701327513530526, + "grad_norm": 24591.333984375, + "learning_rate": 1.6013582753081824e-05, + "loss": 0.4019, + "step": 168650 + }, + { + "epoch": 0.8703907213356654, + "grad_norm": 24005.166015625, + "learning_rate": 1.5986798279822263e-05, + "loss": 0.4046, + "step": 168700 + }, + { + "epoch": 0.8706486913182782, + "grad_norm": 22198.482421875, + "learning_rate": 1.5960031962151167e-05, + "loss": 0.4003, + "step": 168750 + }, + { + "epoch": 0.870906661300891, + "grad_norm": 23392.919921875, + "learning_rate": 1.5933283814355872e-05, + "loss": 0.4039, + "step": 168800 + }, + { + "epoch": 0.8711646312835039, + "grad_norm": 26185.88671875, + "learning_rate": 1.5906553850714003e-05, + "loss": 0.4044, + "step": 168850 + }, + { + "epoch": 0.8714226012661167, + "grad_norm": 34066.59765625, + "learning_rate": 1.5879842085493514e-05, + "loss": 0.4068, + "step": 168900 + }, + { + "epoch": 0.8716805712487296, + "grad_norm": 21913.802734375, + "learning_rate": 1.5853148532952616e-05, + "loss": 0.4083, + "step": 168950 + }, + { + "epoch": 0.8719385412313423, + "grad_norm": 22491.25390625, + "learning_rate": 1.5826473207339802e-05, + "loss": 0.4037, + "step": 169000 + }, + { + "epoch": 0.8721965112139551, + "grad_norm": 23891.447265625, + "learning_rate": 1.579981612289389e-05, + "loss": 0.4033, + "step": 169050 + }, + { + "epoch": 0.872454481196568, + "grad_norm": 24374.109375, + "learning_rate": 1.5773177293843855e-05, + "loss": 0.41, + "step": 169100 + }, + { + "epoch": 0.8727124511791808, + "grad_norm": 24323.197265625, + "learning_rate": 1.574655673440903e-05, + "loss": 0.3999, + "step": 169150 + }, + { + "epoch": 0.8729704211617936, + "grad_norm": 22040.76171875, + "learning_rate": 1.5719954458798943e-05, + "loss": 0.3997, + "step": 169200 + }, + { + "epoch": 0.8732283911444064, + "grad_norm": 32067.173828125, + "learning_rate": 1.5693370481213355e-05, + "loss": 0.4028, + "step": 169250 + }, + { + "epoch": 0.8734863611270193, + "grad_norm": 27840.97265625, + "learning_rate": 1.5666804815842322e-05, + "loss": 0.4082, + "step": 169300 + }, + { + "epoch": 0.8737443311096321, + "grad_norm": 23976.154296875, + "learning_rate": 1.5640257476866033e-05, + "loss": 0.4075, + "step": 169350 + }, + { + "epoch": 0.8740023010922449, + "grad_norm": 22856.724609375, + "learning_rate": 1.5613728478454976e-05, + "loss": 0.4033, + "step": 169400 + }, + { + "epoch": 0.8742602710748577, + "grad_norm": 22639.69140625, + "learning_rate": 1.5587217834769803e-05, + "loss": 0.4052, + "step": 169450 + }, + { + "epoch": 0.8745182410574706, + "grad_norm": 24272.626953125, + "learning_rate": 1.5560725559961386e-05, + "loss": 0.4029, + "step": 169500 + }, + { + "epoch": 0.8747762110400834, + "grad_norm": 23789.333984375, + "learning_rate": 1.553425166817079e-05, + "loss": 0.4078, + "step": 169550 + }, + { + "epoch": 0.8750341810226961, + "grad_norm": 23287.294921875, + "learning_rate": 1.5507796173529248e-05, + "loss": 0.408, + "step": 169600 + }, + { + "epoch": 0.875292151005309, + "grad_norm": 22272.13671875, + "learning_rate": 1.548135909015822e-05, + "loss": 0.4017, + "step": 169650 + }, + { + "epoch": 0.8755501209879218, + "grad_norm": 24645.40234375, + "learning_rate": 1.5454940432169297e-05, + "loss": 0.4001, + "step": 169700 + }, + { + "epoch": 0.8758080909705347, + "grad_norm": 26364.072265625, + "learning_rate": 1.5428540213664243e-05, + "loss": 0.411, + "step": 169750 + }, + { + "epoch": 0.8760660609531475, + "grad_norm": 24535.76171875, + "learning_rate": 1.5402158448734987e-05, + "loss": 0.4042, + "step": 169800 + }, + { + "epoch": 0.8763240309357603, + "grad_norm": 23294.94140625, + "learning_rate": 1.53757951514636e-05, + "loss": 0.4083, + "step": 169850 + }, + { + "epoch": 0.8765820009183731, + "grad_norm": 23390.046875, + "learning_rate": 1.5349450335922295e-05, + "loss": 0.399, + "step": 169900 + }, + { + "epoch": 0.876839970900986, + "grad_norm": 23079.41796875, + "learning_rate": 1.5323124016173455e-05, + "loss": 0.4078, + "step": 169950 + }, + { + "epoch": 0.8770979408835988, + "grad_norm": 24190.23046875, + "learning_rate": 1.529681620626951e-05, + "loss": 0.4013, + "step": 170000 + }, + { + "epoch": 0.8770979408835988, + "eval_loss": 0.39030978083610535, + "eval_runtime": 3197.421, + "eval_samples_per_second": 969.882, + "eval_steps_per_second": 1.894, + "step": 170000 + }, + { + "epoch": 0.8773559108662116, + "grad_norm": 24830.658203125, + "learning_rate": 1.5270526920253098e-05, + "loss": 0.4053, + "step": 170050 + }, + { + "epoch": 0.8776138808488244, + "grad_norm": 21314.533203125, + "learning_rate": 1.5244256172156923e-05, + "loss": 0.4067, + "step": 170100 + }, + { + "epoch": 0.8778718508314373, + "grad_norm": 23271.314453125, + "learning_rate": 1.521800397600378e-05, + "loss": 0.4024, + "step": 170150 + }, + { + "epoch": 0.8781298208140501, + "grad_norm": 20112.265625, + "learning_rate": 1.5191770345806632e-05, + "loss": 0.4001, + "step": 170200 + }, + { + "epoch": 0.8783877907966628, + "grad_norm": 23957.087890625, + "learning_rate": 1.5165555295568418e-05, + "loss": 0.406, + "step": 170250 + }, + { + "epoch": 0.8786457607792757, + "grad_norm": 23699.181640625, + "learning_rate": 1.5139358839282275e-05, + "loss": 0.4005, + "step": 170300 + }, + { + "epoch": 0.8789037307618885, + "grad_norm": 23276.4453125, + "learning_rate": 1.5113180990931353e-05, + "loss": 0.4057, + "step": 170350 + }, + { + "epoch": 0.8791617007445014, + "grad_norm": 27051.26171875, + "learning_rate": 1.5087021764488867e-05, + "loss": 0.4037, + "step": 170400 + }, + { + "epoch": 0.8794196707271142, + "grad_norm": 24315.11328125, + "learning_rate": 1.5060881173918112e-05, + "loss": 0.4004, + "step": 170450 + }, + { + "epoch": 0.879677640709727, + "grad_norm": 22589.85546875, + "learning_rate": 1.5034759233172419e-05, + "loss": 0.402, + "step": 170500 + }, + { + "epoch": 0.8799356106923398, + "grad_norm": 24601.666015625, + "learning_rate": 1.5008655956195195e-05, + "loss": 0.4083, + "step": 170550 + }, + { + "epoch": 0.8801935806749527, + "grad_norm": 23203.884765625, + "learning_rate": 1.4982571356919862e-05, + "loss": 0.3971, + "step": 170600 + }, + { + "epoch": 0.8804515506575655, + "grad_norm": 28701.162109375, + "learning_rate": 1.4956505449269858e-05, + "loss": 0.3989, + "step": 170650 + }, + { + "epoch": 0.8807095206401783, + "grad_norm": 23548.541015625, + "learning_rate": 1.4930458247158668e-05, + "loss": 0.4014, + "step": 170700 + }, + { + "epoch": 0.8809674906227911, + "grad_norm": 26836.626953125, + "learning_rate": 1.4904429764489792e-05, + "loss": 0.3964, + "step": 170750 + }, + { + "epoch": 0.881225460605404, + "grad_norm": 23989.537109375, + "learning_rate": 1.4878420015156697e-05, + "loss": 0.4062, + "step": 170800 + }, + { + "epoch": 0.8814834305880168, + "grad_norm": 22008.498046875, + "learning_rate": 1.4852429013042945e-05, + "loss": 0.4034, + "step": 170850 + }, + { + "epoch": 0.8817414005706296, + "grad_norm": 22564.548828125, + "learning_rate": 1.4826456772021957e-05, + "loss": 0.3953, + "step": 170900 + }, + { + "epoch": 0.8819993705532424, + "grad_norm": 20611.005859375, + "learning_rate": 1.4800503305957264e-05, + "loss": 0.3993, + "step": 170950 + }, + { + "epoch": 0.8822573405358553, + "grad_norm": 23731.072265625, + "learning_rate": 1.4774568628702312e-05, + "loss": 0.4008, + "step": 171000 + }, + { + "epoch": 0.8825153105184681, + "grad_norm": 23515.265625, + "learning_rate": 1.4748652754100506e-05, + "loss": 0.4093, + "step": 171050 + }, + { + "epoch": 0.882773280501081, + "grad_norm": 20889.193359375, + "learning_rate": 1.4722755695985291e-05, + "loss": 0.4036, + "step": 171100 + }, + { + "epoch": 0.8830312504836937, + "grad_norm": 23561.208984375, + "learning_rate": 1.4696877468179954e-05, + "loss": 0.4009, + "step": 171150 + }, + { + "epoch": 0.8832892204663065, + "grad_norm": 29216.3046875, + "learning_rate": 1.4671018084497828e-05, + "loss": 0.4087, + "step": 171200 + }, + { + "epoch": 0.8835471904489194, + "grad_norm": 24697.615234375, + "learning_rate": 1.4645177558742147e-05, + "loss": 0.3976, + "step": 171250 + }, + { + "epoch": 0.8838051604315322, + "grad_norm": 30338.123046875, + "learning_rate": 1.4619355904706062e-05, + "loss": 0.4046, + "step": 171300 + }, + { + "epoch": 0.884063130414145, + "grad_norm": 22565.310546875, + "learning_rate": 1.4593553136172705e-05, + "loss": 0.4011, + "step": 171350 + }, + { + "epoch": 0.8843211003967578, + "grad_norm": 23498.0, + "learning_rate": 1.4567769266915077e-05, + "loss": 0.4071, + "step": 171400 + }, + { + "epoch": 0.8845790703793707, + "grad_norm": 23772.279296875, + "learning_rate": 1.4542004310696112e-05, + "loss": 0.4048, + "step": 171450 + }, + { + "epoch": 0.8848370403619835, + "grad_norm": 22418.015625, + "learning_rate": 1.4516258281268636e-05, + "loss": 0.4009, + "step": 171500 + }, + { + "epoch": 0.8850950103445963, + "grad_norm": 25706.166015625, + "learning_rate": 1.4490531192375395e-05, + "loss": 0.4017, + "step": 171550 + }, + { + "epoch": 0.8853529803272091, + "grad_norm": 23563.37890625, + "learning_rate": 1.4464823057748982e-05, + "loss": 0.4056, + "step": 171600 + }, + { + "epoch": 0.885610950309822, + "grad_norm": 23104.65234375, + "learning_rate": 1.4439133891111956e-05, + "loss": 0.4014, + "step": 171650 + }, + { + "epoch": 0.8858689202924348, + "grad_norm": 22858.935546875, + "learning_rate": 1.4413463706176627e-05, + "loss": 0.4047, + "step": 171700 + }, + { + "epoch": 0.8861268902750475, + "grad_norm": 23197.859375, + "learning_rate": 1.4387812516645299e-05, + "loss": 0.4032, + "step": 171750 + }, + { + "epoch": 0.8863848602576604, + "grad_norm": 22323.4609375, + "learning_rate": 1.4362180336210057e-05, + "loss": 0.4018, + "step": 171800 + }, + { + "epoch": 0.8866428302402732, + "grad_norm": 23677.431640625, + "learning_rate": 1.433656717855285e-05, + "loss": 0.4057, + "step": 171850 + }, + { + "epoch": 0.8869008002228861, + "grad_norm": 22975.283203125, + "learning_rate": 1.4310973057345538e-05, + "loss": 0.4077, + "step": 171900 + }, + { + "epoch": 0.8871587702054989, + "grad_norm": 23338.005859375, + "learning_rate": 1.4285397986249694e-05, + "loss": 0.4037, + "step": 171950 + }, + { + "epoch": 0.8874167401881117, + "grad_norm": 22469.08203125, + "learning_rate": 1.4259841978916849e-05, + "loss": 0.4025, + "step": 172000 + }, + { + "epoch": 0.8876747101707245, + "grad_norm": 23508.064453125, + "learning_rate": 1.4234305048988288e-05, + "loss": 0.3979, + "step": 172050 + }, + { + "epoch": 0.8879326801533374, + "grad_norm": 25113.62890625, + "learning_rate": 1.4208787210095126e-05, + "loss": 0.3988, + "step": 172100 + }, + { + "epoch": 0.8881906501359502, + "grad_norm": 23230.75, + "learning_rate": 1.4183288475858298e-05, + "loss": 0.4029, + "step": 172150 + }, + { + "epoch": 0.888448620118563, + "grad_norm": 22058.306640625, + "learning_rate": 1.4157808859888516e-05, + "loss": 0.4082, + "step": 172200 + }, + { + "epoch": 0.8887065901011758, + "grad_norm": 23375.91015625, + "learning_rate": 1.4132348375786336e-05, + "loss": 0.407, + "step": 172250 + }, + { + "epoch": 0.8889645600837887, + "grad_norm": 21199.943359375, + "learning_rate": 1.4106907037142059e-05, + "loss": 0.4039, + "step": 172300 + }, + { + "epoch": 0.8892225300664015, + "grad_norm": 22754.287109375, + "learning_rate": 1.4081484857535777e-05, + "loss": 0.4, + "step": 172350 + }, + { + "epoch": 0.8894805000490142, + "grad_norm": 23116.21484375, + "learning_rate": 1.405608185053735e-05, + "loss": 0.4026, + "step": 172400 + }, + { + "epoch": 0.8897384700316271, + "grad_norm": 22281.65625, + "learning_rate": 1.4030698029706423e-05, + "loss": 0.3992, + "step": 172450 + }, + { + "epoch": 0.8899964400142399, + "grad_norm": 22979.447265625, + "learning_rate": 1.400533340859237e-05, + "loss": 0.4027, + "step": 172500 + }, + { + "epoch": 0.8902544099968528, + "grad_norm": 25733.873046875, + "learning_rate": 1.3979988000734373e-05, + "loss": 0.4092, + "step": 172550 + }, + { + "epoch": 0.8905123799794656, + "grad_norm": 23825.38671875, + "learning_rate": 1.395466181966127e-05, + "loss": 0.3997, + "step": 172600 + }, + { + "epoch": 0.8907703499620784, + "grad_norm": 27504.0703125, + "learning_rate": 1.3929354878891715e-05, + "loss": 0.403, + "step": 172650 + }, + { + "epoch": 0.8910283199446912, + "grad_norm": 28201.208984375, + "learning_rate": 1.3904067191934067e-05, + "loss": 0.4029, + "step": 172700 + }, + { + "epoch": 0.8912862899273041, + "grad_norm": 24115.69140625, + "learning_rate": 1.3878798772286377e-05, + "loss": 0.3979, + "step": 172750 + }, + { + "epoch": 0.8915442599099169, + "grad_norm": 20489.552734375, + "learning_rate": 1.3853549633436491e-05, + "loss": 0.4001, + "step": 172800 + }, + { + "epoch": 0.8918022298925297, + "grad_norm": 23580.583984375, + "learning_rate": 1.3828319788861838e-05, + "loss": 0.3983, + "step": 172850 + }, + { + "epoch": 0.8920601998751425, + "grad_norm": 24172.771484375, + "learning_rate": 1.3803109252029678e-05, + "loss": 0.4081, + "step": 172900 + }, + { + "epoch": 0.8923181698577554, + "grad_norm": 26543.375, + "learning_rate": 1.3777918036396887e-05, + "loss": 0.4015, + "step": 172950 + }, + { + "epoch": 0.8925761398403682, + "grad_norm": 27849.654296875, + "learning_rate": 1.3752746155410046e-05, + "loss": 0.4045, + "step": 173000 + }, + { + "epoch": 0.892834109822981, + "grad_norm": 25752.724609375, + "learning_rate": 1.3727593622505424e-05, + "loss": 0.4022, + "step": 173050 + }, + { + "epoch": 0.8930920798055938, + "grad_norm": 22836.892578125, + "learning_rate": 1.3702460451108935e-05, + "loss": 0.4015, + "step": 173100 + }, + { + "epoch": 0.8933500497882066, + "grad_norm": 26556.62890625, + "learning_rate": 1.3677346654636208e-05, + "loss": 0.4017, + "step": 173150 + }, + { + "epoch": 0.8936080197708195, + "grad_norm": 24310.390625, + "learning_rate": 1.3652252246492492e-05, + "loss": 0.4015, + "step": 173200 + }, + { + "epoch": 0.8938659897534323, + "grad_norm": 23713.0859375, + "learning_rate": 1.3627177240072698e-05, + "loss": 0.4024, + "step": 173250 + }, + { + "epoch": 0.8941239597360451, + "grad_norm": 21189.57421875, + "learning_rate": 1.3602121648761373e-05, + "loss": 0.4012, + "step": 173300 + }, + { + "epoch": 0.8943819297186579, + "grad_norm": 24229.1484375, + "learning_rate": 1.3577085485932705e-05, + "loss": 0.4105, + "step": 173350 + }, + { + "epoch": 0.8946398997012708, + "grad_norm": 23998.22265625, + "learning_rate": 1.3552068764950504e-05, + "loss": 0.4004, + "step": 173400 + }, + { + "epoch": 0.8948978696838836, + "grad_norm": 24751.1171875, + "learning_rate": 1.3527071499168253e-05, + "loss": 0.4024, + "step": 173450 + }, + { + "epoch": 0.8951558396664964, + "grad_norm": 24872.029296875, + "learning_rate": 1.3502093701928948e-05, + "loss": 0.406, + "step": 173500 + }, + { + "epoch": 0.8954138096491092, + "grad_norm": 23180.771484375, + "learning_rate": 1.3477135386565297e-05, + "loss": 0.4041, + "step": 173550 + }, + { + "epoch": 0.8956717796317221, + "grad_norm": 23679.1484375, + "learning_rate": 1.3452196566399555e-05, + "loss": 0.4095, + "step": 173600 + }, + { + "epoch": 0.8959297496143349, + "grad_norm": 26730.537109375, + "learning_rate": 1.3427277254743565e-05, + "loss": 0.4058, + "step": 173650 + }, + { + "epoch": 0.8961877195969477, + "grad_norm": 23320.666015625, + "learning_rate": 1.3402377464898813e-05, + "loss": 0.4038, + "step": 173700 + }, + { + "epoch": 0.8964456895795605, + "grad_norm": 22802.87890625, + "learning_rate": 1.3377497210156276e-05, + "loss": 0.3977, + "step": 173750 + }, + { + "epoch": 0.8967036595621733, + "grad_norm": 21257.22265625, + "learning_rate": 1.3352636503796584e-05, + "loss": 0.4074, + "step": 173800 + }, + { + "epoch": 0.8969616295447862, + "grad_norm": 23935.412109375, + "learning_rate": 1.332779535908989e-05, + "loss": 0.4021, + "step": 173850 + }, + { + "epoch": 0.8972195995273989, + "grad_norm": 21819.267578125, + "learning_rate": 1.3302973789295925e-05, + "loss": 0.3992, + "step": 173900 + }, + { + "epoch": 0.8974775695100118, + "grad_norm": 23360.71875, + "learning_rate": 1.327817180766393e-05, + "loss": 0.4051, + "step": 173950 + }, + { + "epoch": 0.8977355394926246, + "grad_norm": 24474.685546875, + "learning_rate": 1.3253389427432772e-05, + "loss": 0.4046, + "step": 174000 + }, + { + "epoch": 0.8979935094752375, + "grad_norm": 29715.3359375, + "learning_rate": 1.3228626661830779e-05, + "loss": 0.4037, + "step": 174050 + }, + { + "epoch": 0.8982514794578503, + "grad_norm": 23241.20703125, + "learning_rate": 1.3203883524075833e-05, + "loss": 0.4003, + "step": 174100 + }, + { + "epoch": 0.8985094494404631, + "grad_norm": 26005.23828125, + "learning_rate": 1.3179160027375347e-05, + "loss": 0.3992, + "step": 174150 + }, + { + "epoch": 0.8987674194230759, + "grad_norm": 23322.212890625, + "learning_rate": 1.3154456184926234e-05, + "loss": 0.4037, + "step": 174200 + }, + { + "epoch": 0.8990253894056888, + "grad_norm": 22434.90234375, + "learning_rate": 1.3129772009914964e-05, + "loss": 0.4044, + "step": 174250 + }, + { + "epoch": 0.8992833593883016, + "grad_norm": 24753.904296875, + "learning_rate": 1.3105107515517418e-05, + "loss": 0.4034, + "step": 174300 + }, + { + "epoch": 0.8995413293709144, + "grad_norm": 23271.814453125, + "learning_rate": 1.3080462714899066e-05, + "loss": 0.3992, + "step": 174350 + }, + { + "epoch": 0.8997992993535272, + "grad_norm": 23929.7578125, + "learning_rate": 1.3055837621214811e-05, + "loss": 0.4018, + "step": 174400 + }, + { + "epoch": 0.90005726933614, + "grad_norm": 25211.7265625, + "learning_rate": 1.3031232247609037e-05, + "loss": 0.4052, + "step": 174450 + }, + { + "epoch": 0.9003152393187529, + "grad_norm": 24554.791015625, + "learning_rate": 1.300664660721566e-05, + "loss": 0.3987, + "step": 174500 + }, + { + "epoch": 0.9005732093013656, + "grad_norm": 26028.396484375, + "learning_rate": 1.2982080713157963e-05, + "loss": 0.4032, + "step": 174550 + }, + { + "epoch": 0.9008311792839785, + "grad_norm": 24228.72265625, + "learning_rate": 1.295753457854878e-05, + "loss": 0.4001, + "step": 174600 + }, + { + "epoch": 0.9010891492665913, + "grad_norm": 24043.064453125, + "learning_rate": 1.293300821649036e-05, + "loss": 0.4009, + "step": 174650 + }, + { + "epoch": 0.9013471192492042, + "grad_norm": 25628.208984375, + "learning_rate": 1.2908501640074388e-05, + "loss": 0.4058, + "step": 174700 + }, + { + "epoch": 0.901605089231817, + "grad_norm": 23927.81640625, + "learning_rate": 1.288401486238201e-05, + "loss": 0.4044, + "step": 174750 + }, + { + "epoch": 0.9018630592144298, + "grad_norm": 23615.923828125, + "learning_rate": 1.2859547896483793e-05, + "loss": 0.4042, + "step": 174800 + }, + { + "epoch": 0.9021210291970426, + "grad_norm": 24990.158203125, + "learning_rate": 1.2835100755439705e-05, + "loss": 0.4033, + "step": 174850 + }, + { + "epoch": 0.9023789991796555, + "grad_norm": 23908.240234375, + "learning_rate": 1.2810673452299194e-05, + "loss": 0.404, + "step": 174900 + }, + { + "epoch": 0.9026369691622683, + "grad_norm": 24776.828125, + "learning_rate": 1.278626600010106e-05, + "loss": 0.4017, + "step": 174950 + }, + { + "epoch": 0.9028949391448811, + "grad_norm": 23400.912109375, + "learning_rate": 1.276187841187354e-05, + "loss": 0.4007, + "step": 175000 + }, + { + "epoch": 0.9028949391448811, + "eval_loss": 0.389443963766098, + "eval_runtime": 3184.6844, + "eval_samples_per_second": 973.761, + "eval_steps_per_second": 1.902, + "step": 175000 + }, + { + "epoch": 0.9031529091274939, + "grad_norm": 23482.337890625, + "learning_rate": 1.2737510700634248e-05, + "loss": 0.4033, + "step": 175050 + }, + { + "epoch": 0.9034108791101068, + "grad_norm": 24351.23828125, + "learning_rate": 1.2713162879390183e-05, + "loss": 0.4031, + "step": 175100 + }, + { + "epoch": 0.9036688490927196, + "grad_norm": 28495.6796875, + "learning_rate": 1.2688834961137785e-05, + "loss": 0.4057, + "step": 175150 + }, + { + "epoch": 0.9039268190753323, + "grad_norm": 23276.583984375, + "learning_rate": 1.2664526958862765e-05, + "loss": 0.4036, + "step": 175200 + }, + { + "epoch": 0.9041847890579452, + "grad_norm": 22784.033203125, + "learning_rate": 1.2640238885540312e-05, + "loss": 0.4054, + "step": 175250 + }, + { + "epoch": 0.904442759040558, + "grad_norm": 22389.21484375, + "learning_rate": 1.2615970754134914e-05, + "loss": 0.4036, + "step": 175300 + }, + { + "epoch": 0.9047007290231709, + "grad_norm": 24767.59375, + "learning_rate": 1.2591722577600412e-05, + "loss": 0.4055, + "step": 175350 + }, + { + "epoch": 0.9049586990057837, + "grad_norm": 24981.552734375, + "learning_rate": 1.2567494368880056e-05, + "loss": 0.3997, + "step": 175400 + }, + { + "epoch": 0.9052166689883965, + "grad_norm": 24523.580078125, + "learning_rate": 1.254328614090634e-05, + "loss": 0.4009, + "step": 175450 + }, + { + "epoch": 0.9054746389710093, + "grad_norm": 29571.404296875, + "learning_rate": 1.251909790660119e-05, + "loss": 0.4013, + "step": 175500 + }, + { + "epoch": 0.9057326089536222, + "grad_norm": 23286.564453125, + "learning_rate": 1.24949296788758e-05, + "loss": 0.3997, + "step": 175550 + }, + { + "epoch": 0.905990578936235, + "grad_norm": 23124.205078125, + "learning_rate": 1.247078147063071e-05, + "loss": 0.4056, + "step": 175600 + }, + { + "epoch": 0.9062485489188478, + "grad_norm": 23467.775390625, + "learning_rate": 1.2446653294755755e-05, + "loss": 0.3976, + "step": 175650 + }, + { + "epoch": 0.9065065189014606, + "grad_norm": 23793.609375, + "learning_rate": 1.2422545164130096e-05, + "loss": 0.4018, + "step": 175700 + }, + { + "epoch": 0.9067644888840735, + "grad_norm": 24439.974609375, + "learning_rate": 1.2398457091622167e-05, + "loss": 0.4063, + "step": 175750 + }, + { + "epoch": 0.9070224588666863, + "grad_norm": 23925.22265625, + "learning_rate": 1.2374389090089744e-05, + "loss": 0.4039, + "step": 175800 + }, + { + "epoch": 0.907280428849299, + "grad_norm": 23174.416015625, + "learning_rate": 1.2350341172379853e-05, + "loss": 0.4031, + "step": 175850 + }, + { + "epoch": 0.9075383988319119, + "grad_norm": 26669.806640625, + "learning_rate": 1.2326313351328794e-05, + "loss": 0.4031, + "step": 175900 + }, + { + "epoch": 0.9077963688145247, + "grad_norm": 21128.041015625, + "learning_rate": 1.2302305639762168e-05, + "loss": 0.407, + "step": 175950 + }, + { + "epoch": 0.9080543387971376, + "grad_norm": 22798.111328125, + "learning_rate": 1.2278318050494797e-05, + "loss": 0.4035, + "step": 176000 + }, + { + "epoch": 0.9083123087797504, + "grad_norm": 23327.587890625, + "learning_rate": 1.2254350596330843e-05, + "loss": 0.3958, + "step": 176050 + }, + { + "epoch": 0.9085702787623632, + "grad_norm": 22225.3125, + "learning_rate": 1.2230403290063613e-05, + "loss": 0.4074, + "step": 176100 + }, + { + "epoch": 0.908828248744976, + "grad_norm": 22727.791015625, + "learning_rate": 1.2206476144475754e-05, + "loss": 0.4063, + "step": 176150 + }, + { + "epoch": 0.9090862187275889, + "grad_norm": 26138.931640625, + "learning_rate": 1.2182569172339098e-05, + "loss": 0.408, + "step": 176200 + }, + { + "epoch": 0.9093441887102017, + "grad_norm": 23436.91796875, + "learning_rate": 1.2158682386414716e-05, + "loss": 0.4038, + "step": 176250 + }, + { + "epoch": 0.9096021586928145, + "grad_norm": 23695.244140625, + "learning_rate": 1.2134815799452947e-05, + "loss": 0.4074, + "step": 176300 + }, + { + "epoch": 0.9098601286754273, + "grad_norm": 25616.240234375, + "learning_rate": 1.2110969424193263e-05, + "loss": 0.3971, + "step": 176350 + }, + { + "epoch": 0.9101180986580402, + "grad_norm": 27326.634765625, + "learning_rate": 1.2087143273364431e-05, + "loss": 0.4045, + "step": 176400 + }, + { + "epoch": 0.910376068640653, + "grad_norm": 23704.775390625, + "learning_rate": 1.2063337359684384e-05, + "loss": 0.4071, + "step": 176450 + }, + { + "epoch": 0.9106340386232658, + "grad_norm": 25532.234375, + "learning_rate": 1.2039551695860251e-05, + "loss": 0.4021, + "step": 176500 + }, + { + "epoch": 0.9108920086058786, + "grad_norm": 25247.884765625, + "learning_rate": 1.201578629458835e-05, + "loss": 0.4074, + "step": 176550 + }, + { + "epoch": 0.9111499785884914, + "grad_norm": 29377.486328125, + "learning_rate": 1.1992041168554236e-05, + "loss": 0.4064, + "step": 176600 + }, + { + "epoch": 0.9114079485711043, + "grad_norm": 22188.34375, + "learning_rate": 1.1968316330432527e-05, + "loss": 0.404, + "step": 176650 + }, + { + "epoch": 0.911665918553717, + "grad_norm": 23766.0546875, + "learning_rate": 1.194461179288714e-05, + "loss": 0.4016, + "step": 176700 + }, + { + "epoch": 0.9119238885363299, + "grad_norm": 21386.623046875, + "learning_rate": 1.1920927568571078e-05, + "loss": 0.4055, + "step": 176750 + }, + { + "epoch": 0.9121818585189427, + "grad_norm": 25873.052734375, + "learning_rate": 1.1897263670126507e-05, + "loss": 0.3978, + "step": 176800 + }, + { + "epoch": 0.9124398285015556, + "grad_norm": 25235.5390625, + "learning_rate": 1.1873620110184803e-05, + "loss": 0.3975, + "step": 176850 + }, + { + "epoch": 0.9126977984841684, + "grad_norm": 22841.5, + "learning_rate": 1.1849996901366383e-05, + "loss": 0.4031, + "step": 176900 + }, + { + "epoch": 0.9129557684667812, + "grad_norm": 21522.388671875, + "learning_rate": 1.1826394056280893e-05, + "loss": 0.4048, + "step": 176950 + }, + { + "epoch": 0.913213738449394, + "grad_norm": 27600.689453125, + "learning_rate": 1.1802811587527074e-05, + "loss": 0.3984, + "step": 177000 + }, + { + "epoch": 0.9134717084320069, + "grad_norm": 24698.60546875, + "learning_rate": 1.177924950769278e-05, + "loss": 0.406, + "step": 177050 + }, + { + "epoch": 0.9137296784146197, + "grad_norm": 27378.033203125, + "learning_rate": 1.1755707829355001e-05, + "loss": 0.3993, + "step": 177100 + }, + { + "epoch": 0.9139876483972325, + "grad_norm": 27578.4296875, + "learning_rate": 1.1732186565079805e-05, + "loss": 0.3984, + "step": 177150 + }, + { + "epoch": 0.9142456183798453, + "grad_norm": 24650.6953125, + "learning_rate": 1.1708685727422424e-05, + "loss": 0.401, + "step": 177200 + }, + { + "epoch": 0.9145035883624582, + "grad_norm": 25550.0859375, + "learning_rate": 1.1685205328927135e-05, + "loss": 0.399, + "step": 177250 + }, + { + "epoch": 0.914761558345071, + "grad_norm": 22760.77734375, + "learning_rate": 1.166174538212732e-05, + "loss": 0.403, + "step": 177300 + }, + { + "epoch": 0.9150195283276837, + "grad_norm": 22038.26171875, + "learning_rate": 1.1638305899545443e-05, + "loss": 0.4066, + "step": 177350 + }, + { + "epoch": 0.9152774983102966, + "grad_norm": 23857.66015625, + "learning_rate": 1.1614886893693044e-05, + "loss": 0.4038, + "step": 177400 + }, + { + "epoch": 0.9155354682929094, + "grad_norm": 24813.55859375, + "learning_rate": 1.1591488377070724e-05, + "loss": 0.3992, + "step": 177450 + }, + { + "epoch": 0.9157934382755223, + "grad_norm": 24467.5859375, + "learning_rate": 1.1568110362168199e-05, + "loss": 0.4, + "step": 177500 + }, + { + "epoch": 0.9160514082581351, + "grad_norm": 22464.98046875, + "learning_rate": 1.1544752861464143e-05, + "loss": 0.4069, + "step": 177550 + }, + { + "epoch": 0.9163093782407479, + "grad_norm": 26591.51171875, + "learning_rate": 1.1521415887426379e-05, + "loss": 0.4008, + "step": 177600 + }, + { + "epoch": 0.9165673482233607, + "grad_norm": 21086.318359375, + "learning_rate": 1.1498099452511724e-05, + "loss": 0.4036, + "step": 177650 + }, + { + "epoch": 0.9168253182059736, + "grad_norm": 24243.072265625, + "learning_rate": 1.147480356916602e-05, + "loss": 0.4019, + "step": 177700 + }, + { + "epoch": 0.9170832881885864, + "grad_norm": 26714.83984375, + "learning_rate": 1.1451528249824206e-05, + "loss": 0.3978, + "step": 177750 + }, + { + "epoch": 0.9173412581711992, + "grad_norm": 24799.712890625, + "learning_rate": 1.1428273506910132e-05, + "loss": 0.4078, + "step": 177800 + }, + { + "epoch": 0.917599228153812, + "grad_norm": 25010.435546875, + "learning_rate": 1.1405039352836777e-05, + "loss": 0.4054, + "step": 177850 + }, + { + "epoch": 0.9178571981364249, + "grad_norm": 23657.78125, + "learning_rate": 1.1381825800006068e-05, + "loss": 0.4001, + "step": 177900 + }, + { + "epoch": 0.9181151681190377, + "grad_norm": 23865.349609375, + "learning_rate": 1.1358632860808955e-05, + "loss": 0.4012, + "step": 177950 + }, + { + "epoch": 0.9183731381016504, + "grad_norm": 26476.04296875, + "learning_rate": 1.1335460547625365e-05, + "loss": 0.3998, + "step": 178000 + }, + { + "epoch": 0.9186311080842633, + "grad_norm": 24907.89453125, + "learning_rate": 1.1312308872824235e-05, + "loss": 0.401, + "step": 178050 + }, + { + "epoch": 0.9188890780668761, + "grad_norm": 24008.54296875, + "learning_rate": 1.1289177848763494e-05, + "loss": 0.3991, + "step": 178100 + }, + { + "epoch": 0.919147048049489, + "grad_norm": 23814.396484375, + "learning_rate": 1.1266067487790027e-05, + "loss": 0.4039, + "step": 178150 + }, + { + "epoch": 0.9194050180321018, + "grad_norm": 25892.994140625, + "learning_rate": 1.1242977802239696e-05, + "loss": 0.4015, + "step": 178200 + }, + { + "epoch": 0.9196629880147146, + "grad_norm": 24185.7265625, + "learning_rate": 1.1219908804437328e-05, + "loss": 0.3992, + "step": 178250 + }, + { + "epoch": 0.9199209579973274, + "grad_norm": 23890.54296875, + "learning_rate": 1.1196860506696705e-05, + "loss": 0.4087, + "step": 178300 + }, + { + "epoch": 0.9201789279799403, + "grad_norm": 25288.83203125, + "learning_rate": 1.1173832921320554e-05, + "loss": 0.4038, + "step": 178350 + }, + { + "epoch": 0.9204368979625531, + "grad_norm": 27609.994140625, + "learning_rate": 1.1150826060600594e-05, + "loss": 0.4047, + "step": 178400 + }, + { + "epoch": 0.9206948679451659, + "grad_norm": 25010.259765625, + "learning_rate": 1.112783993681738e-05, + "loss": 0.4037, + "step": 178450 + }, + { + "epoch": 0.9209528379277787, + "grad_norm": 23663.78515625, + "learning_rate": 1.1104874562240514e-05, + "loss": 0.396, + "step": 178500 + }, + { + "epoch": 0.9212108079103916, + "grad_norm": 24960.072265625, + "learning_rate": 1.108192994912844e-05, + "loss": 0.4024, + "step": 178550 + }, + { + "epoch": 0.9214687778930044, + "grad_norm": 22778.66796875, + "learning_rate": 1.1059006109728543e-05, + "loss": 0.4039, + "step": 178600 + }, + { + "epoch": 0.9217267478756171, + "grad_norm": 20177.640625, + "learning_rate": 1.1036103056277165e-05, + "loss": 0.4008, + "step": 178650 + }, + { + "epoch": 0.92198471785823, + "grad_norm": 25084.703125, + "learning_rate": 1.1013220800999452e-05, + "loss": 0.4082, + "step": 178700 + }, + { + "epoch": 0.9222426878408428, + "grad_norm": 23697.529296875, + "learning_rate": 1.0990359356109558e-05, + "loss": 0.4083, + "step": 178750 + }, + { + "epoch": 0.9225006578234557, + "grad_norm": 26252.25, + "learning_rate": 1.0967518733810462e-05, + "loss": 0.4114, + "step": 178800 + }, + { + "epoch": 0.9227586278060684, + "grad_norm": 25295.103515625, + "learning_rate": 1.094469894629403e-05, + "loss": 0.4062, + "step": 178850 + }, + { + "epoch": 0.9230165977886813, + "grad_norm": 24484.203125, + "learning_rate": 1.0921900005741053e-05, + "loss": 0.4008, + "step": 178900 + }, + { + "epoch": 0.9232745677712941, + "grad_norm": 23360.701171875, + "learning_rate": 1.0899121924321154e-05, + "loss": 0.405, + "step": 178950 + }, + { + "epoch": 0.923532537753907, + "grad_norm": 22507.24609375, + "learning_rate": 1.0876364714192822e-05, + "loss": 0.3968, + "step": 179000 + }, + { + "epoch": 0.9237905077365198, + "grad_norm": 26761.66015625, + "learning_rate": 1.0853628387503423e-05, + "loss": 0.4021, + "step": 179050 + }, + { + "epoch": 0.9240484777191326, + "grad_norm": 26596.376953125, + "learning_rate": 1.0830912956389166e-05, + "loss": 0.3984, + "step": 179100 + }, + { + "epoch": 0.9243064477017454, + "grad_norm": 23996.490234375, + "learning_rate": 1.0808218432975093e-05, + "loss": 0.3996, + "step": 179150 + }, + { + "epoch": 0.9245644176843583, + "grad_norm": 22681.4609375, + "learning_rate": 1.0785544829375143e-05, + "loss": 0.4021, + "step": 179200 + }, + { + "epoch": 0.9248223876669711, + "grad_norm": 25675.728515625, + "learning_rate": 1.0762892157691995e-05, + "loss": 0.3942, + "step": 179250 + }, + { + "epoch": 0.9250803576495839, + "grad_norm": 26039.25, + "learning_rate": 1.0740260430017247e-05, + "loss": 0.4014, + "step": 179300 + }, + { + "epoch": 0.9253383276321967, + "grad_norm": 21596.50390625, + "learning_rate": 1.0717649658431256e-05, + "loss": 0.4017, + "step": 179350 + }, + { + "epoch": 0.9255962976148095, + "grad_norm": 25318.3125, + "learning_rate": 1.0695059855003204e-05, + "loss": 0.3968, + "step": 179400 + }, + { + "epoch": 0.9258542675974224, + "grad_norm": 20999.10546875, + "learning_rate": 1.0672491031791137e-05, + "loss": 0.4032, + "step": 179450 + }, + { + "epoch": 0.9261122375800351, + "grad_norm": 25034.404296875, + "learning_rate": 1.0649943200841794e-05, + "loss": 0.3987, + "step": 179500 + }, + { + "epoch": 0.926370207562648, + "grad_norm": 23470.205078125, + "learning_rate": 1.0627416374190819e-05, + "loss": 0.4009, + "step": 179550 + }, + { + "epoch": 0.9266281775452608, + "grad_norm": 23667.298828125, + "learning_rate": 1.0604910563862575e-05, + "loss": 0.4022, + "step": 179600 + }, + { + "epoch": 0.9268861475278737, + "grad_norm": 25315.5390625, + "learning_rate": 1.058242578187023e-05, + "loss": 0.4023, + "step": 179650 + }, + { + "epoch": 0.9271441175104865, + "grad_norm": 23639.34375, + "learning_rate": 1.0559962040215727e-05, + "loss": 0.407, + "step": 179700 + }, + { + "epoch": 0.9274020874930993, + "grad_norm": 29350.244140625, + "learning_rate": 1.0537519350889764e-05, + "loss": 0.4063, + "step": 179750 + }, + { + "epoch": 0.9276600574757121, + "grad_norm": 26077.30859375, + "learning_rate": 1.051509772587183e-05, + "loss": 0.4011, + "step": 179800 + }, + { + "epoch": 0.927918027458325, + "grad_norm": 22387.8046875, + "learning_rate": 1.0492697177130157e-05, + "loss": 0.398, + "step": 179850 + }, + { + "epoch": 0.9281759974409378, + "grad_norm": 24023.2734375, + "learning_rate": 1.0470317716621719e-05, + "loss": 0.4026, + "step": 179900 + }, + { + "epoch": 0.9284339674235506, + "grad_norm": 24288.666015625, + "learning_rate": 1.044795935629223e-05, + "loss": 0.403, + "step": 179950 + }, + { + "epoch": 0.9286919374061634, + "grad_norm": 26163.923828125, + "learning_rate": 1.042562210807616e-05, + "loss": 0.4001, + "step": 180000 + }, + { + "epoch": 0.9286919374061634, + "eval_loss": 0.3886363208293915, + "eval_runtime": 3188.2841, + "eval_samples_per_second": 972.661, + "eval_steps_per_second": 1.9, + "step": 180000 + }, + { + "epoch": 0.9289499073887763, + "grad_norm": 24379.322265625, + "learning_rate": 1.0403305983896683e-05, + "loss": 0.3978, + "step": 180050 + }, + { + "epoch": 0.9292078773713891, + "grad_norm": 23249.939453125, + "learning_rate": 1.0381010995665752e-05, + "loss": 0.4055, + "step": 180100 + }, + { + "epoch": 0.9294658473540018, + "grad_norm": 25460.6875, + "learning_rate": 1.0358737155283942e-05, + "loss": 0.4059, + "step": 180150 + }, + { + "epoch": 0.9297238173366147, + "grad_norm": 23166.548828125, + "learning_rate": 1.0336484474640651e-05, + "loss": 0.4051, + "step": 180200 + }, + { + "epoch": 0.9299817873192275, + "grad_norm": 23631.94921875, + "learning_rate": 1.0314252965613908e-05, + "loss": 0.3974, + "step": 180250 + }, + { + "epoch": 0.9302397573018404, + "grad_norm": 26213.556640625, + "learning_rate": 1.0292042640070449e-05, + "loss": 0.3983, + "step": 180300 + }, + { + "epoch": 0.9304977272844532, + "grad_norm": 24056.875, + "learning_rate": 1.0269853509865751e-05, + "loss": 0.3979, + "step": 180350 + }, + { + "epoch": 0.930755697267066, + "grad_norm": 24793.658203125, + "learning_rate": 1.0247685586843897e-05, + "loss": 0.3993, + "step": 180400 + }, + { + "epoch": 0.9310136672496788, + "grad_norm": 25296.04296875, + "learning_rate": 1.0225538882837733e-05, + "loss": 0.4047, + "step": 180450 + }, + { + "epoch": 0.9312716372322917, + "grad_norm": 21486.990234375, + "learning_rate": 1.0203413409668722e-05, + "loss": 0.3995, + "step": 180500 + }, + { + "epoch": 0.9315296072149045, + "grad_norm": 24168.083984375, + "learning_rate": 1.018130917914702e-05, + "loss": 0.4081, + "step": 180550 + }, + { + "epoch": 0.9317875771975173, + "grad_norm": 25313.568359375, + "learning_rate": 1.0159226203071431e-05, + "loss": 0.4024, + "step": 180600 + }, + { + "epoch": 0.9320455471801301, + "grad_norm": 22535.845703125, + "learning_rate": 1.0137164493229411e-05, + "loss": 0.3974, + "step": 180650 + }, + { + "epoch": 0.932303517162743, + "grad_norm": 24480.0703125, + "learning_rate": 1.0115124061397102e-05, + "loss": 0.4031, + "step": 180700 + }, + { + "epoch": 0.9325614871453558, + "grad_norm": 29667.470703125, + "learning_rate": 1.0093104919339241e-05, + "loss": 0.3991, + "step": 180750 + }, + { + "epoch": 0.9328194571279685, + "grad_norm": 22311.767578125, + "learning_rate": 1.0071107078809228e-05, + "loss": 0.402, + "step": 180800 + }, + { + "epoch": 0.9330774271105814, + "grad_norm": 22752.642578125, + "learning_rate": 1.0049130551549068e-05, + "loss": 0.4022, + "step": 180850 + }, + { + "epoch": 0.9333353970931942, + "grad_norm": 26333.43359375, + "learning_rate": 1.0027175349289424e-05, + "loss": 0.4006, + "step": 180900 + }, + { + "epoch": 0.9335933670758071, + "grad_norm": 22951.927734375, + "learning_rate": 1.0005241483749533e-05, + "loss": 0.4022, + "step": 180950 + }, + { + "epoch": 0.9338513370584198, + "grad_norm": 24532.15625, + "learning_rate": 9.983328966637318e-06, + "loss": 0.398, + "step": 181000 + }, + { + "epoch": 0.9341093070410327, + "grad_norm": 24624.205078125, + "learning_rate": 9.961437809649188e-06, + "loss": 0.4021, + "step": 181050 + }, + { + "epoch": 0.9343672770236455, + "grad_norm": 23679.087890625, + "learning_rate": 9.93956802447027e-06, + "loss": 0.4038, + "step": 181100 + }, + { + "epoch": 0.9346252470062584, + "grad_norm": 22279.52734375, + "learning_rate": 9.917719622774219e-06, + "loss": 0.3987, + "step": 181150 + }, + { + "epoch": 0.9348832169888712, + "grad_norm": 25709.376953125, + "learning_rate": 9.895892616223268e-06, + "loss": 0.4062, + "step": 181200 + }, + { + "epoch": 0.935141186971484, + "grad_norm": 24607.25, + "learning_rate": 9.874087016468298e-06, + "loss": 0.3973, + "step": 181250 + }, + { + "epoch": 0.9353991569540968, + "grad_norm": 25458.861328125, + "learning_rate": 9.852302835148652e-06, + "loss": 0.3993, + "step": 181300 + }, + { + "epoch": 0.9356571269367097, + "grad_norm": 24070.654296875, + "learning_rate": 9.830540083892358e-06, + "loss": 0.4057, + "step": 181350 + }, + { + "epoch": 0.9359150969193225, + "grad_norm": 25323.736328125, + "learning_rate": 9.80879877431593e-06, + "loss": 0.407, + "step": 181400 + }, + { + "epoch": 0.9361730669019352, + "grad_norm": 27513.087890625, + "learning_rate": 9.787078918024455e-06, + "loss": 0.3979, + "step": 181450 + }, + { + "epoch": 0.9364310368845481, + "grad_norm": 22324.669921875, + "learning_rate": 9.765380526611568e-06, + "loss": 0.3984, + "step": 181500 + }, + { + "epoch": 0.936689006867161, + "grad_norm": 23778.37890625, + "learning_rate": 9.743703611659465e-06, + "loss": 0.4055, + "step": 181550 + }, + { + "epoch": 0.9369469768497738, + "grad_norm": 26777.255859375, + "learning_rate": 9.722048184738864e-06, + "loss": 0.4047, + "step": 181600 + }, + { + "epoch": 0.9372049468323865, + "grad_norm": 23210.876953125, + "learning_rate": 9.700414257409002e-06, + "loss": 0.393, + "step": 181650 + }, + { + "epoch": 0.9374629168149994, + "grad_norm": 22539.84765625, + "learning_rate": 9.67880184121765e-06, + "loss": 0.4069, + "step": 181700 + }, + { + "epoch": 0.9377208867976122, + "grad_norm": 25191.609375, + "learning_rate": 9.65721094770109e-06, + "loss": 0.4069, + "step": 181750 + }, + { + "epoch": 0.9379788567802251, + "grad_norm": 23813.578125, + "learning_rate": 9.63564158838416e-06, + "loss": 0.3954, + "step": 181800 + }, + { + "epoch": 0.9382368267628379, + "grad_norm": 23869.703125, + "learning_rate": 9.614093774780114e-06, + "loss": 0.3998, + "step": 181850 + }, + { + "epoch": 0.9384947967454507, + "grad_norm": 23316.384765625, + "learning_rate": 9.5925675183908e-06, + "loss": 0.3989, + "step": 181900 + }, + { + "epoch": 0.9387527667280635, + "grad_norm": 23641.65625, + "learning_rate": 9.571062830706496e-06, + "loss": 0.4017, + "step": 181950 + }, + { + "epoch": 0.9390107367106764, + "grad_norm": 23724.431640625, + "learning_rate": 9.549579723205982e-06, + "loss": 0.4042, + "step": 182000 + }, + { + "epoch": 0.9392687066932892, + "grad_norm": 24013.849609375, + "learning_rate": 9.528118207356556e-06, + "loss": 0.3966, + "step": 182050 + }, + { + "epoch": 0.939526676675902, + "grad_norm": 21843.55859375, + "learning_rate": 9.506678294613919e-06, + "loss": 0.4051, + "step": 182100 + }, + { + "epoch": 0.9397846466585148, + "grad_norm": 22000.7734375, + "learning_rate": 9.485259996422313e-06, + "loss": 0.4042, + "step": 182150 + }, + { + "epoch": 0.9400426166411276, + "grad_norm": 23307.556640625, + "learning_rate": 9.463863324214395e-06, + "loss": 0.4018, + "step": 182200 + }, + { + "epoch": 0.9403005866237405, + "grad_norm": 22961.353515625, + "learning_rate": 9.4424882894113e-06, + "loss": 0.3991, + "step": 182250 + }, + { + "epoch": 0.9405585566063532, + "grad_norm": 24167.134765625, + "learning_rate": 9.421134903422607e-06, + "loss": 0.4033, + "step": 182300 + }, + { + "epoch": 0.9408165265889661, + "grad_norm": 24116.75, + "learning_rate": 9.399803177646339e-06, + "loss": 0.3979, + "step": 182350 + }, + { + "epoch": 0.9410744965715789, + "grad_norm": 25658.6640625, + "learning_rate": 9.378493123468946e-06, + "loss": 0.4093, + "step": 182400 + }, + { + "epoch": 0.9413324665541918, + "grad_norm": 27761.8828125, + "learning_rate": 9.357204752265341e-06, + "loss": 0.3974, + "step": 182450 + }, + { + "epoch": 0.9415904365368046, + "grad_norm": 23456.90234375, + "learning_rate": 9.335938075398842e-06, + "loss": 0.4072, + "step": 182500 + }, + { + "epoch": 0.9418484065194174, + "grad_norm": 21258.984375, + "learning_rate": 9.314693104221184e-06, + "loss": 0.3952, + "step": 182550 + }, + { + "epoch": 0.9421063765020302, + "grad_norm": 22634.01953125, + "learning_rate": 9.293469850072522e-06, + "loss": 0.402, + "step": 182600 + }, + { + "epoch": 0.9423643464846431, + "grad_norm": 22349.267578125, + "learning_rate": 9.272268324281407e-06, + "loss": 0.3974, + "step": 182650 + }, + { + "epoch": 0.9426223164672559, + "grad_norm": 23658.505859375, + "learning_rate": 9.251088538164837e-06, + "loss": 0.3979, + "step": 182700 + }, + { + "epoch": 0.9428802864498687, + "grad_norm": 26879.39453125, + "learning_rate": 9.229930503028129e-06, + "loss": 0.3965, + "step": 182750 + }, + { + "epoch": 0.9431382564324815, + "grad_norm": 25313.255859375, + "learning_rate": 9.208794230165058e-06, + "loss": 0.4049, + "step": 182800 + }, + { + "epoch": 0.9433962264150944, + "grad_norm": 26135.587890625, + "learning_rate": 9.187679730857756e-06, + "loss": 0.408, + "step": 182850 + }, + { + "epoch": 0.9436541963977072, + "grad_norm": 24064.087890625, + "learning_rate": 9.166587016376715e-06, + "loss": 0.4025, + "step": 182900 + }, + { + "epoch": 0.9439121663803199, + "grad_norm": 24475.30859375, + "learning_rate": 9.145516097980856e-06, + "loss": 0.4019, + "step": 182950 + }, + { + "epoch": 0.9441701363629328, + "grad_norm": 23691.06640625, + "learning_rate": 9.12446698691738e-06, + "loss": 0.4031, + "step": 183000 + }, + { + "epoch": 0.9444281063455456, + "grad_norm": 25653.37109375, + "learning_rate": 9.103439694421928e-06, + "loss": 0.4007, + "step": 183050 + }, + { + "epoch": 0.9446860763281585, + "grad_norm": 22718.71875, + "learning_rate": 9.08243423171845e-06, + "loss": 0.3996, + "step": 183100 + }, + { + "epoch": 0.9449440463107712, + "grad_norm": 23337.986328125, + "learning_rate": 9.061450610019262e-06, + "loss": 0.4043, + "step": 183150 + }, + { + "epoch": 0.9452020162933841, + "grad_norm": 27628.021484375, + "learning_rate": 9.040488840525001e-06, + "loss": 0.409, + "step": 183200 + }, + { + "epoch": 0.9454599862759969, + "grad_norm": 22894.26953125, + "learning_rate": 9.01954893442467e-06, + "loss": 0.4026, + "step": 183250 + }, + { + "epoch": 0.9457179562586098, + "grad_norm": 27624.564453125, + "learning_rate": 8.998630902895566e-06, + "loss": 0.4011, + "step": 183300 + }, + { + "epoch": 0.9459759262412226, + "grad_norm": 25944.05859375, + "learning_rate": 8.977734757103351e-06, + "loss": 0.3995, + "step": 183350 + }, + { + "epoch": 0.9462338962238354, + "grad_norm": 27243.31640625, + "learning_rate": 8.95686050820197e-06, + "loss": 0.3983, + "step": 183400 + }, + { + "epoch": 0.9464918662064482, + "grad_norm": 24556.611328125, + "learning_rate": 8.936008167333699e-06, + "loss": 0.4041, + "step": 183450 + }, + { + "epoch": 0.9467498361890611, + "grad_norm": 22205.880859375, + "learning_rate": 8.915177745629112e-06, + "loss": 0.3973, + "step": 183500 + }, + { + "epoch": 0.9470078061716739, + "grad_norm": 26829.6328125, + "learning_rate": 8.894369254207069e-06, + "loss": 0.4023, + "step": 183550 + }, + { + "epoch": 0.9472657761542866, + "grad_norm": 24388.59765625, + "learning_rate": 8.873582704174776e-06, + "loss": 0.397, + "step": 183600 + }, + { + "epoch": 0.9475237461368995, + "grad_norm": 25665.98828125, + "learning_rate": 8.852818106627647e-06, + "loss": 0.4055, + "step": 183650 + }, + { + "epoch": 0.9477817161195123, + "grad_norm": 24880.47265625, + "learning_rate": 8.83207547264946e-06, + "loss": 0.4016, + "step": 183700 + }, + { + "epoch": 0.9480396861021252, + "grad_norm": 26516.6953125, + "learning_rate": 8.81135481331221e-06, + "loss": 0.3992, + "step": 183750 + }, + { + "epoch": 0.9482976560847379, + "grad_norm": 22604.123046875, + "learning_rate": 8.790656139676179e-06, + "loss": 0.401, + "step": 183800 + }, + { + "epoch": 0.9485556260673508, + "grad_norm": 24668.94921875, + "learning_rate": 8.769979462789957e-06, + "loss": 0.3974, + "step": 183850 + }, + { + "epoch": 0.9488135960499636, + "grad_norm": 26522.896484375, + "learning_rate": 8.749324793690295e-06, + "loss": 0.4048, + "step": 183900 + }, + { + "epoch": 0.9490715660325765, + "grad_norm": 26786.48046875, + "learning_rate": 8.728692143402295e-06, + "loss": 0.4075, + "step": 183950 + }, + { + "epoch": 0.9493295360151893, + "grad_norm": 23683.54296875, + "learning_rate": 8.708081522939265e-06, + "loss": 0.3996, + "step": 184000 + }, + { + "epoch": 0.9495875059978021, + "grad_norm": 23064.400390625, + "learning_rate": 8.687492943302739e-06, + "loss": 0.4036, + "step": 184050 + }, + { + "epoch": 0.9498454759804149, + "grad_norm": 24142.4921875, + "learning_rate": 8.666926415482501e-06, + "loss": 0.4023, + "step": 184100 + }, + { + "epoch": 0.9501034459630278, + "grad_norm": 24012.076171875, + "learning_rate": 8.6463819504566e-06, + "loss": 0.4024, + "step": 184150 + }, + { + "epoch": 0.9503614159456406, + "grad_norm": 22214.41015625, + "learning_rate": 8.625859559191224e-06, + "loss": 0.4002, + "step": 184200 + }, + { + "epoch": 0.9506193859282533, + "grad_norm": 24664.162109375, + "learning_rate": 8.60535925264086e-06, + "loss": 0.4027, + "step": 184250 + }, + { + "epoch": 0.9508773559108662, + "grad_norm": 21136.900390625, + "learning_rate": 8.584881041748171e-06, + "loss": 0.3957, + "step": 184300 + }, + { + "epoch": 0.951135325893479, + "grad_norm": 22411.33984375, + "learning_rate": 8.56442493744401e-06, + "loss": 0.3977, + "step": 184350 + }, + { + "epoch": 0.9513932958760919, + "grad_norm": 23004.173828125, + "learning_rate": 8.54399095064749e-06, + "loss": 0.4014, + "step": 184400 + }, + { + "epoch": 0.9516512658587046, + "grad_norm": 23692.26171875, + "learning_rate": 8.523579092265827e-06, + "loss": 0.4013, + "step": 184450 + }, + { + "epoch": 0.9519092358413175, + "grad_norm": 25310.919921875, + "learning_rate": 8.503189373194509e-06, + "loss": 0.3961, + "step": 184500 + }, + { + "epoch": 0.9521672058239303, + "grad_norm": 25963.943359375, + "learning_rate": 8.482821804317171e-06, + "loss": 0.4049, + "step": 184550 + }, + { + "epoch": 0.9524251758065432, + "grad_norm": 24282.115234375, + "learning_rate": 8.46247639650562e-06, + "loss": 0.4008, + "step": 184600 + }, + { + "epoch": 0.952683145789156, + "grad_norm": 24703.26953125, + "learning_rate": 8.442153160619837e-06, + "loss": 0.4063, + "step": 184650 + }, + { + "epoch": 0.9529411157717688, + "grad_norm": 23616.09375, + "learning_rate": 8.421852107507966e-06, + "loss": 0.3974, + "step": 184700 + }, + { + "epoch": 0.9531990857543816, + "grad_norm": 25447.408203125, + "learning_rate": 8.40157324800634e-06, + "loss": 0.4066, + "step": 184750 + }, + { + "epoch": 0.9534570557369945, + "grad_norm": 25534.3984375, + "learning_rate": 8.381316592939403e-06, + "loss": 0.4027, + "step": 184800 + }, + { + "epoch": 0.9537150257196073, + "grad_norm": 24251.138671875, + "learning_rate": 8.361082153119777e-06, + "loss": 0.3958, + "step": 184850 + }, + { + "epoch": 0.95397299570222, + "grad_norm": 26980.046875, + "learning_rate": 8.3408699393482e-06, + "loss": 0.4058, + "step": 184900 + }, + { + "epoch": 0.9542309656848329, + "grad_norm": 26143.732421875, + "learning_rate": 8.320679962413574e-06, + "loss": 0.4006, + "step": 184950 + }, + { + "epoch": 0.9544889356674457, + "grad_norm": 24566.15234375, + "learning_rate": 8.300512233092893e-06, + "loss": 0.405, + "step": 185000 + }, + { + "epoch": 0.9544889356674457, + "eval_loss": 0.3880694806575775, + "eval_runtime": 3197.8794, + "eval_samples_per_second": 969.743, + "eval_steps_per_second": 1.894, + "step": 185000 + }, + { + "epoch": 0.9547469056500586, + "grad_norm": 22463.359375, + "learning_rate": 8.280366762151349e-06, + "loss": 0.4035, + "step": 185050 + }, + { + "epoch": 0.9550048756326713, + "grad_norm": 23964.845703125, + "learning_rate": 8.260243560342146e-06, + "loss": 0.399, + "step": 185100 + }, + { + "epoch": 0.9552628456152842, + "grad_norm": 22267.978515625, + "learning_rate": 8.2401426384067e-06, + "loss": 0.4065, + "step": 185150 + }, + { + "epoch": 0.955520815597897, + "grad_norm": 23959.732421875, + "learning_rate": 8.220064007074485e-06, + "loss": 0.3988, + "step": 185200 + }, + { + "epoch": 0.9557787855805099, + "grad_norm": 22042.95703125, + "learning_rate": 8.200007677063066e-06, + "loss": 0.4005, + "step": 185250 + }, + { + "epoch": 0.9560367555631226, + "grad_norm": 23760.798828125, + "learning_rate": 8.17997365907816e-06, + "loss": 0.4043, + "step": 185300 + }, + { + "epoch": 0.9562947255457355, + "grad_norm": 23235.8828125, + "learning_rate": 8.1599619638135e-06, + "loss": 0.3999, + "step": 185350 + }, + { + "epoch": 0.9565526955283483, + "grad_norm": 22637.701171875, + "learning_rate": 8.139972601950967e-06, + "loss": 0.4004, + "step": 185400 + }, + { + "epoch": 0.9568106655109612, + "grad_norm": 28806.810546875, + "learning_rate": 8.120005584160489e-06, + "loss": 0.4022, + "step": 185450 + }, + { + "epoch": 0.957068635493574, + "grad_norm": 22143.8203125, + "learning_rate": 8.100060921100067e-06, + "loss": 0.3977, + "step": 185500 + }, + { + "epoch": 0.9573266054761868, + "grad_norm": 22921.810546875, + "learning_rate": 8.080138623415783e-06, + "loss": 0.4, + "step": 185550 + }, + { + "epoch": 0.9575845754587996, + "grad_norm": 25425.640625, + "learning_rate": 8.060238701741762e-06, + "loss": 0.4021, + "step": 185600 + }, + { + "epoch": 0.9578425454414125, + "grad_norm": 27279.6796875, + "learning_rate": 8.040361166700216e-06, + "loss": 0.4064, + "step": 185650 + }, + { + "epoch": 0.9581005154240253, + "grad_norm": 25144.322265625, + "learning_rate": 8.020506028901376e-06, + "loss": 0.4031, + "step": 185700 + }, + { + "epoch": 0.958358485406638, + "grad_norm": 21046.607421875, + "learning_rate": 8.000673298943534e-06, + "loss": 0.4041, + "step": 185750 + }, + { + "epoch": 0.9586164553892509, + "grad_norm": 23166.087890625, + "learning_rate": 7.980862987413018e-06, + "loss": 0.3996, + "step": 185800 + }, + { + "epoch": 0.9588744253718637, + "grad_norm": 23506.693359375, + "learning_rate": 7.961075104884186e-06, + "loss": 0.3973, + "step": 185850 + }, + { + "epoch": 0.9591323953544766, + "grad_norm": 25975.408203125, + "learning_rate": 7.94130966191941e-06, + "loss": 0.4048, + "step": 185900 + }, + { + "epoch": 0.9593903653370893, + "grad_norm": 23704.638671875, + "learning_rate": 7.921566669069147e-06, + "loss": 0.4045, + "step": 185950 + }, + { + "epoch": 0.9596483353197022, + "grad_norm": 27402.2421875, + "learning_rate": 7.901846136871766e-06, + "loss": 0.4007, + "step": 186000 + }, + { + "epoch": 0.959906305302315, + "grad_norm": 23186.658203125, + "learning_rate": 7.882148075853752e-06, + "loss": 0.4072, + "step": 186050 + }, + { + "epoch": 0.9601642752849279, + "grad_norm": 24789.619140625, + "learning_rate": 7.862472496529528e-06, + "loss": 0.4056, + "step": 186100 + }, + { + "epoch": 0.9604222452675407, + "grad_norm": 23849.71875, + "learning_rate": 7.842819409401524e-06, + "loss": 0.4067, + "step": 186150 + }, + { + "epoch": 0.9606802152501535, + "grad_norm": 24820.765625, + "learning_rate": 7.823188824960221e-06, + "loss": 0.4071, + "step": 186200 + }, + { + "epoch": 0.9609381852327663, + "grad_norm": 23276.568359375, + "learning_rate": 7.803580753683992e-06, + "loss": 0.3989, + "step": 186250 + }, + { + "epoch": 0.9611961552153792, + "grad_norm": 21064.8984375, + "learning_rate": 7.783995206039279e-06, + "loss": 0.3994, + "step": 186300 + }, + { + "epoch": 0.961454125197992, + "grad_norm": 27310.30078125, + "learning_rate": 7.764432192480464e-06, + "loss": 0.4015, + "step": 186350 + }, + { + "epoch": 0.9617120951806047, + "grad_norm": 24786.1796875, + "learning_rate": 7.744891723449888e-06, + "loss": 0.4042, + "step": 186400 + }, + { + "epoch": 0.9619700651632176, + "grad_norm": 22362.47265625, + "learning_rate": 7.725373809377911e-06, + "loss": 0.3991, + "step": 186450 + }, + { + "epoch": 0.9622280351458304, + "grad_norm": 23751.4296875, + "learning_rate": 7.705878460682775e-06, + "loss": 0.3988, + "step": 186500 + }, + { + "epoch": 0.9624860051284433, + "grad_norm": 22956.935546875, + "learning_rate": 7.686405687770748e-06, + "loss": 0.4049, + "step": 186550 + }, + { + "epoch": 0.962743975111056, + "grad_norm": 25276.861328125, + "learning_rate": 7.666955501036006e-06, + "loss": 0.4005, + "step": 186600 + }, + { + "epoch": 0.9630019450936689, + "grad_norm": 22390.625, + "learning_rate": 7.647527910860691e-06, + "loss": 0.4008, + "step": 186650 + }, + { + "epoch": 0.9632599150762817, + "grad_norm": 28946.125, + "learning_rate": 7.628122927614856e-06, + "loss": 0.3987, + "step": 186700 + }, + { + "epoch": 0.9635178850588946, + "grad_norm": 23663.3125, + "learning_rate": 7.608740561656541e-06, + "loss": 0.4006, + "step": 186750 + }, + { + "epoch": 0.9637758550415074, + "grad_norm": 21705.16015625, + "learning_rate": 7.589380823331632e-06, + "loss": 0.4023, + "step": 186800 + }, + { + "epoch": 0.9640338250241202, + "grad_norm": 25353.228515625, + "learning_rate": 7.570043722974019e-06, + "loss": 0.4006, + "step": 186850 + }, + { + "epoch": 0.964291795006733, + "grad_norm": 26046.412109375, + "learning_rate": 7.55072927090546e-06, + "loss": 0.3931, + "step": 186900 + }, + { + "epoch": 0.9645497649893459, + "grad_norm": 25989.2578125, + "learning_rate": 7.531437477435621e-06, + "loss": 0.3989, + "step": 186950 + }, + { + "epoch": 0.9648077349719587, + "grad_norm": 22714.423828125, + "learning_rate": 7.51216835286212e-06, + "loss": 0.4018, + "step": 187000 + }, + { + "epoch": 0.9650657049545714, + "grad_norm": 26353.42578125, + "learning_rate": 7.492921907470407e-06, + "loss": 0.4056, + "step": 187050 + }, + { + "epoch": 0.9653236749371843, + "grad_norm": 23085.212890625, + "learning_rate": 7.4736981515338864e-06, + "loss": 0.3995, + "step": 187100 + }, + { + "epoch": 0.9655816449197971, + "grad_norm": 23125.970703125, + "learning_rate": 7.454497095313817e-06, + "loss": 0.4069, + "step": 187150 + }, + { + "epoch": 0.96583961490241, + "grad_norm": 23488.2265625, + "learning_rate": 7.435318749059356e-06, + "loss": 0.4039, + "step": 187200 + }, + { + "epoch": 0.9660975848850227, + "grad_norm": 22577.46875, + "learning_rate": 7.4161631230075305e-06, + "loss": 0.4051, + "step": 187250 + }, + { + "epoch": 0.9663555548676356, + "grad_norm": 22637.890625, + "learning_rate": 7.397030227383228e-06, + "loss": 0.3986, + "step": 187300 + }, + { + "epoch": 0.9666135248502484, + "grad_norm": 26084.412109375, + "learning_rate": 7.377920072399247e-06, + "loss": 0.398, + "step": 187350 + }, + { + "epoch": 0.9668714948328613, + "grad_norm": 25263.6328125, + "learning_rate": 7.3588326682562e-06, + "loss": 0.4035, + "step": 187400 + }, + { + "epoch": 0.9671294648154741, + "grad_norm": 22348.236328125, + "learning_rate": 7.339768025142573e-06, + "loss": 0.4003, + "step": 187450 + }, + { + "epoch": 0.9673874347980869, + "grad_norm": 23006.091796875, + "learning_rate": 7.320726153234714e-06, + "loss": 0.399, + "step": 187500 + }, + { + "epoch": 0.9676454047806997, + "grad_norm": 24137.44921875, + "learning_rate": 7.301707062696794e-06, + "loss": 0.3999, + "step": 187550 + }, + { + "epoch": 0.9679033747633126, + "grad_norm": 26101.837890625, + "learning_rate": 7.282710763680828e-06, + "loss": 0.4007, + "step": 187600 + }, + { + "epoch": 0.9681613447459254, + "grad_norm": 21417.814453125, + "learning_rate": 7.263737266326709e-06, + "loss": 0.3994, + "step": 187650 + }, + { + "epoch": 0.9684193147285381, + "grad_norm": 25831.45703125, + "learning_rate": 7.244786580762075e-06, + "loss": 0.3925, + "step": 187700 + }, + { + "epoch": 0.968677284711151, + "grad_norm": 24546.84765625, + "learning_rate": 7.225858717102474e-06, + "loss": 0.4004, + "step": 187750 + }, + { + "epoch": 0.9689352546937638, + "grad_norm": 23773.09765625, + "learning_rate": 7.206953685451212e-06, + "loss": 0.4041, + "step": 187800 + }, + { + "epoch": 0.9691932246763767, + "grad_norm": 23538.923828125, + "learning_rate": 7.188071495899423e-06, + "loss": 0.3971, + "step": 187850 + }, + { + "epoch": 0.9694511946589894, + "grad_norm": 24968.310546875, + "learning_rate": 7.169212158526084e-06, + "loss": 0.4047, + "step": 187900 + }, + { + "epoch": 0.9697091646416023, + "grad_norm": 24379.23828125, + "learning_rate": 7.150375683397908e-06, + "loss": 0.3983, + "step": 187950 + }, + { + "epoch": 0.9699671346242151, + "grad_norm": 25501.638671875, + "learning_rate": 7.131562080569465e-06, + "loss": 0.4024, + "step": 188000 + }, + { + "epoch": 0.970225104606828, + "grad_norm": 24917.73046875, + "learning_rate": 7.112771360083087e-06, + "loss": 0.3998, + "step": 188050 + }, + { + "epoch": 0.9704830745894407, + "grad_norm": 24725.638671875, + "learning_rate": 7.094003531968896e-06, + "loss": 0.3964, + "step": 188100 + }, + { + "epoch": 0.9707410445720536, + "grad_norm": 23913.5703125, + "learning_rate": 7.075258606244789e-06, + "loss": 0.3987, + "step": 188150 + }, + { + "epoch": 0.9709990145546664, + "grad_norm": 25010.09375, + "learning_rate": 7.05653659291644e-06, + "loss": 0.4021, + "step": 188200 + }, + { + "epoch": 0.9712569845372793, + "grad_norm": 25357.556640625, + "learning_rate": 7.037837501977318e-06, + "loss": 0.4007, + "step": 188250 + }, + { + "epoch": 0.9715149545198921, + "grad_norm": 24599.890625, + "learning_rate": 7.019161343408625e-06, + "loss": 0.3962, + "step": 188300 + }, + { + "epoch": 0.9717729245025049, + "grad_norm": 25866.2734375, + "learning_rate": 7.000508127179328e-06, + "loss": 0.3983, + "step": 188350 + }, + { + "epoch": 0.9720308944851177, + "grad_norm": 22591.40625, + "learning_rate": 6.981877863246161e-06, + "loss": 0.3971, + "step": 188400 + }, + { + "epoch": 0.9722888644677306, + "grad_norm": 20752.091796875, + "learning_rate": 6.963270561553586e-06, + "loss": 0.3946, + "step": 188450 + }, + { + "epoch": 0.9725468344503434, + "grad_norm": 22927.109375, + "learning_rate": 6.94468623203382e-06, + "loss": 0.4036, + "step": 188500 + }, + { + "epoch": 0.9728048044329561, + "grad_norm": 27096.041015625, + "learning_rate": 6.92612488460685e-06, + "loss": 0.3982, + "step": 188550 + }, + { + "epoch": 0.973062774415569, + "grad_norm": 24426.93359375, + "learning_rate": 6.907586529180321e-06, + "loss": 0.4054, + "step": 188600 + }, + { + "epoch": 0.9733207443981818, + "grad_norm": 25097.658203125, + "learning_rate": 6.889071175649669e-06, + "loss": 0.4015, + "step": 188650 + }, + { + "epoch": 0.9735787143807947, + "grad_norm": 24646.548828125, + "learning_rate": 6.870578833898033e-06, + "loss": 0.3977, + "step": 188700 + }, + { + "epoch": 0.9738366843634074, + "grad_norm": 23465.357421875, + "learning_rate": 6.852109513796257e-06, + "loss": 0.396, + "step": 188750 + }, + { + "epoch": 0.9740946543460203, + "grad_norm": 22382.603515625, + "learning_rate": 6.83366322520293e-06, + "loss": 0.4018, + "step": 188800 + }, + { + "epoch": 0.9743526243286331, + "grad_norm": 24666.61328125, + "learning_rate": 6.815239977964283e-06, + "loss": 0.4046, + "step": 188850 + }, + { + "epoch": 0.974610594311246, + "grad_norm": 25308.685546875, + "learning_rate": 6.796839781914321e-06, + "loss": 0.3998, + "step": 188900 + }, + { + "epoch": 0.9748685642938588, + "grad_norm": 24856.64453125, + "learning_rate": 6.778462646874706e-06, + "loss": 0.4014, + "step": 188950 + }, + { + "epoch": 0.9751265342764716, + "grad_norm": 27452.50390625, + "learning_rate": 6.760108582654795e-06, + "loss": 0.4008, + "step": 189000 + }, + { + "epoch": 0.9753845042590844, + "grad_norm": 25027.416015625, + "learning_rate": 6.741777599051629e-06, + "loss": 0.4006, + "step": 189050 + }, + { + "epoch": 0.9756424742416973, + "grad_norm": 24687.740234375, + "learning_rate": 6.723469705849927e-06, + "loss": 0.4056, + "step": 189100 + }, + { + "epoch": 0.9759004442243101, + "grad_norm": 24812.55078125, + "learning_rate": 6.705184912822105e-06, + "loss": 0.4043, + "step": 189150 + }, + { + "epoch": 0.9761584142069228, + "grad_norm": 25776.005859375, + "learning_rate": 6.686923229728214e-06, + "loss": 0.4052, + "step": 189200 + }, + { + "epoch": 0.9764163841895357, + "grad_norm": 24319.34765625, + "learning_rate": 6.668684666316005e-06, + "loss": 0.4014, + "step": 189250 + }, + { + "epoch": 0.9766743541721485, + "grad_norm": 28024.419921875, + "learning_rate": 6.650469232320839e-06, + "loss": 0.3991, + "step": 189300 + }, + { + "epoch": 0.9769323241547614, + "grad_norm": 25074.068359375, + "learning_rate": 6.6322769374658085e-06, + "loss": 0.4034, + "step": 189350 + }, + { + "epoch": 0.9771902941373741, + "grad_norm": 21126.572265625, + "learning_rate": 6.61410779146156e-06, + "loss": 0.3998, + "step": 189400 + }, + { + "epoch": 0.977448264119987, + "grad_norm": 25041.337890625, + "learning_rate": 6.595961804006467e-06, + "loss": 0.4012, + "step": 189450 + }, + { + "epoch": 0.9777062341025998, + "grad_norm": 25474.263671875, + "learning_rate": 6.577838984786489e-06, + "loss": 0.3991, + "step": 189500 + }, + { + "epoch": 0.9779642040852127, + "grad_norm": 22192.98828125, + "learning_rate": 6.55973934347523e-06, + "loss": 0.3965, + "step": 189550 + }, + { + "epoch": 0.9782221740678255, + "grad_norm": 24587.9453125, + "learning_rate": 6.5416628897339625e-06, + "loss": 0.4008, + "step": 189600 + }, + { + "epoch": 0.9784801440504383, + "grad_norm": 23246.314453125, + "learning_rate": 6.523609633211497e-06, + "loss": 0.4036, + "step": 189650 + }, + { + "epoch": 0.9787381140330511, + "grad_norm": 24233.033203125, + "learning_rate": 6.505579583544353e-06, + "loss": 0.4002, + "step": 189700 + }, + { + "epoch": 0.978996084015664, + "grad_norm": 24149.6953125, + "learning_rate": 6.487572750356602e-06, + "loss": 0.4043, + "step": 189750 + }, + { + "epoch": 0.9792540539982768, + "grad_norm": 25376.3046875, + "learning_rate": 6.469589143259952e-06, + "loss": 0.3997, + "step": 189800 + }, + { + "epoch": 0.9795120239808895, + "grad_norm": 25878.90625, + "learning_rate": 6.451628771853696e-06, + "loss": 0.3936, + "step": 189850 + }, + { + "epoch": 0.9797699939635024, + "grad_norm": 24123.169921875, + "learning_rate": 6.433691645724743e-06, + "loss": 0.3976, + "step": 189900 + }, + { + "epoch": 0.9800279639461152, + "grad_norm": 23894.5625, + "learning_rate": 6.4157777744475626e-06, + "loss": 0.4025, + "step": 189950 + }, + { + "epoch": 0.9802859339287281, + "grad_norm": 27271.9609375, + "learning_rate": 6.3978871675842544e-06, + "loss": 0.4007, + "step": 190000 + }, + { + "epoch": 0.9802859339287281, + "eval_loss": 0.3872862458229065, + "eval_runtime": 3184.1416, + "eval_samples_per_second": 973.927, + "eval_steps_per_second": 1.902, + "step": 190000 + }, + { + "epoch": 0.9805439039113408, + "grad_norm": 25592.9296875, + "learning_rate": 6.380019834684475e-06, + "loss": 0.4041, + "step": 190050 + }, + { + "epoch": 0.9808018738939537, + "grad_norm": 22425.51953125, + "learning_rate": 6.362175785285457e-06, + "loss": 0.4028, + "step": 190100 + }, + { + "epoch": 0.9810598438765665, + "grad_norm": 25178.28125, + "learning_rate": 6.344355028912008e-06, + "loss": 0.3972, + "step": 190150 + }, + { + "epoch": 0.9813178138591794, + "grad_norm": 25157.537109375, + "learning_rate": 6.326557575076486e-06, + "loss": 0.3989, + "step": 190200 + }, + { + "epoch": 0.9815757838417921, + "grad_norm": 23774.67578125, + "learning_rate": 6.3087834332788695e-06, + "loss": 0.4057, + "step": 190250 + }, + { + "epoch": 0.981833753824405, + "grad_norm": 25307.736328125, + "learning_rate": 6.2910326130066035e-06, + "loss": 0.3946, + "step": 190300 + }, + { + "epoch": 0.9820917238070178, + "grad_norm": 28657.8125, + "learning_rate": 6.273305123734769e-06, + "loss": 0.4006, + "step": 190350 + }, + { + "epoch": 0.9823496937896307, + "grad_norm": 24404.603515625, + "learning_rate": 6.255600974925935e-06, + "loss": 0.3998, + "step": 190400 + }, + { + "epoch": 0.9826076637722435, + "grad_norm": 22460.1640625, + "learning_rate": 6.237920176030232e-06, + "loss": 0.4039, + "step": 190450 + }, + { + "epoch": 0.9828656337548562, + "grad_norm": 27335.625, + "learning_rate": 6.220262736485355e-06, + "loss": 0.3937, + "step": 190500 + }, + { + "epoch": 0.9831236037374691, + "grad_norm": 27996.9765625, + "learning_rate": 6.202628665716464e-06, + "loss": 0.4025, + "step": 190550 + }, + { + "epoch": 0.983381573720082, + "grad_norm": 23532.66796875, + "learning_rate": 6.18501797313632e-06, + "loss": 0.4007, + "step": 190600 + }, + { + "epoch": 0.9836395437026948, + "grad_norm": 27360.333984375, + "learning_rate": 6.167430668145146e-06, + "loss": 0.3994, + "step": 190650 + }, + { + "epoch": 0.9838975136853075, + "grad_norm": 23754.23828125, + "learning_rate": 6.149866760130718e-06, + "loss": 0.4043, + "step": 190700 + }, + { + "epoch": 0.9841554836679204, + "grad_norm": 24313.943359375, + "learning_rate": 6.1323262584683075e-06, + "loss": 0.4039, + "step": 190750 + }, + { + "epoch": 0.9844134536505332, + "grad_norm": 22932.11328125, + "learning_rate": 6.114809172520686e-06, + "loss": 0.3977, + "step": 190800 + }, + { + "epoch": 0.9846714236331461, + "grad_norm": 27614.103515625, + "learning_rate": 6.097315511638135e-06, + "loss": 0.405, + "step": 190850 + }, + { + "epoch": 0.9849293936157588, + "grad_norm": 21648.470703125, + "learning_rate": 6.079845285158447e-06, + "loss": 0.403, + "step": 190900 + }, + { + "epoch": 0.9851873635983717, + "grad_norm": 25720.76953125, + "learning_rate": 6.0623985024068854e-06, + "loss": 0.4069, + "step": 190950 + }, + { + "epoch": 0.9854453335809845, + "grad_norm": 22051.30078125, + "learning_rate": 6.044975172696199e-06, + "loss": 0.4062, + "step": 191000 + }, + { + "epoch": 0.9857033035635974, + "grad_norm": 27862.138671875, + "learning_rate": 6.027575305326621e-06, + "loss": 0.4029, + "step": 191050 + }, + { + "epoch": 0.9859612735462102, + "grad_norm": 24624.951171875, + "learning_rate": 6.010198909585862e-06, + "loss": 0.3995, + "step": 191100 + }, + { + "epoch": 0.986219243528823, + "grad_norm": 23278.45703125, + "learning_rate": 5.992845994749136e-06, + "loss": 0.3981, + "step": 191150 + }, + { + "epoch": 0.9864772135114358, + "grad_norm": 27549.26953125, + "learning_rate": 5.975516570079048e-06, + "loss": 0.3999, + "step": 191200 + }, + { + "epoch": 0.9867351834940487, + "grad_norm": 24570.40625, + "learning_rate": 5.95821064482574e-06, + "loss": 0.4052, + "step": 191250 + }, + { + "epoch": 0.9869931534766615, + "grad_norm": 23672.029296875, + "learning_rate": 5.9409282282267665e-06, + "loss": 0.4045, + "step": 191300 + }, + { + "epoch": 0.9872511234592742, + "grad_norm": 22627.697265625, + "learning_rate": 5.923669329507148e-06, + "loss": 0.4017, + "step": 191350 + }, + { + "epoch": 0.9875090934418871, + "grad_norm": 22583.0390625, + "learning_rate": 5.906433957879365e-06, + "loss": 0.399, + "step": 191400 + }, + { + "epoch": 0.9877670634244999, + "grad_norm": 22665.984375, + "learning_rate": 5.889222122543298e-06, + "loss": 0.3989, + "step": 191450 + }, + { + "epoch": 0.9880250334071128, + "grad_norm": 25125.6640625, + "learning_rate": 5.872033832686319e-06, + "loss": 0.4001, + "step": 191500 + }, + { + "epoch": 0.9882830033897255, + "grad_norm": 24863.34375, + "learning_rate": 5.8548690974831845e-06, + "loss": 0.3991, + "step": 191550 + }, + { + "epoch": 0.9885409733723384, + "grad_norm": 23538.44921875, + "learning_rate": 5.837727926096109e-06, + "loss": 0.3979, + "step": 191600 + }, + { + "epoch": 0.9887989433549512, + "grad_norm": 23396.3203125, + "learning_rate": 5.820610327674708e-06, + "loss": 0.4049, + "step": 191650 + }, + { + "epoch": 0.9890569133375641, + "grad_norm": 22553.01171875, + "learning_rate": 5.803516311356044e-06, + "loss": 0.3983, + "step": 191700 + }, + { + "epoch": 0.9893148833201769, + "grad_norm": 25163.04296875, + "learning_rate": 5.786445886264541e-06, + "loss": 0.3969, + "step": 191750 + }, + { + "epoch": 0.9895728533027897, + "grad_norm": 22826.181640625, + "learning_rate": 5.769399061512093e-06, + "loss": 0.4016, + "step": 191800 + }, + { + "epoch": 0.9898308232854025, + "grad_norm": 22302.7265625, + "learning_rate": 5.752375846197944e-06, + "loss": 0.3988, + "step": 191850 + }, + { + "epoch": 0.9900887932680154, + "grad_norm": 20985.990234375, + "learning_rate": 5.735376249408753e-06, + "loss": 0.3952, + "step": 191900 + }, + { + "epoch": 0.9903467632506282, + "grad_norm": 23513.19921875, + "learning_rate": 5.718400280218611e-06, + "loss": 0.4052, + "step": 191950 + }, + { + "epoch": 0.9906047332332409, + "grad_norm": 23184.818359375, + "learning_rate": 5.7014479476889145e-06, + "loss": 0.399, + "step": 192000 + }, + { + "epoch": 0.9908627032158538, + "grad_norm": 23472.9453125, + "learning_rate": 5.684519260868521e-06, + "loss": 0.3946, + "step": 192050 + }, + { + "epoch": 0.9911206731984666, + "grad_norm": 26255.388671875, + "learning_rate": 5.667614228793622e-06, + "loss": 0.3964, + "step": 192100 + }, + { + "epoch": 0.9913786431810795, + "grad_norm": 23894.54296875, + "learning_rate": 5.650732860487806e-06, + "loss": 0.3928, + "step": 192150 + }, + { + "epoch": 0.9916366131636922, + "grad_norm": 24135.478515625, + "learning_rate": 5.633875164962016e-06, + "loss": 0.4019, + "step": 192200 + }, + { + "epoch": 0.9918945831463051, + "grad_norm": 26928.08984375, + "learning_rate": 5.617041151214553e-06, + "loss": 0.3958, + "step": 192250 + }, + { + "epoch": 0.9921525531289179, + "grad_norm": 22469.884765625, + "learning_rate": 5.600230828231107e-06, + "loss": 0.4031, + "step": 192300 + }, + { + "epoch": 0.9924105231115308, + "grad_norm": 23694.59765625, + "learning_rate": 5.583444204984695e-06, + "loss": 0.3926, + "step": 192350 + }, + { + "epoch": 0.9926684930941435, + "grad_norm": 23482.986328125, + "learning_rate": 5.566681290435688e-06, + "loss": 0.4112, + "step": 192400 + }, + { + "epoch": 0.9929264630767564, + "grad_norm": 22524.994140625, + "learning_rate": 5.549942093531812e-06, + "loss": 0.3981, + "step": 192450 + }, + { + "epoch": 0.9931844330593692, + "grad_norm": 27258.35546875, + "learning_rate": 5.5332266232081155e-06, + "loss": 0.4024, + "step": 192500 + }, + { + "epoch": 0.9934424030419821, + "grad_norm": 19928.40625, + "learning_rate": 5.516534888386992e-06, + "loss": 0.4028, + "step": 192550 + }, + { + "epoch": 0.9937003730245949, + "grad_norm": 21809.205078125, + "learning_rate": 5.499866897978189e-06, + "loss": 0.3996, + "step": 192600 + }, + { + "epoch": 0.9939583430072076, + "grad_norm": 22132.6171875, + "learning_rate": 5.483222660878729e-06, + "loss": 0.4012, + "step": 192650 + }, + { + "epoch": 0.9942163129898205, + "grad_norm": 25306.728515625, + "learning_rate": 5.466602185973002e-06, + "loss": 0.3987, + "step": 192700 + }, + { + "epoch": 0.9944742829724333, + "grad_norm": 29266.78515625, + "learning_rate": 5.4500054821326865e-06, + "loss": 0.4028, + "step": 192750 + }, + { + "epoch": 0.9947322529550462, + "grad_norm": 23506.931640625, + "learning_rate": 5.433432558216778e-06, + "loss": 0.3948, + "step": 192800 + }, + { + "epoch": 0.9949902229376589, + "grad_norm": 22564.177734375, + "learning_rate": 5.416883423071606e-06, + "loss": 0.4015, + "step": 192850 + }, + { + "epoch": 0.9952481929202718, + "grad_norm": 24564.380859375, + "learning_rate": 5.400358085530738e-06, + "loss": 0.4046, + "step": 192900 + }, + { + "epoch": 0.9955061629028846, + "grad_norm": 24793.91796875, + "learning_rate": 5.383856554415117e-06, + "loss": 0.4003, + "step": 192950 + }, + { + "epoch": 0.9957641328854975, + "grad_norm": 23798.228515625, + "learning_rate": 5.367378838532927e-06, + "loss": 0.3982, + "step": 193000 + }, + { + "epoch": 0.9960221028681102, + "grad_norm": 23164.642578125, + "learning_rate": 5.350924946679653e-06, + "loss": 0.3977, + "step": 193050 + }, + { + "epoch": 0.9962800728507231, + "grad_norm": 25646.29296875, + "learning_rate": 5.334494887638058e-06, + "loss": 0.3992, + "step": 193100 + }, + { + "epoch": 0.9965380428333359, + "grad_norm": 24146.2421875, + "learning_rate": 5.318088670178189e-06, + "loss": 0.4037, + "step": 193150 + }, + { + "epoch": 0.9967960128159488, + "grad_norm": 22594.72265625, + "learning_rate": 5.301706303057386e-06, + "loss": 0.4004, + "step": 193200 + }, + { + "epoch": 0.9970539827985616, + "grad_norm": 23395.515625, + "learning_rate": 5.285347795020224e-06, + "loss": 0.3958, + "step": 193250 + }, + { + "epoch": 0.9973119527811743, + "grad_norm": 23383.431640625, + "learning_rate": 5.269013154798558e-06, + "loss": 0.3998, + "step": 193300 + }, + { + "epoch": 0.9975699227637872, + "grad_norm": 20586.341796875, + "learning_rate": 5.252702391111508e-06, + "loss": 0.3979, + "step": 193350 + }, + { + "epoch": 0.9978278927464, + "grad_norm": 26526.83203125, + "learning_rate": 5.236415512665438e-06, + "loss": 0.4036, + "step": 193400 + }, + { + "epoch": 0.9980858627290129, + "grad_norm": 25045.224609375, + "learning_rate": 5.220152528153965e-06, + "loss": 0.4028, + "step": 193450 + }, + { + "epoch": 0.9983438327116256, + "grad_norm": 23480.755859375, + "learning_rate": 5.20391344625798e-06, + "loss": 0.4053, + "step": 193500 + }, + { + "epoch": 0.9986018026942385, + "grad_norm": 25235.927734375, + "learning_rate": 5.187698275645553e-06, + "loss": 0.3964, + "step": 193550 + }, + { + "epoch": 0.9988597726768513, + "grad_norm": 24883.29296875, + "learning_rate": 5.1715070249720555e-06, + "loss": 0.3978, + "step": 193600 + }, + { + "epoch": 0.9991177426594642, + "grad_norm": 25161.71484375, + "learning_rate": 5.155339702880052e-06, + "loss": 0.3998, + "step": 193650 + }, + { + "epoch": 0.9993757126420769, + "grad_norm": 21524.724609375, + "learning_rate": 5.13919631799934e-06, + "loss": 0.3955, + "step": 193700 + }, + { + "epoch": 0.9996336826246898, + "grad_norm": 23394.1015625, + "learning_rate": 5.123076878946981e-06, + "loss": 0.3962, + "step": 193750 + }, + { + "epoch": 0.9998916526073026, + "grad_norm": 24562.419921875, + "learning_rate": 5.106981394327165e-06, + "loss": 0.4, + "step": 193800 + }, + { + "epoch": 1.0001496225899154, + "grad_norm": 23818.201171875, + "learning_rate": 5.090909872731392e-06, + "loss": 0.4065, + "step": 193850 + }, + { + "epoch": 1.0004075925725282, + "grad_norm": 25973.83984375, + "learning_rate": 5.074862322738316e-06, + "loss": 0.4015, + "step": 193900 + }, + { + "epoch": 1.000665562555141, + "grad_norm": 26476.041015625, + "learning_rate": 5.0588387529138085e-06, + "loss": 0.401, + "step": 193950 + }, + { + "epoch": 1.000923532537754, + "grad_norm": 22776.267578125, + "learning_rate": 5.042839171810937e-06, + "loss": 0.4021, + "step": 194000 + }, + { + "epoch": 1.0011815025203668, + "grad_norm": 22484.884765625, + "learning_rate": 5.026863587969966e-06, + "loss": 0.4013, + "step": 194050 + }, + { + "epoch": 1.0014394725029796, + "grad_norm": 21445.009765625, + "learning_rate": 5.010912009918361e-06, + "loss": 0.4001, + "step": 194100 + }, + { + "epoch": 1.0016974424855924, + "grad_norm": 23748.365234375, + "learning_rate": 4.994984446170764e-06, + "loss": 0.3985, + "step": 194150 + }, + { + "epoch": 1.0019554124682053, + "grad_norm": 25007.73828125, + "learning_rate": 4.9790809052289996e-06, + "loss": 0.403, + "step": 194200 + }, + { + "epoch": 1.002213382450818, + "grad_norm": 26824.900390625, + "learning_rate": 4.963201395582062e-06, + "loss": 0.3966, + "step": 194250 + }, + { + "epoch": 1.0024713524334308, + "grad_norm": 21838.662109375, + "learning_rate": 4.947345925706148e-06, + "loss": 0.3955, + "step": 194300 + }, + { + "epoch": 1.0027293224160436, + "grad_norm": 20830.59375, + "learning_rate": 4.931514504064566e-06, + "loss": 0.3976, + "step": 194350 + }, + { + "epoch": 1.0029872923986565, + "grad_norm": 24187.484375, + "learning_rate": 4.915707139107856e-06, + "loss": 0.4009, + "step": 194400 + }, + { + "epoch": 1.0032452623812693, + "grad_norm": 23026.99609375, + "learning_rate": 4.899923839273662e-06, + "loss": 0.4017, + "step": 194450 + }, + { + "epoch": 1.0035032323638822, + "grad_norm": 25855.919921875, + "learning_rate": 4.884164612986808e-06, + "loss": 0.3966, + "step": 194500 + }, + { + "epoch": 1.003761202346495, + "grad_norm": 23424.58984375, + "learning_rate": 4.86842946865928e-06, + "loss": 0.4007, + "step": 194550 + }, + { + "epoch": 1.0040191723291079, + "grad_norm": 20644.318359375, + "learning_rate": 4.852718414690166e-06, + "loss": 0.405, + "step": 194600 + }, + { + "epoch": 1.0042771423117207, + "grad_norm": 24923.30078125, + "learning_rate": 4.8370314594657405e-06, + "loss": 0.3961, + "step": 194650 + }, + { + "epoch": 1.0045351122943333, + "grad_norm": 23334.19921875, + "learning_rate": 4.821368611359395e-06, + "loss": 0.3981, + "step": 194700 + }, + { + "epoch": 1.0047930822769462, + "grad_norm": 24258.54296875, + "learning_rate": 4.8057298787316516e-06, + "loss": 0.3998, + "step": 194750 + }, + { + "epoch": 1.005051052259559, + "grad_norm": 23366.234375, + "learning_rate": 4.790115269930162e-06, + "loss": 0.3998, + "step": 194800 + }, + { + "epoch": 1.005309022242172, + "grad_norm": 22389.498046875, + "learning_rate": 4.774524793289692e-06, + "loss": 0.4025, + "step": 194850 + }, + { + "epoch": 1.0055669922247847, + "grad_norm": 25497.361328125, + "learning_rate": 4.758958457132157e-06, + "loss": 0.3979, + "step": 194900 + }, + { + "epoch": 1.0058249622073976, + "grad_norm": 24179.626953125, + "learning_rate": 4.7434162697665595e-06, + "loss": 0.3984, + "step": 194950 + }, + { + "epoch": 1.0060829321900104, + "grad_norm": 24002.955078125, + "learning_rate": 4.727898239489015e-06, + "loss": 0.398, + "step": 195000 + }, + { + "epoch": 1.0060829321900104, + "eval_loss": 0.3868441879749298, + "eval_runtime": 3205.6792, + "eval_samples_per_second": 967.383, + "eval_steps_per_second": 1.889, + "step": 195000 + }, + { + "epoch": 1.0063409021726233, + "grad_norm": 26567.27734375, + "learning_rate": 4.712404374582741e-06, + "loss": 0.399, + "step": 195050 + }, + { + "epoch": 1.006598872155236, + "grad_norm": 25244.615234375, + "learning_rate": 4.696934683318077e-06, + "loss": 0.3998, + "step": 195100 + }, + { + "epoch": 1.0068568421378488, + "grad_norm": 23278.265625, + "learning_rate": 4.6814891739524195e-06, + "loss": 0.4002, + "step": 195150 + }, + { + "epoch": 1.0071148121204616, + "grad_norm": 23141.138671875, + "learning_rate": 4.666067854730322e-06, + "loss": 0.3965, + "step": 195200 + }, + { + "epoch": 1.0073727821030745, + "grad_norm": 23506.640625, + "learning_rate": 4.650670733883344e-06, + "loss": 0.3962, + "step": 195250 + }, + { + "epoch": 1.0076307520856873, + "grad_norm": 26591.212890625, + "learning_rate": 4.635297819630202e-06, + "loss": 0.3992, + "step": 195300 + }, + { + "epoch": 1.0078887220683002, + "grad_norm": 22111.640625, + "learning_rate": 4.619949120176642e-06, + "loss": 0.401, + "step": 195350 + }, + { + "epoch": 1.008146692050913, + "grad_norm": 25048.17578125, + "learning_rate": 4.604624643715505e-06, + "loss": 0.4016, + "step": 195400 + }, + { + "epoch": 1.0084046620335259, + "grad_norm": 23263.23828125, + "learning_rate": 4.589324398426714e-06, + "loss": 0.3942, + "step": 195450 + }, + { + "epoch": 1.0086626320161387, + "grad_norm": 23640.9296875, + "learning_rate": 4.57404839247722e-06, + "loss": 0.4039, + "step": 195500 + }, + { + "epoch": 1.0089206019987513, + "grad_norm": 25680.390625, + "learning_rate": 4.558796634021079e-06, + "loss": 0.3986, + "step": 195550 + }, + { + "epoch": 1.0091785719813642, + "grad_norm": 23321.78125, + "learning_rate": 4.543569131199382e-06, + "loss": 0.4039, + "step": 195600 + }, + { + "epoch": 1.009436541963977, + "grad_norm": 24123.205078125, + "learning_rate": 4.528365892140263e-06, + "loss": 0.397, + "step": 195650 + }, + { + "epoch": 1.0096945119465899, + "grad_norm": 23332.673828125, + "learning_rate": 4.513186924958928e-06, + "loss": 0.3941, + "step": 195700 + }, + { + "epoch": 1.0099524819292027, + "grad_norm": 25583.609375, + "learning_rate": 4.498032237757605e-06, + "loss": 0.4046, + "step": 195750 + }, + { + "epoch": 1.0102104519118156, + "grad_norm": 25230.3515625, + "learning_rate": 4.482901838625586e-06, + "loss": 0.4012, + "step": 195800 + }, + { + "epoch": 1.0104684218944284, + "grad_norm": 24376.5859375, + "learning_rate": 4.46779573563918e-06, + "loss": 0.3911, + "step": 195850 + }, + { + "epoch": 1.0107263918770413, + "grad_norm": 23978.17578125, + "learning_rate": 4.452713936861724e-06, + "loss": 0.4031, + "step": 195900 + }, + { + "epoch": 1.010984361859654, + "grad_norm": 23535.03515625, + "learning_rate": 4.437656450343602e-06, + "loss": 0.3933, + "step": 195950 + }, + { + "epoch": 1.0112423318422668, + "grad_norm": 24465.794921875, + "learning_rate": 4.422623284122207e-06, + "loss": 0.4027, + "step": 196000 + }, + { + "epoch": 1.0115003018248796, + "grad_norm": 23942.03125, + "learning_rate": 4.407614446221936e-06, + "loss": 0.4024, + "step": 196050 + }, + { + "epoch": 1.0117582718074924, + "grad_norm": 23610.720703125, + "learning_rate": 4.392629944654248e-06, + "loss": 0.3982, + "step": 196100 + }, + { + "epoch": 1.0120162417901053, + "grad_norm": 25937.53125, + "learning_rate": 4.3776697874175375e-06, + "loss": 0.3991, + "step": 196150 + }, + { + "epoch": 1.0122742117727181, + "grad_norm": 24008.5234375, + "learning_rate": 4.362733982497286e-06, + "loss": 0.3968, + "step": 196200 + }, + { + "epoch": 1.012532181755331, + "grad_norm": 23377.744140625, + "learning_rate": 4.347822537865914e-06, + "loss": 0.3958, + "step": 196250 + }, + { + "epoch": 1.0127901517379438, + "grad_norm": 23768.7421875, + "learning_rate": 4.332935461482862e-06, + "loss": 0.4004, + "step": 196300 + }, + { + "epoch": 1.0130481217205567, + "grad_norm": 25974.603515625, + "learning_rate": 4.3180727612945896e-06, + "loss": 0.4038, + "step": 196350 + }, + { + "epoch": 1.0133060917031693, + "grad_norm": 22376.34765625, + "learning_rate": 4.303234445234477e-06, + "loss": 0.3991, + "step": 196400 + }, + { + "epoch": 1.0135640616857822, + "grad_norm": 22145.03515625, + "learning_rate": 4.288420521222963e-06, + "loss": 0.3971, + "step": 196450 + }, + { + "epoch": 1.013822031668395, + "grad_norm": 21512.77734375, + "learning_rate": 4.273630997167422e-06, + "loss": 0.399, + "step": 196500 + }, + { + "epoch": 1.0140800016510079, + "grad_norm": 22957.626953125, + "learning_rate": 4.258865880962215e-06, + "loss": 0.3995, + "step": 196550 + }, + { + "epoch": 1.0143379716336207, + "grad_norm": 21951.89453125, + "learning_rate": 4.244125180488673e-06, + "loss": 0.3961, + "step": 196600 + }, + { + "epoch": 1.0145959416162336, + "grad_norm": 23440.005859375, + "learning_rate": 4.229408903615095e-06, + "loss": 0.4057, + "step": 196650 + }, + { + "epoch": 1.0148539115988464, + "grad_norm": 23987.21484375, + "learning_rate": 4.214717058196754e-06, + "loss": 0.3999, + "step": 196700 + }, + { + "epoch": 1.0151118815814593, + "grad_norm": 24526.482421875, + "learning_rate": 4.200049652075866e-06, + "loss": 0.3964, + "step": 196750 + }, + { + "epoch": 1.0153698515640721, + "grad_norm": 23351.193359375, + "learning_rate": 4.185406693081612e-06, + "loss": 0.3978, + "step": 196800 + }, + { + "epoch": 1.0156278215466847, + "grad_norm": 25014.873046875, + "learning_rate": 4.170788189030106e-06, + "loss": 0.3963, + "step": 196850 + }, + { + "epoch": 1.0158857915292976, + "grad_norm": 21085.181640625, + "learning_rate": 4.156194147724451e-06, + "loss": 0.4015, + "step": 196900 + }, + { + "epoch": 1.0161437615119104, + "grad_norm": 20203.427734375, + "learning_rate": 4.141624576954634e-06, + "loss": 0.4037, + "step": 196950 + }, + { + "epoch": 1.0164017314945233, + "grad_norm": 23869.416015625, + "learning_rate": 4.1270794844976255e-06, + "loss": 0.4038, + "step": 197000 + }, + { + "epoch": 1.0166597014771361, + "grad_norm": 24936.158203125, + "learning_rate": 4.112558878117318e-06, + "loss": 0.4073, + "step": 197050 + }, + { + "epoch": 1.016917671459749, + "grad_norm": 23021.921875, + "learning_rate": 4.098062765564509e-06, + "loss": 0.4056, + "step": 197100 + }, + { + "epoch": 1.0171756414423618, + "grad_norm": 21626.19921875, + "learning_rate": 4.083591154576971e-06, + "loss": 0.3989, + "step": 197150 + }, + { + "epoch": 1.0174336114249747, + "grad_norm": 25556.169921875, + "learning_rate": 4.069144052879342e-06, + "loss": 0.3975, + "step": 197200 + }, + { + "epoch": 1.0176915814075873, + "grad_norm": 23286.365234375, + "learning_rate": 4.054721468183226e-06, + "loss": 0.3974, + "step": 197250 + }, + { + "epoch": 1.0179495513902002, + "grad_norm": 24497.57421875, + "learning_rate": 4.040323408187113e-06, + "loss": 0.4028, + "step": 197300 + }, + { + "epoch": 1.018207521372813, + "grad_norm": 26279.40625, + "learning_rate": 4.025949880576407e-06, + "loss": 0.4034, + "step": 197350 + }, + { + "epoch": 1.0184654913554259, + "grad_norm": 22679.267578125, + "learning_rate": 4.011600893023421e-06, + "loss": 0.3991, + "step": 197400 + }, + { + "epoch": 1.0187234613380387, + "grad_norm": 25421.83984375, + "learning_rate": 3.997276453187365e-06, + "loss": 0.4023, + "step": 197450 + }, + { + "epoch": 1.0189814313206516, + "grad_norm": 25313.75, + "learning_rate": 3.982976568714336e-06, + "loss": 0.4018, + "step": 197500 + }, + { + "epoch": 1.0192394013032644, + "grad_norm": 24318.505859375, + "learning_rate": 3.96870124723736e-06, + "loss": 0.4027, + "step": 197550 + }, + { + "epoch": 1.0194973712858773, + "grad_norm": 22409.70703125, + "learning_rate": 3.9544504963763105e-06, + "loss": 0.3982, + "step": 197600 + }, + { + "epoch": 1.01975534126849, + "grad_norm": 25028.7265625, + "learning_rate": 3.9402243237379675e-06, + "loss": 0.4037, + "step": 197650 + }, + { + "epoch": 1.0200133112511027, + "grad_norm": 21235.19140625, + "learning_rate": 3.926022736915985e-06, + "loss": 0.3972, + "step": 197700 + }, + { + "epoch": 1.0202712812337156, + "grad_norm": 24214.41015625, + "learning_rate": 3.911845743490889e-06, + "loss": 0.3984, + "step": 197750 + }, + { + "epoch": 1.0205292512163284, + "grad_norm": 24445.375, + "learning_rate": 3.897693351030102e-06, + "loss": 0.4025, + "step": 197800 + }, + { + "epoch": 1.0207872211989413, + "grad_norm": 25233.3515625, + "learning_rate": 3.883565567087871e-06, + "loss": 0.3993, + "step": 197850 + }, + { + "epoch": 1.0210451911815541, + "grad_norm": 23982.43359375, + "learning_rate": 3.8694623992053534e-06, + "loss": 0.4023, + "step": 197900 + }, + { + "epoch": 1.021303161164167, + "grad_norm": 28533.689453125, + "learning_rate": 3.855383854910549e-06, + "loss": 0.3917, + "step": 197950 + }, + { + "epoch": 1.0215611311467798, + "grad_norm": 26334.77734375, + "learning_rate": 3.841329941718286e-06, + "loss": 0.3989, + "step": 198000 + }, + { + "epoch": 1.0218191011293927, + "grad_norm": 24765.802734375, + "learning_rate": 3.827300667130312e-06, + "loss": 0.398, + "step": 198050 + }, + { + "epoch": 1.0220770711120055, + "grad_norm": 25089.34765625, + "learning_rate": 3.8132960386351445e-06, + "loss": 0.4049, + "step": 198100 + }, + { + "epoch": 1.0223350410946181, + "grad_norm": 23840.72265625, + "learning_rate": 3.7993160637082027e-06, + "loss": 0.3998, + "step": 198150 + }, + { + "epoch": 1.022593011077231, + "grad_norm": 21590.1328125, + "learning_rate": 3.7853607498117282e-06, + "loss": 0.404, + "step": 198200 + }, + { + "epoch": 1.0228509810598438, + "grad_norm": 24620.478515625, + "learning_rate": 3.7714301043947855e-06, + "loss": 0.3958, + "step": 198250 + }, + { + "epoch": 1.0231089510424567, + "grad_norm": 22476.82421875, + "learning_rate": 3.757524134893292e-06, + "loss": 0.3993, + "step": 198300 + }, + { + "epoch": 1.0233669210250695, + "grad_norm": 22550.45703125, + "learning_rate": 3.7436428487299836e-06, + "loss": 0.3983, + "step": 198350 + }, + { + "epoch": 1.0236248910076824, + "grad_norm": 23764.958984375, + "learning_rate": 3.7297862533144045e-06, + "loss": 0.4005, + "step": 198400 + }, + { + "epoch": 1.0238828609902952, + "grad_norm": 23600.103515625, + "learning_rate": 3.7159543560429667e-06, + "loss": 0.3976, + "step": 198450 + }, + { + "epoch": 1.024140830972908, + "grad_norm": 24258.537109375, + "learning_rate": 3.7021471642988583e-06, + "loss": 0.4015, + "step": 198500 + }, + { + "epoch": 1.0243988009555207, + "grad_norm": 22559.609375, + "learning_rate": 3.6883646854520837e-06, + "loss": 0.4028, + "step": 198550 + }, + { + "epoch": 1.0246567709381336, + "grad_norm": 20827.234375, + "learning_rate": 3.67460692685947e-06, + "loss": 0.3954, + "step": 198600 + }, + { + "epoch": 1.0249147409207464, + "grad_norm": 24864.171875, + "learning_rate": 3.6608738958646303e-06, + "loss": 0.3919, + "step": 198650 + }, + { + "epoch": 1.0251727109033593, + "grad_norm": 25603.6796875, + "learning_rate": 3.647165599798019e-06, + "loss": 0.3984, + "step": 198700 + }, + { + "epoch": 1.0254306808859721, + "grad_norm": 21448.0234375, + "learning_rate": 3.6334820459768217e-06, + "loss": 0.4031, + "step": 198750 + }, + { + "epoch": 1.025688650868585, + "grad_norm": 24923.51953125, + "learning_rate": 3.6198232417050782e-06, + "loss": 0.4023, + "step": 198800 + }, + { + "epoch": 1.0259466208511978, + "grad_norm": 21672.09765625, + "learning_rate": 3.6061891942735957e-06, + "loss": 0.4027, + "step": 198850 + }, + { + "epoch": 1.0262045908338107, + "grad_norm": 24733.31640625, + "learning_rate": 3.5925799109599423e-06, + "loss": 0.401, + "step": 198900 + }, + { + "epoch": 1.0264625608164235, + "grad_norm": 25941.05859375, + "learning_rate": 3.5789953990285284e-06, + "loss": 0.3944, + "step": 198950 + }, + { + "epoch": 1.0267205307990361, + "grad_norm": 25462.96875, + "learning_rate": 3.56543566573046e-06, + "loss": 0.4021, + "step": 199000 + }, + { + "epoch": 1.026978500781649, + "grad_norm": 24243.462890625, + "learning_rate": 3.5519007183036856e-06, + "loss": 0.4009, + "step": 199050 + }, + { + "epoch": 1.0272364707642618, + "grad_norm": 22507.208984375, + "learning_rate": 3.5383905639728987e-06, + "loss": 0.3968, + "step": 199100 + }, + { + "epoch": 1.0274944407468747, + "grad_norm": 22496.060546875, + "learning_rate": 3.524905209949553e-06, + "loss": 0.3988, + "step": 199150 + }, + { + "epoch": 1.0277524107294875, + "grad_norm": 22755.974609375, + "learning_rate": 3.511444663431862e-06, + "loss": 0.3944, + "step": 199200 + }, + { + "epoch": 1.0280103807121004, + "grad_norm": 24945.93359375, + "learning_rate": 3.498008931604818e-06, + "loss": 0.4015, + "step": 199250 + }, + { + "epoch": 1.0282683506947132, + "grad_norm": 23216.15625, + "learning_rate": 3.484598021640134e-06, + "loss": 0.3982, + "step": 199300 + }, + { + "epoch": 1.028526320677326, + "grad_norm": 24690.8203125, + "learning_rate": 3.4712119406963174e-06, + "loss": 0.4, + "step": 199350 + }, + { + "epoch": 1.0287842906599387, + "grad_norm": 23324.27734375, + "learning_rate": 3.4578506959185907e-06, + "loss": 0.4005, + "step": 199400 + }, + { + "epoch": 1.0290422606425516, + "grad_norm": 22831.544921875, + "learning_rate": 3.444514294438922e-06, + "loss": 0.3987, + "step": 199450 + }, + { + "epoch": 1.0293002306251644, + "grad_norm": 22126.681640625, + "learning_rate": 3.4312027433760383e-06, + "loss": 0.4044, + "step": 199500 + }, + { + "epoch": 1.0295582006077773, + "grad_norm": 22105.94140625, + "learning_rate": 3.417916049835368e-06, + "loss": 0.4023, + "step": 199550 + }, + { + "epoch": 1.02981617059039, + "grad_norm": 24164.646484375, + "learning_rate": 3.4046542209091037e-06, + "loss": 0.3968, + "step": 199600 + }, + { + "epoch": 1.030074140573003, + "grad_norm": 23752.33203125, + "learning_rate": 3.3914172636761554e-06, + "loss": 0.3974, + "step": 199650 + }, + { + "epoch": 1.0303321105556158, + "grad_norm": 21793.787109375, + "learning_rate": 3.3782051852021433e-06, + "loss": 0.3981, + "step": 199700 + }, + { + "epoch": 1.0305900805382286, + "grad_norm": 26727.91796875, + "learning_rate": 3.365017992539432e-06, + "loss": 0.4025, + "step": 199750 + }, + { + "epoch": 1.0308480505208415, + "grad_norm": 21089.958984375, + "learning_rate": 3.3518556927270683e-06, + "loss": 0.4001, + "step": 199800 + }, + { + "epoch": 1.0311060205034541, + "grad_norm": 23690.0390625, + "learning_rate": 3.33871829279086e-06, + "loss": 0.3956, + "step": 199850 + }, + { + "epoch": 1.031363990486067, + "grad_norm": 24266.84375, + "learning_rate": 3.325605799743281e-06, + "loss": 0.3966, + "step": 199900 + }, + { + "epoch": 1.0316219604686798, + "grad_norm": 22199.455078125, + "learning_rate": 3.312518220583527e-06, + "loss": 0.4058, + "step": 199950 + }, + { + "epoch": 1.0318799304512927, + "grad_norm": 21272.033203125, + "learning_rate": 3.299455562297504e-06, + "loss": 0.3969, + "step": 200000 + }, + { + "epoch": 1.0318799304512927, + "eval_loss": 0.38684460520744324, + "eval_runtime": 3230.0057, + "eval_samples_per_second": 960.097, + "eval_steps_per_second": 1.875, + "step": 200000 + }, + { + "epoch": 1.0321379004339055, + "grad_norm": 23089.7578125, + "learning_rate": 3.286417831857791e-06, + "loss": 0.4011, + "step": 200050 + }, + { + "epoch": 1.0323958704165184, + "grad_norm": 27875.5859375, + "learning_rate": 3.2734050362236814e-06, + "loss": 0.4014, + "step": 200100 + }, + { + "epoch": 1.0326538403991312, + "grad_norm": 22023.40234375, + "learning_rate": 3.260417182341169e-06, + "loss": 0.398, + "step": 200150 + }, + { + "epoch": 1.032911810381744, + "grad_norm": 23899.208984375, + "learning_rate": 3.247454277142892e-06, + "loss": 0.3976, + "step": 200200 + }, + { + "epoch": 1.0331697803643567, + "grad_norm": 22874.44921875, + "learning_rate": 3.2345163275482147e-06, + "loss": 0.4014, + "step": 200250 + }, + { + "epoch": 1.0334277503469695, + "grad_norm": 21650.296875, + "learning_rate": 3.221603340463164e-06, + "loss": 0.4012, + "step": 200300 + }, + { + "epoch": 1.0336857203295824, + "grad_norm": 24189.89453125, + "learning_rate": 3.2087153227804314e-06, + "loss": 0.401, + "step": 200350 + }, + { + "epoch": 1.0339436903121952, + "grad_norm": 21525.12109375, + "learning_rate": 3.1958522813794134e-06, + "loss": 0.4016, + "step": 200400 + }, + { + "epoch": 1.034201660294808, + "grad_norm": 23732.640625, + "learning_rate": 3.1830142231261294e-06, + "loss": 0.4021, + "step": 200450 + }, + { + "epoch": 1.034459630277421, + "grad_norm": 24911.607421875, + "learning_rate": 3.170201154873298e-06, + "loss": 0.3943, + "step": 200500 + }, + { + "epoch": 1.0347176002600338, + "grad_norm": 25295.861328125, + "learning_rate": 3.1574130834602813e-06, + "loss": 0.401, + "step": 200550 + }, + { + "epoch": 1.0349755702426466, + "grad_norm": 23536.498046875, + "learning_rate": 3.1446500157131075e-06, + "loss": 0.3964, + "step": 200600 + }, + { + "epoch": 1.0352335402252595, + "grad_norm": 26484.287109375, + "learning_rate": 3.131911958444461e-06, + "loss": 0.4068, + "step": 200650 + }, + { + "epoch": 1.0354915102078721, + "grad_norm": 24330.001953125, + "learning_rate": 3.1191989184536474e-06, + "loss": 0.3911, + "step": 200700 + }, + { + "epoch": 1.035749480190485, + "grad_norm": 21095.994140625, + "learning_rate": 3.1065109025266713e-06, + "loss": 0.4, + "step": 200750 + }, + { + "epoch": 1.0360074501730978, + "grad_norm": 21829.64453125, + "learning_rate": 3.093847917436132e-06, + "loss": 0.4016, + "step": 200800 + }, + { + "epoch": 1.0362654201557107, + "grad_norm": 25772.79296875, + "learning_rate": 3.0812099699412953e-06, + "loss": 0.4032, + "step": 200850 + }, + { + "epoch": 1.0365233901383235, + "grad_norm": 25614.240234375, + "learning_rate": 3.0685970667880425e-06, + "loss": 0.3976, + "step": 200900 + }, + { + "epoch": 1.0367813601209364, + "grad_norm": 26170.455078125, + "learning_rate": 3.056009214708905e-06, + "loss": 0.4001, + "step": 200950 + }, + { + "epoch": 1.0370393301035492, + "grad_norm": 24801.76171875, + "learning_rate": 3.0434464204230186e-06, + "loss": 0.3924, + "step": 201000 + }, + { + "epoch": 1.037297300086162, + "grad_norm": 28940.640625, + "learning_rate": 3.0309086906361917e-06, + "loss": 0.3998, + "step": 201050 + }, + { + "epoch": 1.037555270068775, + "grad_norm": 23856.90625, + "learning_rate": 3.018396032040788e-06, + "loss": 0.397, + "step": 201100 + }, + { + "epoch": 1.0378132400513875, + "grad_norm": 23309.861328125, + "learning_rate": 3.005908451315842e-06, + "loss": 0.4026, + "step": 201150 + }, + { + "epoch": 1.0380712100340004, + "grad_norm": 23592.7265625, + "learning_rate": 2.993445955126978e-06, + "loss": 0.3971, + "step": 201200 + }, + { + "epoch": 1.0383291800166132, + "grad_norm": 23301.861328125, + "learning_rate": 2.9810085501264296e-06, + "loss": 0.403, + "step": 201250 + }, + { + "epoch": 1.038587149999226, + "grad_norm": 23200.0859375, + "learning_rate": 2.968596242953059e-06, + "loss": 0.4001, + "step": 201300 + }, + { + "epoch": 1.038845119981839, + "grad_norm": 26894.70703125, + "learning_rate": 2.956209040232294e-06, + "loss": 0.3988, + "step": 201350 + }, + { + "epoch": 1.0391030899644518, + "grad_norm": 22423.931640625, + "learning_rate": 2.9438469485761956e-06, + "loss": 0.3981, + "step": 201400 + }, + { + "epoch": 1.0393610599470646, + "grad_norm": 24167.068359375, + "learning_rate": 2.9315099745834073e-06, + "loss": 0.4024, + "step": 201450 + }, + { + "epoch": 1.0396190299296775, + "grad_norm": 25832.712890625, + "learning_rate": 2.9191981248391677e-06, + "loss": 0.3937, + "step": 201500 + }, + { + "epoch": 1.03987699991229, + "grad_norm": 26923.005859375, + "learning_rate": 2.9069114059153024e-06, + "loss": 0.3922, + "step": 201550 + }, + { + "epoch": 1.040134969894903, + "grad_norm": 23295.380859375, + "learning_rate": 2.8946498243702158e-06, + "loss": 0.4011, + "step": 201600 + }, + { + "epoch": 1.0403929398775158, + "grad_norm": 23378.5234375, + "learning_rate": 2.882413386748922e-06, + "loss": 0.4033, + "step": 201650 + }, + { + "epoch": 1.0406509098601286, + "grad_norm": 24349.9140625, + "learning_rate": 2.8702020995829803e-06, + "loss": 0.3964, + "step": 201700 + }, + { + "epoch": 1.0409088798427415, + "grad_norm": 24178.61328125, + "learning_rate": 2.8580159693905485e-06, + "loss": 0.3978, + "step": 201750 + }, + { + "epoch": 1.0411668498253543, + "grad_norm": 24998.189453125, + "learning_rate": 2.8458550026763344e-06, + "loss": 0.3943, + "step": 201800 + }, + { + "epoch": 1.0414248198079672, + "grad_norm": 28928.828125, + "learning_rate": 2.8337192059316344e-06, + "loss": 0.3998, + "step": 201850 + }, + { + "epoch": 1.04168278979058, + "grad_norm": 24329.37890625, + "learning_rate": 2.8216085856342946e-06, + "loss": 0.3976, + "step": 201900 + }, + { + "epoch": 1.041940759773193, + "grad_norm": 24121.482421875, + "learning_rate": 2.809523148248744e-06, + "loss": 0.3952, + "step": 201950 + }, + { + "epoch": 1.0421987297558055, + "grad_norm": 23812.671875, + "learning_rate": 2.7974629002259443e-06, + "loss": 0.4052, + "step": 202000 + }, + { + "epoch": 1.0424566997384184, + "grad_norm": 25162.40234375, + "learning_rate": 2.785427848003419e-06, + "loss": 0.3948, + "step": 202050 + }, + { + "epoch": 1.0427146697210312, + "grad_norm": 23631.462890625, + "learning_rate": 2.773417998005262e-06, + "loss": 0.3982, + "step": 202100 + }, + { + "epoch": 1.042972639703644, + "grad_norm": 24178.177734375, + "learning_rate": 2.761433356642079e-06, + "loss": 0.4012, + "step": 202150 + }, + { + "epoch": 1.043230609686257, + "grad_norm": 24726.37890625, + "learning_rate": 2.7494739303110527e-06, + "loss": 0.3926, + "step": 202200 + }, + { + "epoch": 1.0434885796688698, + "grad_norm": 23798.73828125, + "learning_rate": 2.7375397253958935e-06, + "loss": 0.3998, + "step": 202250 + }, + { + "epoch": 1.0437465496514826, + "grad_norm": 25162.677734375, + "learning_rate": 2.725630748266844e-06, + "loss": 0.4038, + "step": 202300 + }, + { + "epoch": 1.0440045196340955, + "grad_norm": 28668.78515625, + "learning_rate": 2.7137470052806814e-06, + "loss": 0.3989, + "step": 202350 + }, + { + "epoch": 1.0442624896167083, + "grad_norm": 22550.810546875, + "learning_rate": 2.7018885027807195e-06, + "loss": 0.3994, + "step": 202400 + }, + { + "epoch": 1.044520459599321, + "grad_norm": 26758.71484375, + "learning_rate": 2.6900552470968064e-06, + "loss": 0.4063, + "step": 202450 + }, + { + "epoch": 1.0447784295819338, + "grad_norm": 24895.77734375, + "learning_rate": 2.678247244545301e-06, + "loss": 0.3968, + "step": 202500 + }, + { + "epoch": 1.0450363995645466, + "grad_norm": 22442.416015625, + "learning_rate": 2.6664645014290833e-06, + "loss": 0.4009, + "step": 202550 + }, + { + "epoch": 1.0452943695471595, + "grad_norm": 24647.232421875, + "learning_rate": 2.654707024037556e-06, + "loss": 0.3984, + "step": 202600 + }, + { + "epoch": 1.0455523395297723, + "grad_norm": 24156.189453125, + "learning_rate": 2.6429748186466265e-06, + "loss": 0.3983, + "step": 202650 + }, + { + "epoch": 1.0458103095123852, + "grad_norm": 24131.658203125, + "learning_rate": 2.6312678915187185e-06, + "loss": 0.3941, + "step": 202700 + }, + { + "epoch": 1.046068279494998, + "grad_norm": 24890.5625, + "learning_rate": 2.6195862489027833e-06, + "loss": 0.3936, + "step": 202750 + }, + { + "epoch": 1.0463262494776109, + "grad_norm": 26486.58203125, + "learning_rate": 2.607929897034228e-06, + "loss": 0.4073, + "step": 202800 + }, + { + "epoch": 1.0465842194602235, + "grad_norm": 24554.09375, + "learning_rate": 2.5962988421350033e-06, + "loss": 0.3985, + "step": 202850 + }, + { + "epoch": 1.0468421894428364, + "grad_norm": 24964.349609375, + "learning_rate": 2.584693090413537e-06, + "loss": 0.3974, + "step": 202900 + }, + { + "epoch": 1.0471001594254492, + "grad_norm": 21256.87890625, + "learning_rate": 2.5731126480647516e-06, + "loss": 0.3969, + "step": 202950 + }, + { + "epoch": 1.047358129408062, + "grad_norm": 23721.197265625, + "learning_rate": 2.5615575212700804e-06, + "loss": 0.4039, + "step": 203000 + }, + { + "epoch": 1.047616099390675, + "grad_norm": 25096.4609375, + "learning_rate": 2.550027716197395e-06, + "loss": 0.3953, + "step": 203050 + }, + { + "epoch": 1.0478740693732878, + "grad_norm": 22199.11328125, + "learning_rate": 2.5385232390011114e-06, + "loss": 0.3979, + "step": 203100 + }, + { + "epoch": 1.0481320393559006, + "grad_norm": 24967.4609375, + "learning_rate": 2.527044095822084e-06, + "loss": 0.4023, + "step": 203150 + }, + { + "epoch": 1.0483900093385135, + "grad_norm": 28301.302734375, + "learning_rate": 2.5155902927876564e-06, + "loss": 0.4047, + "step": 203200 + }, + { + "epoch": 1.0486479793211263, + "grad_norm": 22268.037109375, + "learning_rate": 2.504161836011648e-06, + "loss": 0.4032, + "step": 203250 + }, + { + "epoch": 1.048905949303739, + "grad_norm": 28254.658203125, + "learning_rate": 2.4927587315943414e-06, + "loss": 0.3915, + "step": 203300 + }, + { + "epoch": 1.0491639192863518, + "grad_norm": 24471.462890625, + "learning_rate": 2.4813809856225112e-06, + "loss": 0.3986, + "step": 203350 + }, + { + "epoch": 1.0494218892689646, + "grad_norm": 24208.7578125, + "learning_rate": 2.470028604169361e-06, + "loss": 0.3969, + "step": 203400 + }, + { + "epoch": 1.0496798592515775, + "grad_norm": 23962.025390625, + "learning_rate": 2.4587015932945824e-06, + "loss": 0.3992, + "step": 203450 + }, + { + "epoch": 1.0499378292341903, + "grad_norm": 24777.421875, + "learning_rate": 2.4473999590443054e-06, + "loss": 0.4042, + "step": 203500 + }, + { + "epoch": 1.0501957992168032, + "grad_norm": 26705.40234375, + "learning_rate": 2.4361237074511323e-06, + "loss": 0.3985, + "step": 203550 + }, + { + "epoch": 1.050453769199416, + "grad_norm": 22508.51171875, + "learning_rate": 2.424872844534093e-06, + "loss": 0.3967, + "step": 203600 + }, + { + "epoch": 1.0507117391820289, + "grad_norm": 24678.62109375, + "learning_rate": 2.4136473762987057e-06, + "loss": 0.4002, + "step": 203650 + }, + { + "epoch": 1.0509697091646415, + "grad_norm": 24190.259765625, + "learning_rate": 2.402447308736883e-06, + "loss": 0.4002, + "step": 203700 + }, + { + "epoch": 1.0512276791472543, + "grad_norm": 27986.912109375, + "learning_rate": 2.391272647827014e-06, + "loss": 0.406, + "step": 203750 + }, + { + "epoch": 1.0514856491298672, + "grad_norm": 23664.740234375, + "learning_rate": 2.3801233995339236e-06, + "loss": 0.3988, + "step": 203800 + }, + { + "epoch": 1.05174361911248, + "grad_norm": 32503.17578125, + "learning_rate": 2.368999569808844e-06, + "loss": 0.3996, + "step": 203850 + }, + { + "epoch": 1.052001589095093, + "grad_norm": 24140.591796875, + "learning_rate": 2.3579011645894933e-06, + "loss": 0.4021, + "step": 203900 + }, + { + "epoch": 1.0522595590777057, + "grad_norm": 24920.033203125, + "learning_rate": 2.3468281897999487e-06, + "loss": 0.4038, + "step": 203950 + }, + { + "epoch": 1.0525175290603186, + "grad_norm": 20836.1796875, + "learning_rate": 2.335780651350772e-06, + "loss": 0.3929, + "step": 204000 + }, + { + "epoch": 1.0527754990429314, + "grad_norm": 22305.021484375, + "learning_rate": 2.324758555138923e-06, + "loss": 0.3963, + "step": 204050 + }, + { + "epoch": 1.0530334690255443, + "grad_norm": 22536.13671875, + "learning_rate": 2.3137619070477788e-06, + "loss": 0.3923, + "step": 204100 + }, + { + "epoch": 1.053291439008157, + "grad_norm": 23319.326171875, + "learning_rate": 2.3027907129471395e-06, + "loss": 0.4034, + "step": 204150 + }, + { + "epoch": 1.0535494089907698, + "grad_norm": 25774.677734375, + "learning_rate": 2.2918449786932085e-06, + "loss": 0.4015, + "step": 204200 + }, + { + "epoch": 1.0538073789733826, + "grad_norm": 23130.119140625, + "learning_rate": 2.280924710128618e-06, + "loss": 0.3971, + "step": 204250 + }, + { + "epoch": 1.0540653489559955, + "grad_norm": 23122.1875, + "learning_rate": 2.270029913082394e-06, + "loss": 0.3969, + "step": 204300 + }, + { + "epoch": 1.0543233189386083, + "grad_norm": 21518.763671875, + "learning_rate": 2.2591605933699632e-06, + "loss": 0.3992, + "step": 204350 + }, + { + "epoch": 1.0545812889212212, + "grad_norm": 25077.322265625, + "learning_rate": 2.248316756793156e-06, + "loss": 0.405, + "step": 204400 + }, + { + "epoch": 1.054839258903834, + "grad_norm": 23907.869140625, + "learning_rate": 2.237498409140215e-06, + "loss": 0.4009, + "step": 204450 + }, + { + "epoch": 1.0550972288864469, + "grad_norm": 22796.865234375, + "learning_rate": 2.2267055561857484e-06, + "loss": 0.4044, + "step": 204500 + }, + { + "epoch": 1.0553551988690595, + "grad_norm": 33471.05859375, + "learning_rate": 2.2159382036907927e-06, + "loss": 0.4021, + "step": 204550 + }, + { + "epoch": 1.0556131688516723, + "grad_norm": 23975.6640625, + "learning_rate": 2.2051963574027225e-06, + "loss": 0.3922, + "step": 204600 + }, + { + "epoch": 1.0558711388342852, + "grad_norm": 24563.220703125, + "learning_rate": 2.194480023055351e-06, + "loss": 0.3952, + "step": 204650 + }, + { + "epoch": 1.056129108816898, + "grad_norm": 24479.20703125, + "learning_rate": 2.1837892063688525e-06, + "loss": 0.4005, + "step": 204700 + }, + { + "epoch": 1.0563870787995109, + "grad_norm": 24895.6640625, + "learning_rate": 2.173123913049757e-06, + "loss": 0.3985, + "step": 204750 + }, + { + "epoch": 1.0566450487821237, + "grad_norm": 25606.34765625, + "learning_rate": 2.1624841487910052e-06, + "loss": 0.4019, + "step": 204800 + }, + { + "epoch": 1.0569030187647366, + "grad_norm": 23026.8828125, + "learning_rate": 2.151869919271904e-06, + "loss": 0.4023, + "step": 204850 + }, + { + "epoch": 1.0571609887473494, + "grad_norm": 24365.9609375, + "learning_rate": 2.1412812301581097e-06, + "loss": 0.3992, + "step": 204900 + }, + { + "epoch": 1.0574189587299623, + "grad_norm": 25374.990234375, + "learning_rate": 2.130718087101663e-06, + "loss": 0.4009, + "step": 204950 + }, + { + "epoch": 1.057676928712575, + "grad_norm": 23697.388671875, + "learning_rate": 2.1201804957409697e-06, + "loss": 0.4042, + "step": 205000 + }, + { + "epoch": 1.057676928712575, + "eval_loss": 0.386392205953598, + "eval_runtime": 3213.2768, + "eval_samples_per_second": 965.096, + "eval_steps_per_second": 1.885, + "step": 205000 + }, + { + "epoch": 1.0579348986951878, + "grad_norm": 23768.669921875, + "learning_rate": 2.109668461700781e-06, + "loss": 0.4058, + "step": 205050 + }, + { + "epoch": 1.0581928686778006, + "grad_norm": 24203.693359375, + "learning_rate": 2.099181990592236e-06, + "loss": 0.3971, + "step": 205100 + }, + { + "epoch": 1.0584508386604135, + "grad_norm": 26739.72265625, + "learning_rate": 2.088721088012796e-06, + "loss": 0.4008, + "step": 205150 + }, + { + "epoch": 1.0587088086430263, + "grad_norm": 25664.5, + "learning_rate": 2.078285759546289e-06, + "loss": 0.4001, + "step": 205200 + }, + { + "epoch": 1.0589667786256391, + "grad_norm": 22887.986328125, + "learning_rate": 2.067876010762898e-06, + "loss": 0.3946, + "step": 205250 + }, + { + "epoch": 1.059224748608252, + "grad_norm": 24908.890625, + "learning_rate": 2.057491847219134e-06, + "loss": 0.3997, + "step": 205300 + }, + { + "epoch": 1.0594827185908648, + "grad_norm": 26352.986328125, + "learning_rate": 2.0471332744578853e-06, + "loss": 0.4022, + "step": 205350 + }, + { + "epoch": 1.0597406885734777, + "grad_norm": 23545.6640625, + "learning_rate": 2.0368002980083235e-06, + "loss": 0.3976, + "step": 205400 + }, + { + "epoch": 1.0599986585560903, + "grad_norm": 24206.896484375, + "learning_rate": 2.02649292338602e-06, + "loss": 0.3934, + "step": 205450 + }, + { + "epoch": 1.0602566285387032, + "grad_norm": 22331.580078125, + "learning_rate": 2.0162111560928345e-06, + "loss": 0.3969, + "step": 205500 + }, + { + "epoch": 1.060514598521316, + "grad_norm": 24358.099609375, + "learning_rate": 2.0059550016169827e-06, + "loss": 0.3934, + "step": 205550 + }, + { + "epoch": 1.0607725685039289, + "grad_norm": 23970.693359375, + "learning_rate": 1.9957244654330133e-06, + "loss": 0.4012, + "step": 205600 + }, + { + "epoch": 1.0610305384865417, + "grad_norm": 23980.03125, + "learning_rate": 1.985519553001758e-06, + "loss": 0.3979, + "step": 205650 + }, + { + "epoch": 1.0612885084691546, + "grad_norm": 25418.708984375, + "learning_rate": 1.9753402697704313e-06, + "loss": 0.3988, + "step": 205700 + }, + { + "epoch": 1.0615464784517674, + "grad_norm": 22902.38671875, + "learning_rate": 1.965186621172521e-06, + "loss": 0.393, + "step": 205750 + }, + { + "epoch": 1.0618044484343803, + "grad_norm": 24547.4375, + "learning_rate": 1.9550586126278525e-06, + "loss": 0.4, + "step": 205800 + }, + { + "epoch": 1.062062418416993, + "grad_norm": 24038.619140625, + "learning_rate": 1.9449562495425623e-06, + "loss": 0.3995, + "step": 205850 + }, + { + "epoch": 1.0623203883996057, + "grad_norm": 22873.3515625, + "learning_rate": 1.9348795373090977e-06, + "loss": 0.4028, + "step": 205900 + }, + { + "epoch": 1.0625783583822186, + "grad_norm": 22140.7890625, + "learning_rate": 1.9248284813061957e-06, + "loss": 0.4036, + "step": 205950 + }, + { + "epoch": 1.0628363283648314, + "grad_norm": 23617.9140625, + "learning_rate": 1.914803086898942e-06, + "loss": 0.4005, + "step": 206000 + }, + { + "epoch": 1.0630942983474443, + "grad_norm": 22808.267578125, + "learning_rate": 1.9048033594386838e-06, + "loss": 0.3989, + "step": 206050 + }, + { + "epoch": 1.0633522683300571, + "grad_norm": 23189.298828125, + "learning_rate": 1.8948293042630794e-06, + "loss": 0.3982, + "step": 206100 + }, + { + "epoch": 1.06361023831267, + "grad_norm": 23994.052734375, + "learning_rate": 1.884880926696092e-06, + "loss": 0.4023, + "step": 206150 + }, + { + "epoch": 1.0638682082952828, + "grad_norm": 25587.49609375, + "learning_rate": 1.8749582320479687e-06, + "loss": 0.4056, + "step": 206200 + }, + { + "epoch": 1.0641261782778957, + "grad_norm": 22929.3984375, + "learning_rate": 1.865061225615261e-06, + "loss": 0.3987, + "step": 206250 + }, + { + "epoch": 1.0643841482605083, + "grad_norm": 24747.65234375, + "learning_rate": 1.8551899126807825e-06, + "loss": 0.3959, + "step": 206300 + }, + { + "epoch": 1.0646421182431212, + "grad_norm": 24856.77734375, + "learning_rate": 1.8453442985136682e-06, + "loss": 0.3989, + "step": 206350 + }, + { + "epoch": 1.064900088225734, + "grad_norm": 28136.97265625, + "learning_rate": 1.835524388369303e-06, + "loss": 0.396, + "step": 206400 + }, + { + "epoch": 1.0651580582083469, + "grad_norm": 25035.076171875, + "learning_rate": 1.8257301874893607e-06, + "loss": 0.4, + "step": 206450 + }, + { + "epoch": 1.0654160281909597, + "grad_norm": 23690.525390625, + "learning_rate": 1.8159617011018205e-06, + "loss": 0.3982, + "step": 206500 + }, + { + "epoch": 1.0656739981735726, + "grad_norm": 25419.333984375, + "learning_rate": 1.8062189344208835e-06, + "loss": 0.401, + "step": 206550 + }, + { + "epoch": 1.0659319681561854, + "grad_norm": 24045.5703125, + "learning_rate": 1.7965018926470622e-06, + "loss": 0.3974, + "step": 206600 + }, + { + "epoch": 1.0661899381387983, + "grad_norm": 23030.5625, + "learning_rate": 1.7868105809671298e-06, + "loss": 0.4049, + "step": 206650 + }, + { + "epoch": 1.066447908121411, + "grad_norm": 26036.546875, + "learning_rate": 1.7771450045541149e-06, + "loss": 0.3948, + "step": 206700 + }, + { + "epoch": 1.0667058781040237, + "grad_norm": 25025.001953125, + "learning_rate": 1.7675051685673127e-06, + "loss": 0.404, + "step": 206750 + }, + { + "epoch": 1.0669638480866366, + "grad_norm": 24296.775390625, + "learning_rate": 1.757891078152285e-06, + "loss": 0.3953, + "step": 206800 + }, + { + "epoch": 1.0672218180692494, + "grad_norm": 22506.177734375, + "learning_rate": 1.748302738440838e-06, + "loss": 0.4, + "step": 206850 + }, + { + "epoch": 1.0674797880518623, + "grad_norm": 26741.724609375, + "learning_rate": 1.738740154551055e-06, + "loss": 0.3971, + "step": 206900 + }, + { + "epoch": 1.0677377580344751, + "grad_norm": 24435.2421875, + "learning_rate": 1.7292033315872592e-06, + "loss": 0.3955, + "step": 206950 + }, + { + "epoch": 1.067995728017088, + "grad_norm": 21010.521484375, + "learning_rate": 1.7196922746400058e-06, + "loss": 0.3997, + "step": 207000 + }, + { + "epoch": 1.0682536979997008, + "grad_norm": 21215.384765625, + "learning_rate": 1.710206988786134e-06, + "loss": 0.3971, + "step": 207050 + }, + { + "epoch": 1.0685116679823137, + "grad_norm": 23713.73046875, + "learning_rate": 1.7007474790886823e-06, + "loss": 0.4022, + "step": 207100 + }, + { + "epoch": 1.0687696379649263, + "grad_norm": 26804.658203125, + "learning_rate": 1.691313750596979e-06, + "loss": 0.4028, + "step": 207150 + }, + { + "epoch": 1.0690276079475391, + "grad_norm": 24873.318359375, + "learning_rate": 1.68190580834654e-06, + "loss": 0.3995, + "step": 207200 + }, + { + "epoch": 1.069285577930152, + "grad_norm": 23567.91796875, + "learning_rate": 1.6725236573591596e-06, + "loss": 0.3988, + "step": 207250 + }, + { + "epoch": 1.0695435479127648, + "grad_norm": 23659.513671875, + "learning_rate": 1.6631673026428484e-06, + "loss": 0.4036, + "step": 207300 + }, + { + "epoch": 1.0698015178953777, + "grad_norm": 22014.623046875, + "learning_rate": 1.6538367491918339e-06, + "loss": 0.4003, + "step": 207350 + }, + { + "epoch": 1.0700594878779905, + "grad_norm": 26985.240234375, + "learning_rate": 1.6445320019865984e-06, + "loss": 0.3949, + "step": 207400 + }, + { + "epoch": 1.0703174578606034, + "grad_norm": 25032.328125, + "learning_rate": 1.635253065993836e-06, + "loss": 0.4072, + "step": 207450 + }, + { + "epoch": 1.0705754278432162, + "grad_norm": 23999.62890625, + "learning_rate": 1.6259999461664566e-06, + "loss": 0.4018, + "step": 207500 + }, + { + "epoch": 1.070833397825829, + "grad_norm": 24842.439453125, + "learning_rate": 1.616772647443593e-06, + "loss": 0.3992, + "step": 207550 + }, + { + "epoch": 1.0710913678084417, + "grad_norm": 26740.083984375, + "learning_rate": 1.6075711747506106e-06, + "loss": 0.3954, + "step": 207600 + }, + { + "epoch": 1.0713493377910546, + "grad_norm": 25067.95703125, + "learning_rate": 1.598395532999064e-06, + "loss": 0.4008, + "step": 207650 + }, + { + "epoch": 1.0716073077736674, + "grad_norm": 22218.814453125, + "learning_rate": 1.5892457270867467e-06, + "loss": 0.4005, + "step": 207700 + }, + { + "epoch": 1.0718652777562803, + "grad_norm": 25727.36328125, + "learning_rate": 1.5801217618976294e-06, + "loss": 0.402, + "step": 207750 + }, + { + "epoch": 1.0721232477388931, + "grad_norm": 24692.19921875, + "learning_rate": 1.5710236423019275e-06, + "loss": 0.4035, + "step": 207800 + }, + { + "epoch": 1.072381217721506, + "grad_norm": 25514.009765625, + "learning_rate": 1.5619513731560342e-06, + "loss": 0.3964, + "step": 207850 + }, + { + "epoch": 1.0726391877041188, + "grad_norm": 24503.408203125, + "learning_rate": 1.5529049593025425e-06, + "loss": 0.4036, + "step": 207900 + }, + { + "epoch": 1.0728971576867317, + "grad_norm": 27466.498046875, + "learning_rate": 1.5438844055702728e-06, + "loss": 0.4019, + "step": 207950 + }, + { + "epoch": 1.0731551276693443, + "grad_norm": 24170.1171875, + "learning_rate": 1.5348897167742015e-06, + "loss": 0.4005, + "step": 208000 + }, + { + "epoch": 1.0734130976519571, + "grad_norm": 24094.044921875, + "learning_rate": 1.525920897715527e-06, + "loss": 0.402, + "step": 208050 + }, + { + "epoch": 1.07367106763457, + "grad_norm": 22958.8125, + "learning_rate": 1.5169779531816365e-06, + "loss": 0.4041, + "step": 208100 + }, + { + "epoch": 1.0739290376171828, + "grad_norm": 24056.849609375, + "learning_rate": 1.508060887946089e-06, + "loss": 0.4017, + "step": 208150 + }, + { + "epoch": 1.0741870075997957, + "grad_norm": 22397.435546875, + "learning_rate": 1.499169706768655e-06, + "loss": 0.4036, + "step": 208200 + }, + { + "epoch": 1.0744449775824085, + "grad_norm": 22317.74609375, + "learning_rate": 1.4903044143952604e-06, + "loss": 0.3992, + "step": 208250 + }, + { + "epoch": 1.0747029475650214, + "grad_norm": 22923.57421875, + "learning_rate": 1.4814650155580367e-06, + "loss": 0.3953, + "step": 208300 + }, + { + "epoch": 1.0749609175476342, + "grad_norm": 24276.650390625, + "learning_rate": 1.4726515149752818e-06, + "loss": 0.4052, + "step": 208350 + }, + { + "epoch": 1.075218887530247, + "grad_norm": 27791.369140625, + "learning_rate": 1.4638639173514712e-06, + "loss": 0.4027, + "step": 208400 + }, + { + "epoch": 1.0754768575128597, + "grad_norm": 22683.73046875, + "learning_rate": 1.4551022273772585e-06, + "loss": 0.4036, + "step": 208450 + }, + { + "epoch": 1.0757348274954726, + "grad_norm": 26474.087890625, + "learning_rate": 1.4463664497294527e-06, + "loss": 0.3966, + "step": 208500 + }, + { + "epoch": 1.0759927974780854, + "grad_norm": 25933.25390625, + "learning_rate": 1.4376565890710514e-06, + "loss": 0.4042, + "step": 208550 + }, + { + "epoch": 1.0762507674606983, + "grad_norm": 23373.078125, + "learning_rate": 1.4289726500512134e-06, + "loss": 0.3971, + "step": 208600 + }, + { + "epoch": 1.076508737443311, + "grad_norm": 23282.916015625, + "learning_rate": 1.4203146373052423e-06, + "loss": 0.4038, + "step": 208650 + }, + { + "epoch": 1.076766707425924, + "grad_norm": 26307.12109375, + "learning_rate": 1.4116825554546353e-06, + "loss": 0.3959, + "step": 208700 + }, + { + "epoch": 1.0770246774085368, + "grad_norm": 24472.884765625, + "learning_rate": 1.4030764091070237e-06, + "loss": 0.3954, + "step": 208750 + }, + { + "epoch": 1.0772826473911497, + "grad_norm": 25234.9375, + "learning_rate": 1.394496202856188e-06, + "loss": 0.4003, + "step": 208800 + }, + { + "epoch": 1.0775406173737623, + "grad_norm": 31742.607421875, + "learning_rate": 1.385941941282104e-06, + "loss": 0.3963, + "step": 208850 + }, + { + "epoch": 1.0777985873563751, + "grad_norm": 22577.599609375, + "learning_rate": 1.3774136289508466e-06, + "loss": 0.4003, + "step": 208900 + }, + { + "epoch": 1.078056557338988, + "grad_norm": 24765.111328125, + "learning_rate": 1.3689112704146745e-06, + "loss": 0.3965, + "step": 208950 + }, + { + "epoch": 1.0783145273216008, + "grad_norm": 24935.205078125, + "learning_rate": 1.3604348702119795e-06, + "loss": 0.4001, + "step": 209000 + }, + { + "epoch": 1.0785724973042137, + "grad_norm": 25825.361328125, + "learning_rate": 1.3519844328673037e-06, + "loss": 0.4014, + "step": 209050 + }, + { + "epoch": 1.0788304672868265, + "grad_norm": 23713.068359375, + "learning_rate": 1.343559962891322e-06, + "loss": 0.3961, + "step": 209100 + }, + { + "epoch": 1.0790884372694394, + "grad_norm": 24578.435546875, + "learning_rate": 1.3351614647808542e-06, + "loss": 0.3983, + "step": 209150 + }, + { + "epoch": 1.0793464072520522, + "grad_norm": 22323.19140625, + "learning_rate": 1.3267889430188585e-06, + "loss": 0.4005, + "step": 209200 + }, + { + "epoch": 1.079604377234665, + "grad_norm": 22834.76953125, + "learning_rate": 1.3184424020744212e-06, + "loss": 0.3964, + "step": 209250 + }, + { + "epoch": 1.0798623472172777, + "grad_norm": 22097.615234375, + "learning_rate": 1.3101218464027676e-06, + "loss": 0.3932, + "step": 209300 + }, + { + "epoch": 1.0801203171998905, + "grad_norm": 23564.677734375, + "learning_rate": 1.3018272804452503e-06, + "loss": 0.3996, + "step": 209350 + }, + { + "epoch": 1.0803782871825034, + "grad_norm": 25264.150390625, + "learning_rate": 1.2935587086293443e-06, + "loss": 0.3975, + "step": 209400 + }, + { + "epoch": 1.0806362571651162, + "grad_norm": 22622.1015625, + "learning_rate": 1.2853161353686526e-06, + "loss": 0.4028, + "step": 209450 + }, + { + "epoch": 1.080894227147729, + "grad_norm": 25768.478515625, + "learning_rate": 1.2770995650629058e-06, + "loss": 0.395, + "step": 209500 + }, + { + "epoch": 1.081152197130342, + "grad_norm": 21997.23828125, + "learning_rate": 1.2689090020979455e-06, + "loss": 0.3997, + "step": 209550 + }, + { + "epoch": 1.0814101671129548, + "grad_norm": 24653.796875, + "learning_rate": 1.26074445084573e-06, + "loss": 0.3992, + "step": 209600 + }, + { + "epoch": 1.0816681370955676, + "grad_norm": 25631.18359375, + "learning_rate": 1.252605915664362e-06, + "loss": 0.4006, + "step": 209650 + }, + { + "epoch": 1.0819261070781803, + "grad_norm": 25373.162109375, + "learning_rate": 1.2444934008980058e-06, + "loss": 0.4047, + "step": 209700 + }, + { + "epoch": 1.0821840770607931, + "grad_norm": 23108.03125, + "learning_rate": 1.2364069108769804e-06, + "loss": 0.3994, + "step": 209750 + }, + { + "epoch": 1.082442047043406, + "grad_norm": 23362.0546875, + "learning_rate": 1.2283464499177e-06, + "loss": 0.4059, + "step": 209800 + }, + { + "epoch": 1.0827000170260188, + "grad_norm": 19350.4609375, + "learning_rate": 1.2203120223226727e-06, + "loss": 0.4012, + "step": 209850 + }, + { + "epoch": 1.0829579870086317, + "grad_norm": 24877.921875, + "learning_rate": 1.2123036323805237e-06, + "loss": 0.3989, + "step": 209900 + }, + { + "epoch": 1.0832159569912445, + "grad_norm": 25544.15625, + "learning_rate": 1.2043212843659724e-06, + "loss": 0.3963, + "step": 209950 + }, + { + "epoch": 1.0834739269738574, + "grad_norm": 24271.380859375, + "learning_rate": 1.1963649825398494e-06, + "loss": 0.4026, + "step": 210000 + }, + { + "epoch": 1.0834739269738574, + "eval_loss": 0.3863469064235687, + "eval_runtime": 3245.2698, + "eval_samples_per_second": 955.582, + "eval_steps_per_second": 1.866, + "step": 210000 + }, + { + "epoch": 1.0837318969564702, + "grad_norm": 23787.166015625, + "learning_rate": 1.188434731149074e-06, + "loss": 0.3965, + "step": 210050 + }, + { + "epoch": 1.083989866939083, + "grad_norm": 23804.666015625, + "learning_rate": 1.1805305344266604e-06, + "loss": 0.402, + "step": 210100 + }, + { + "epoch": 1.084247836921696, + "grad_norm": 25432.59765625, + "learning_rate": 1.1726523965917113e-06, + "loss": 0.4004, + "step": 210150 + }, + { + "epoch": 1.0845058069043085, + "grad_norm": 23947.248046875, + "learning_rate": 1.1648003218494242e-06, + "loss": 0.404, + "step": 210200 + }, + { + "epoch": 1.0847637768869214, + "grad_norm": 25563.51953125, + "learning_rate": 1.156974314391085e-06, + "loss": 0.3985, + "step": 210250 + }, + { + "epoch": 1.0850217468695342, + "grad_norm": 36885.1484375, + "learning_rate": 1.1491743783940801e-06, + "loss": 0.3974, + "step": 210300 + }, + { + "epoch": 1.085279716852147, + "grad_norm": 25414.0859375, + "learning_rate": 1.1414005180218346e-06, + "loss": 0.3988, + "step": 210350 + }, + { + "epoch": 1.08553768683476, + "grad_norm": 24175.00390625, + "learning_rate": 1.1336527374239125e-06, + "loss": 0.3995, + "step": 210400 + }, + { + "epoch": 1.0857956568173728, + "grad_norm": 21867.005859375, + "learning_rate": 1.1259310407359114e-06, + "loss": 0.3963, + "step": 210450 + }, + { + "epoch": 1.0860536267999856, + "grad_norm": 22963.5078125, + "learning_rate": 1.1182354320795285e-06, + "loss": 0.3961, + "step": 210500 + }, + { + "epoch": 1.0863115967825985, + "grad_norm": 24181.43359375, + "learning_rate": 1.1105659155625393e-06, + "loss": 0.3936, + "step": 210550 + }, + { + "epoch": 1.086569566765211, + "grad_norm": 26222.150390625, + "learning_rate": 1.1029224952787687e-06, + "loss": 0.402, + "step": 210600 + }, + { + "epoch": 1.086827536747824, + "grad_norm": 23023.005859375, + "learning_rate": 1.0953051753081368e-06, + "loss": 0.3887, + "step": 210650 + }, + { + "epoch": 1.0870855067304368, + "grad_norm": 23715.748046875, + "learning_rate": 1.0877139597166186e-06, + "loss": 0.3984, + "step": 210700 + }, + { + "epoch": 1.0873434767130497, + "grad_norm": 25058.646484375, + "learning_rate": 1.0801488525562565e-06, + "loss": 0.395, + "step": 210750 + }, + { + "epoch": 1.0876014466956625, + "grad_norm": 23697.73046875, + "learning_rate": 1.0726098578651588e-06, + "loss": 0.3993, + "step": 210800 + }, + { + "epoch": 1.0878594166782753, + "grad_norm": 23380.267578125, + "learning_rate": 1.065096979667496e-06, + "loss": 0.393, + "step": 210850 + }, + { + "epoch": 1.0881173866608882, + "grad_norm": 27738.705078125, + "learning_rate": 1.0576102219734985e-06, + "loss": 0.393, + "step": 210900 + }, + { + "epoch": 1.088375356643501, + "grad_norm": 26636.419921875, + "learning_rate": 1.0501495887794478e-06, + "loss": 0.3969, + "step": 210950 + }, + { + "epoch": 1.088633326626114, + "grad_norm": 23831.09765625, + "learning_rate": 1.0427150840676913e-06, + "loss": 0.3962, + "step": 211000 + }, + { + "epoch": 1.0888912966087265, + "grad_norm": 25435.53515625, + "learning_rate": 1.0353067118066163e-06, + "loss": 0.4019, + "step": 211050 + }, + { + "epoch": 1.0891492665913394, + "grad_norm": 25170.9296875, + "learning_rate": 1.0279244759506756e-06, + "loss": 0.4062, + "step": 211100 + }, + { + "epoch": 1.0894072365739522, + "grad_norm": 22411.763671875, + "learning_rate": 1.0205683804403564e-06, + "loss": 0.4056, + "step": 211150 + }, + { + "epoch": 1.089665206556565, + "grad_norm": 20984.373046875, + "learning_rate": 1.0132384292022124e-06, + "loss": 0.3999, + "step": 211200 + }, + { + "epoch": 1.089923176539178, + "grad_norm": 23698.67578125, + "learning_rate": 1.0059346261488133e-06, + "loss": 0.3983, + "step": 211250 + }, + { + "epoch": 1.0901811465217908, + "grad_norm": 25426.779296875, + "learning_rate": 9.986569751788023e-07, + "loss": 0.3923, + "step": 211300 + }, + { + "epoch": 1.0904391165044036, + "grad_norm": 26947.849609375, + "learning_rate": 9.914054801768435e-07, + "loss": 0.3961, + "step": 211350 + }, + { + "epoch": 1.0906970864870165, + "grad_norm": 25117.458984375, + "learning_rate": 9.841801450136357e-07, + "loss": 0.389, + "step": 211400 + }, + { + "epoch": 1.090955056469629, + "grad_norm": 23383.60546875, + "learning_rate": 9.76980973545938e-07, + "loss": 0.4045, + "step": 211450 + }, + { + "epoch": 1.091213026452242, + "grad_norm": 26197.37109375, + "learning_rate": 9.698079696165153e-07, + "loss": 0.3956, + "step": 211500 + }, + { + "epoch": 1.0914709964348548, + "grad_norm": 24827.63671875, + "learning_rate": 9.626611370541882e-07, + "loss": 0.3959, + "step": 211550 + }, + { + "epoch": 1.0917289664174676, + "grad_norm": 23645.685546875, + "learning_rate": 9.555404796737944e-07, + "loss": 0.3967, + "step": 211600 + }, + { + "epoch": 1.0919869364000805, + "grad_norm": 23297.22265625, + "learning_rate": 9.484460012761986e-07, + "loss": 0.3979, + "step": 211650 + }, + { + "epoch": 1.0922449063826933, + "grad_norm": 25558.216796875, + "learning_rate": 9.413777056482998e-07, + "loss": 0.4049, + "step": 211700 + }, + { + "epoch": 1.0925028763653062, + "grad_norm": 24427.572265625, + "learning_rate": 9.343355965630185e-07, + "loss": 0.4005, + "step": 211750 + }, + { + "epoch": 1.092760846347919, + "grad_norm": 21614.783203125, + "learning_rate": 9.273196777792926e-07, + "loss": 0.4008, + "step": 211800 + }, + { + "epoch": 1.0930188163305319, + "grad_norm": 24935.197265625, + "learning_rate": 9.203299530420873e-07, + "loss": 0.3974, + "step": 211850 + }, + { + "epoch": 1.0932767863131445, + "grad_norm": 25032.623046875, + "learning_rate": 9.133664260823848e-07, + "loss": 0.3974, + "step": 211900 + }, + { + "epoch": 1.0935347562957574, + "grad_norm": 25528.576171875, + "learning_rate": 9.064291006171732e-07, + "loss": 0.3969, + "step": 211950 + }, + { + "epoch": 1.0937927262783702, + "grad_norm": 25501.603515625, + "learning_rate": 8.99517980349468e-07, + "loss": 0.3995, + "step": 212000 + }, + { + "epoch": 1.094050696260983, + "grad_norm": 23834.970703125, + "learning_rate": 8.926330689682849e-07, + "loss": 0.4029, + "step": 212050 + }, + { + "epoch": 1.094308666243596, + "grad_norm": 25106.78125, + "learning_rate": 8.857743701486676e-07, + "loss": 0.3977, + "step": 212100 + }, + { + "epoch": 1.0945666362262088, + "grad_norm": 20878.955078125, + "learning_rate": 8.789418875516431e-07, + "loss": 0.3995, + "step": 212150 + }, + { + "epoch": 1.0948246062088216, + "grad_norm": 20234.91796875, + "learning_rate": 8.721356248242662e-07, + "loss": 0.3987, + "step": 212200 + }, + { + "epoch": 1.0950825761914345, + "grad_norm": 25996.611328125, + "learning_rate": 8.653555855995921e-07, + "loss": 0.3962, + "step": 212250 + }, + { + "epoch": 1.095340546174047, + "grad_norm": 26965.455078125, + "learning_rate": 8.586017734966644e-07, + "loss": 0.4022, + "step": 212300 + }, + { + "epoch": 1.09559851615666, + "grad_norm": 25616.74609375, + "learning_rate": 8.518741921205498e-07, + "loss": 0.3956, + "step": 212350 + }, + { + "epoch": 1.0958564861392728, + "grad_norm": 23590.908203125, + "learning_rate": 8.451728450622864e-07, + "loss": 0.3989, + "step": 212400 + }, + { + "epoch": 1.0961144561218856, + "grad_norm": 24570.7109375, + "learning_rate": 8.384977358989355e-07, + "loss": 0.3975, + "step": 212450 + }, + { + "epoch": 1.0963724261044985, + "grad_norm": 24186.62890625, + "learning_rate": 8.318488681935354e-07, + "loss": 0.4027, + "step": 212500 + }, + { + "epoch": 1.0966303960871113, + "grad_norm": 26281.365234375, + "learning_rate": 8.252262454951198e-07, + "loss": 0.398, + "step": 212550 + }, + { + "epoch": 1.0968883660697242, + "grad_norm": 25870.1484375, + "learning_rate": 8.186298713387219e-07, + "loss": 0.4032, + "step": 212600 + }, + { + "epoch": 1.097146336052337, + "grad_norm": 25573.958984375, + "learning_rate": 8.120597492453586e-07, + "loss": 0.3986, + "step": 212650 + }, + { + "epoch": 1.0974043060349499, + "grad_norm": 24439.345703125, + "learning_rate": 8.055158827220355e-07, + "loss": 0.4053, + "step": 212700 + }, + { + "epoch": 1.0976622760175625, + "grad_norm": 26741.91015625, + "learning_rate": 7.989982752617364e-07, + "loss": 0.4, + "step": 212750 + }, + { + "epoch": 1.0979202460001753, + "grad_norm": 24200.69921875, + "learning_rate": 7.92506930343434e-07, + "loss": 0.4003, + "step": 212800 + }, + { + "epoch": 1.0981782159827882, + "grad_norm": 22554.67578125, + "learning_rate": 7.860418514320844e-07, + "loss": 0.4023, + "step": 212850 + }, + { + "epoch": 1.098436185965401, + "grad_norm": 24178.01171875, + "learning_rate": 7.796030419786271e-07, + "loss": 0.3966, + "step": 212900 + }, + { + "epoch": 1.098694155948014, + "grad_norm": 23407.0859375, + "learning_rate": 7.731905054199629e-07, + "loss": 0.3986, + "step": 212950 + }, + { + "epoch": 1.0989521259306267, + "grad_norm": 23653.775390625, + "learning_rate": 7.66804245178987e-07, + "loss": 0.4052, + "step": 213000 + }, + { + "epoch": 1.0992100959132396, + "grad_norm": 24526.658203125, + "learning_rate": 7.604442646645615e-07, + "loss": 0.4013, + "step": 213050 + }, + { + "epoch": 1.0994680658958524, + "grad_norm": 22805.16015625, + "learning_rate": 7.541105672715154e-07, + "loss": 0.4032, + "step": 213100 + }, + { + "epoch": 1.099726035878465, + "grad_norm": 25872.771484375, + "learning_rate": 7.478031563806664e-07, + "loss": 0.3991, + "step": 213150 + }, + { + "epoch": 1.099984005861078, + "grad_norm": 26245.833984375, + "learning_rate": 7.415220353587715e-07, + "loss": 0.4038, + "step": 213200 + }, + { + "epoch": 1.1002419758436908, + "grad_norm": 23856.12109375, + "learning_rate": 7.35267207558582e-07, + "loss": 0.4061, + "step": 213250 + }, + { + "epoch": 1.1004999458263036, + "grad_norm": 22112.50390625, + "learning_rate": 7.290386763187995e-07, + "loss": 0.3976, + "step": 213300 + }, + { + "epoch": 1.1007579158089165, + "grad_norm": 23340.501953125, + "learning_rate": 7.228364449640978e-07, + "loss": 0.4038, + "step": 213350 + }, + { + "epoch": 1.1010158857915293, + "grad_norm": 23487.26171875, + "learning_rate": 7.166605168051066e-07, + "loss": 0.4002, + "step": 213400 + }, + { + "epoch": 1.1012738557741422, + "grad_norm": 25290.654296875, + "learning_rate": 7.105108951384109e-07, + "loss": 0.3952, + "step": 213450 + }, + { + "epoch": 1.101531825756755, + "grad_norm": 21821.9921875, + "learning_rate": 7.04387583246563e-07, + "loss": 0.4004, + "step": 213500 + }, + { + "epoch": 1.1017897957393679, + "grad_norm": 25063.50390625, + "learning_rate": 6.982905843980758e-07, + "loss": 0.4081, + "step": 213550 + }, + { + "epoch": 1.1020477657219805, + "grad_norm": 25687.083984375, + "learning_rate": 6.922199018474018e-07, + "loss": 0.3988, + "step": 213600 + }, + { + "epoch": 1.1023057357045933, + "grad_norm": 26373.240234375, + "learning_rate": 6.861755388349489e-07, + "loss": 0.3971, + "step": 213650 + }, + { + "epoch": 1.1025637056872062, + "grad_norm": 23180.66015625, + "learning_rate": 6.801574985870917e-07, + "loss": 0.394, + "step": 213700 + }, + { + "epoch": 1.102821675669819, + "grad_norm": 25684.03515625, + "learning_rate": 6.741657843161387e-07, + "loss": 0.3972, + "step": 213750 + }, + { + "epoch": 1.1030796456524319, + "grad_norm": 24192.1328125, + "learning_rate": 6.682003992203534e-07, + "loss": 0.3941, + "step": 213800 + }, + { + "epoch": 1.1033376156350447, + "grad_norm": 23294.033203125, + "learning_rate": 6.622613464839334e-07, + "loss": 0.3984, + "step": 213850 + }, + { + "epoch": 1.1035955856176576, + "grad_norm": 24957.40234375, + "learning_rate": 6.563486292770426e-07, + "loss": 0.3983, + "step": 213900 + }, + { + "epoch": 1.1038535556002704, + "grad_norm": 22199.44921875, + "learning_rate": 6.504622507557678e-07, + "loss": 0.3998, + "step": 213950 + }, + { + "epoch": 1.1041115255828833, + "grad_norm": 22867.84375, + "learning_rate": 6.4460221406214e-07, + "loss": 0.4024, + "step": 214000 + }, + { + "epoch": 1.104369495565496, + "grad_norm": 23964.05078125, + "learning_rate": 6.387685223241513e-07, + "loss": 0.3981, + "step": 214050 + }, + { + "epoch": 1.1046274655481088, + "grad_norm": 21671.08984375, + "learning_rate": 6.329611786556888e-07, + "loss": 0.3927, + "step": 214100 + }, + { + "epoch": 1.1048854355307216, + "grad_norm": 23079.9453125, + "learning_rate": 6.271801861566229e-07, + "loss": 0.3966, + "step": 214150 + }, + { + "epoch": 1.1051434055133345, + "grad_norm": 25017.455078125, + "learning_rate": 6.214255479127185e-07, + "loss": 0.3958, + "step": 214200 + }, + { + "epoch": 1.1054013754959473, + "grad_norm": 24560.5, + "learning_rate": 6.156972669956961e-07, + "loss": 0.3952, + "step": 214250 + }, + { + "epoch": 1.1056593454785602, + "grad_norm": 23092.80859375, + "learning_rate": 6.099953464632047e-07, + "loss": 0.3996, + "step": 214300 + }, + { + "epoch": 1.105917315461173, + "grad_norm": 24638.765625, + "learning_rate": 6.043197893588149e-07, + "loss": 0.396, + "step": 214350 + }, + { + "epoch": 1.1061752854437858, + "grad_norm": 24716.921875, + "learning_rate": 5.986705987120256e-07, + "loss": 0.3991, + "step": 214400 + }, + { + "epoch": 1.1064332554263987, + "grad_norm": 25669.224609375, + "learning_rate": 5.930477775382748e-07, + "loss": 0.4001, + "step": 214450 + }, + { + "epoch": 1.1066912254090113, + "grad_norm": 22935.7265625, + "learning_rate": 5.874513288389116e-07, + "loss": 0.3984, + "step": 214500 + }, + { + "epoch": 1.1069491953916242, + "grad_norm": 21897.39453125, + "learning_rate": 5.818812556012076e-07, + "loss": 0.4009, + "step": 214550 + }, + { + "epoch": 1.107207165374237, + "grad_norm": 23161.447265625, + "learning_rate": 5.763375607983679e-07, + "loss": 0.3986, + "step": 214600 + }, + { + "epoch": 1.1074651353568499, + "grad_norm": 21095.642578125, + "learning_rate": 5.708202473894975e-07, + "loss": 0.4029, + "step": 214650 + }, + { + "epoch": 1.1077231053394627, + "grad_norm": 22999.712890625, + "learning_rate": 5.653293183196462e-07, + "loss": 0.3958, + "step": 214700 + }, + { + "epoch": 1.1079810753220756, + "grad_norm": 23144.787109375, + "learning_rate": 5.598647765197529e-07, + "loss": 0.3959, + "step": 214750 + }, + { + "epoch": 1.1082390453046884, + "grad_norm": 22194.73046875, + "learning_rate": 5.544266249066899e-07, + "loss": 0.3974, + "step": 214800 + }, + { + "epoch": 1.1084970152873013, + "grad_norm": 21749.3046875, + "learning_rate": 5.490148663832406e-07, + "loss": 0.3991, + "step": 214850 + }, + { + "epoch": 1.108754985269914, + "grad_norm": 22517.267578125, + "learning_rate": 5.43629503838089e-07, + "loss": 0.4011, + "step": 214900 + }, + { + "epoch": 1.1090129552525267, + "grad_norm": 26576.787109375, + "learning_rate": 5.382705401458465e-07, + "loss": 0.3984, + "step": 214950 + }, + { + "epoch": 1.1092709252351396, + "grad_norm": 22315.609375, + "learning_rate": 5.329379781670196e-07, + "loss": 0.4007, + "step": 215000 + }, + { + "epoch": 1.1092709252351396, + "eval_loss": 0.3860665559768677, + "eval_runtime": 3233.1388, + "eval_samples_per_second": 959.167, + "eval_steps_per_second": 1.873, + "step": 215000 + } + ], + "logging_steps": 50, + "max_steps": 225000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 5000, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 5, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.916937061638367e+17, + "train_batch_size": 128, + "trial_name": null, + "trial_params": null +} diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/training_args.bin b/pretrain_glome_nano_model_tiny/checkpoint-215000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..afd49ab13e1adc210b7ee9755ab768f1bc6434dc --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c41aa9c6023a3a9650c2ca731b440abde601b316b41906bb1dab8748c3c13ed +size 5304 diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/vocab.json b/pretrain_glome_nano_model_tiny/checkpoint-215000/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..54045330cccae0d703647b73183868a84aa6c91f --- /dev/null +++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/vocab.json @@ -0,0 +1 @@ +{"A":0,"R":1,"N":2,"D":3,"C":4,"Q":5,"E":6,"G":7,"H":8,"I":9,"L":10,"K":11,"M":12,"F":13,"P":14,"S":15,"T":16,"W":17,"Y":18,"V":19,"X":20,"B":21,"U":22,"Z":23,"O":24,".":25,"-":26,"":27,"":28,"":29,"":30,"":31} \ No newline at end of file diff --git a/pretrain_glome_nano_model_tiny/logs/events.out.tfevents.1765940241.amax.612137.0 b/pretrain_glome_nano_model_tiny/logs/events.out.tfevents.1765940241.amax.612137.0 new file mode 100644 index 0000000000000000000000000000000000000000..179e01ca216169225277f4a05fde5c4100b6e1db --- /dev/null +++ b/pretrain_glome_nano_model_tiny/logs/events.out.tfevents.1765940241.amax.612137.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa4b1f7df48a807b1536a66293f8d3c69d22c2f41844975ef08715dd8b0d776 +size 6779 diff --git a/pretrain_glome_nano_model_tiny/logs/events.out.tfevents.1765940354.amax.615566.0 b/pretrain_glome_nano_model_tiny/logs/events.out.tfevents.1765940354.amax.615566.0 new file mode 100644 index 0000000000000000000000000000000000000000..8603703ac942f839bf7a3cd5617a5bd4fb3c4d96 --- /dev/null +++ b/pretrain_glome_nano_model_tiny/logs/events.out.tfevents.1765940354.amax.615566.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85dabbf1e456e1f20a84dc748d61ed39952becc5b14189935f10ff23d484249b +size 984259